diff options
Diffstat (limited to 'arch/s390')
45 files changed, 325 insertions, 457 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c7..43f019459d87 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -41,9 +41,6 @@ config AUDIT_ARCH config NO_IOPORT_MAP def_bool y -config PCI_QUIRKS - def_bool n - config ARCH_SUPPORTS_UPROBES def_bool y @@ -166,6 +163,7 @@ config S390 select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE @@ -258,6 +256,7 @@ config S390 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI + select PCI_QUIRKS if PCI select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 80bdfbae6e5b..26aa455d4068 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -815,9 +815,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 449a0e996b96..8392f8a5ad6d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -803,9 +803,6 @@ CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index b760232537f1..8c4db8b64fa2 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -108,11 +108,12 @@ config CRYPTO_DES_S390 As of z196 the CTR mode is hardware accelerated. config CRYPTO_CHACHA_S390 - tristate "Ciphers: ChaCha20" + tristate depends on S390 select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA + default CRYPTO_LIB_CHACHA_INTERNAL help Length-preserving cipher: ChaCha20 stream cipher (RFC 7539) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9c46b1b630b1..5d36f4020dfa 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -66,7 +66,6 @@ struct s390_xts_ctx { struct gcm_sg_walk { struct scatter_walk walk; unsigned int walk_bytes; - u8 *walk_ptr; unsigned int walk_bytes_remain; u8 buf[AES_BLOCK_SIZE]; unsigned int buf_bytes; @@ -787,29 +786,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw) { - struct scatterlist *nextsg; - - gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); - while (!gw->walk_bytes) { - nextsg = sg_next(gw->walk.sg); - if (!nextsg) - return 0; - scatterwalk_start(&gw->walk, nextsg); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); - } - gw->walk_ptr = scatterwalk_map(&gw->walk); + if (gw->walk_bytes_remain == 0) + return 0; + gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain); return gw->walk_bytes; } static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, - unsigned int nbytes) + unsigned int nbytes, bool out) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(gw->walk_ptr); - scatterwalk_advance(&gw->walk, nbytes); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - gw->walk_ptr = NULL; + if (out) + scatterwalk_done_dst(&gw->walk, nbytes); + else + scatterwalk_done_src(&gw->walk, nbytes); } static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) @@ -835,16 +825,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } while (1) { n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); - memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); + memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n); gw->buf_bytes += n; - _gcm_sg_unmap_and_advance(gw, n); + _gcm_sg_unmap_and_advance(gw, n, false); if (gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; @@ -876,13 +866,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } - scatterwalk_unmap(gw->walk_ptr); - gw->walk_ptr = NULL; + scatterwalk_unmap(&gw->walk); gw->ptr = gw->buf; gw->nbytes = sizeof(gw->buf); @@ -904,7 +893,7 @@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) } else gw->buf_bytes = 0; } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, false); return bytesdone; } @@ -921,11 +910,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) if (!_gcm_sg_clamp_and_map(gw)) return i; n = min(gw->walk_bytes, bytesdone - i); - memcpy(gw->walk_ptr, gw->buf + i, n); - _gcm_sg_unmap_and_advance(gw, n); + memcpy(gw->walk.addr, gw->buf + i, n); + _gcm_sg_unmap_and_advance(gw, n, true); } } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, true); return bytesdone; } diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index f8b0c52e77a4..920e9f0941e7 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -41,7 +41,7 @@ static int chacha20_s390(struct skcipher_request *req) int rc; rc = skcipher_walk_virt(&walk, req, false); - chacha_init_generic(state, ctx->key, req->iv); + chacha_init(state, ctx->key, req->iv); while (walk.nbytes > 0) { nbytes = walk.nbytes; @@ -69,12 +69,6 @@ void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) } EXPORT_SYMBOL(hchacha_block_arch); -void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -{ - chacha_init_generic(state, key, iv); -} -EXPORT_SYMBOL(chacha_init_arch); - void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 4e73ef46d4b2..9f2814d0e1e9 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -139,7 +139,6 @@ int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697..bde6a496df5f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9a367866cab0..424f899d8163 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1056,7 +1056,6 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); -static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 474e1f8d1d3c..41f900f693d9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -11,6 +11,9 @@ #include <asm/pci_insn.h> #include <asm/sclp.h> +#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 +#define arch_can_pci_mmap_wc() 1 + #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -144,7 +147,7 @@ struct zpci_dev { u8 util_str_avail : 1; u8 irqs_registered : 1; u8 tid_avail : 1; - u8 reserved : 1; + u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -217,6 +220,7 @@ extern struct airq_iv *zpci_aif_sbv; struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state); int zpci_add_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); +int zpci_reenable_device(struct zpci_dev *zdev); int zpci_disable_device(struct zpci_dev *); int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); @@ -245,6 +249,7 @@ void update_uid_checking(bool new); /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status); #ifdef CONFIG_PCI static inline bool zpci_use_mio(struct zpci_dev *zdev) diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index 3fff2f7095c8..7ebff39c84b3 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -156,7 +156,9 @@ struct clp_rsp_query_pci_grp { u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; - u8 : 6; + u8 : 2; + u8 rtr : 1; /* Relaxed translation requirement */ + u8 : 3; u8 frame : 1; u8 refresh : 1; /* TLB refresh mode */ u16 : 3; diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index b11f5b6d0bd1..46fb0ef6f984 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -631,7 +631,7 @@ int uv_pin_shared(unsigned long paddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb); int uv_convert_from_secure(unsigned long paddr); int uv_convert_from_secure_folio(struct folio *folio); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 92c73e4d97a9..420a073fdde5 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -6,13 +6,11 @@ #ifndef __ASSEMBLY__ -extern struct vdso_data *vdso_data; - int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #define VDSO_VERSION_STRING LINUX_2.6.29 diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h index 36355af7160b..f8713ce39bb2 100644 --- a/arch/s390/include/asm/vdso/getrandom.h +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. - */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - return &_vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 7937765ccfa5..fb4564308e9d 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -14,12 +14,7 @@ #include <linux/compiler.h> -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) { u64 adj, now; @@ -49,12 +44,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 3eb576ecd3bd..d346ebe51301 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,32 +2,12 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 768 - #ifndef __ASSEMBLY__ #include <linux/hrtimer.h> #include <vdso/datapage.h> #include <asm/vdso.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES -}; - -static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __s390_get_k_vdso_data - -static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 276cb4c1e11b..4a981266b483 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -246,15 +246,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -279,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -298,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -322,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -346,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -371,7 +355,7 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -610,7 +594,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..911b95cd57e5 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -198,13 +198,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 63ba6306632e..e540b022ceb2 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -266,12 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; if (unlikely(ftrace_graph_is_dead())) return; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; - if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs)) + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) *parent = (unsigned long)&return_to_handler; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..7ca157ffab30 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 10725f5a6f0f..63875270941b 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -518,7 +518,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index a8f0bad99cf0..fd14d5ebccbc 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -542,7 +542,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..745649ad9779 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -72,7 +72,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e9f47c3a6197..699a18f1c54e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -79,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d..b746213d3110 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -285,10 +285,10 @@ static void __init test_monitor_call(void) return; asm volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9f05df2da2f7..9a5d5be8acf4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -206,6 +206,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -228,7 +261,7 @@ static int expected_folio_refs(struct folio *folio) } /** - * make_folio_secure() - make a folio secure + * __make_folio_secure() - make a folio secure * @folio: the folio to make secure * @uvcb: the uvcb that describes the UVC to be used * @@ -237,20 +270,18 @@ static int expected_folio_refs(struct folio *folio) * * Return: 0 on success; * -EBUSY if the folio is in writeback or has too many references; - * -E2BIG if the folio is large; * -EAGAIN if the UVC needs to be attempted again; * -ENXIO if the address is not mapped; * -EINVAL if the UVC failed for other reasons. * * Context: The caller must hold exactly one extra reference on the folio - * (it's the same logic as split_folio()) + * (it's the same logic as split_folio()), and the folio must be + * locked. */ -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; - if (folio_test_large(folio)) - return -E2BIG; if (folio_test_writeback(folio)) return -EBUSY; expected = expected_folio_refs(folio) + 1; @@ -277,7 +308,98 @@ int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -EXPORT_SYMBOL_GPL(make_folio_secure); + +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) +{ + int rc; + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; +} + +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split. + * @mm: the mm containing the folio to work on + * @folio: the folio + * @split: whether to split a large folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the folio was split successfully; + * -EAGAIN if the folio was not split successfully but another attempt + * can be made, or if @split was set to false; + * -EINVAL in case of other errors. See split_folio(). + */ +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +{ + int rc; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + if (split) { + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + + if (rc != -EBUSY) + return rc; + } + return -EAGAIN; +} + +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) +{ + struct vm_area_struct *vma; + struct folio_walk fw; + struct folio *folio; + int rc; + + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } + + folio_get(folio); + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); + folio_walk_end(&fw, vma); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) + rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG); + folio_put(folio); + + return rc; +} +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..70c8f9ad13cd 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,14 +73,11 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f76..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index ad206f2068d8..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb53..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 02adf151d4de..6d8944d1b4a0 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -23,116 +23,25 @@ #include "gmap.h" /** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - -static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb) -{ - struct folio *folio = page_folio(page); - int rc; - - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. - * If userspace plays dirty tricks and decides to map huge pages at a - * later point in time, it will receive a segmentation fault or - * KVM_RUN will return -EFAULT. - */ - if (folio_test_hugetlb(folio)) - return -EFAULT; - if (folio_test_large(folio)) { - mmap_read_unlock(gmap->mm); - rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true); - mmap_read_lock(gmap->mm); - if (rc) - return rc; - folio = page_folio(page); - } - - if (!folio_trylock(folio)) - return -EAGAIN; - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(folio_to_phys(folio)); - rc = make_folio_secure(folio, uvcb); - folio_unlock(folio); - - /* - * In theory a race is possible and the folio might have become - * large again before the folio_trylock() above. In that case, no - * action is performed and -EAGAIN is returned; the callers will - * have to try again later. - * In most cases this implies running the VM again, getting the same - * exception again, and make another attempt in this function. - * This is expected to happen extremely rarely. - */ - if (rc == -E2BIG) - return -EAGAIN; - /* The folio has too many references, try to shake some off */ - if (rc == -EBUSY) { - mmap_read_unlock(gmap->mm); - kvm_s390_wiggle_split_folio(gmap->mm, folio, false); - mmap_read_lock(gmap->mm); - return -EAGAIN; - } - - return rc; -} - -/** * gmap_make_secure() - make one guest page secure * @gmap: the guest gmap * @gaddr: the guest address that needs to be made secure * @uvcb: the UVCB specifying which operation needs to be performed * * Context: needs to be called with kvm->srcu held. - * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()). + * Return: 0 on success, < 0 in case of error. */ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) { struct kvm *kvm = gmap->private; - struct page *page; - int rc = 0; + unsigned long vmaddr; lockdep_assert_held(&kvm->srcu); - page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); - mmap_read_lock(gmap->mm); - if (page) - rc = __gmap_make_secure(gmap, page, uvcb); - kvm_release_page_clean(page); - mmap_read_unlock(gmap->mm); - - return rc; + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) + return -EFAULT; + return make_hva_secure(gmap->mm, vmaddr, uvcb); } int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 07ff0e10cb7f..0f00f8e85fee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3174,8 +3174,7 @@ void kvm_s390_gisa_init(struct kvm *kvm) gi->alert.mask = 0; spin_lock_init(&gi->alert.ref_lock); gi->expires = 50 * 1000; /* 50 usec */ - hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - gi->timer.function = gisa_vcpu_kicker; + hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL); memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ebecb96bacce..48104e59648b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3943,8 +3943,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); vcpu->arch.sie_block->hpid = HPID_KVM; @@ -4952,6 +4952,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; + int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4961,16 +4962,6 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case 0: vcpu->stat.exit_null++; break; - case PGM_NON_SECURE_STORAGE_ACCESS: - kvm_s390_assert_primary_as(vcpu); - /* - * This is normal operation; a page belonging to a protected - * guest has not been imported yet. Try to import the page into - * the protected guest. - */ - if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL) - send_sig(SIGSEGV, current, 0); - break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: kvm_s390_assert_primary_as(vcpu); @@ -4995,6 +4986,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); } break; + case PGM_NON_SECURE_STORAGE_ACCESS: + kvm_s390_assert_primary_as(vcpu); + /* + * This is normal operation; a page belonging to a protected + * guest has not been imported yet. Try to import the page into + * the protected guest. + */ + rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr); + if (rc == -EINVAL) + send_sig(SIGSEGV, current, 0); + if (rc != -ENXIO) + break; + flags = FAULT_FLAG_WRITE; + fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: case PGM_PAGE_TRANSLATION: diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 9b9e7fdd5380..8c40154ff50f 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -433,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; - u8 status; int rc; if (!zdev) @@ -480,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) */ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); - rc = zpci_enable_device(zdev); - if (rc) - goto clear_gisa; - - /* Re-register the IOMMU that was already created */ - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + rc = zpci_reenable_device(zdev); if (rc) goto clear_gisa; @@ -516,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; - u8 status; if (!zdev) return; @@ -550,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) goto out; } - if (zpci_enable_device(zdev)) - goto out; - - /* Re-register the IOMMU that was already created */ - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + zpci_reenable_device(zdev); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c index 137080e61f90..124214a27340 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32-glue.c @@ -62,7 +62,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs); DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) -DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) static int __init crc32_s390_init(void) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 94d927785800..d14b488e7a1f 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2626,31 +2626,3 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); - -/** - * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split - * @mm: the mm containing the folio to work on - * @folio: the folio - * @split: whether to split a large folio - * - * Context: Must be called while holding an extra reference to the folio; - * the mm lock should not be held. - */ -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) -{ - int rc; - - lockdep_assert_not_held(&mm->mmap_lock); - folio_wait_writeback(folio); - lru_add_drain_all(); - if (split) { - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - - if (rc != -EBUSY) - return rc; - } - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index df73c5182990..1810e0944a4e 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o pci_report.o + pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 88f72745fa59..fa879351efb5 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -124,14 +124,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, struct zpci_fib fib = {0}; u8 cc; - WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; /* Work around off by one in ISM virt device */ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) fib.pal = limit + (1 << 12); else fib.pal = limit; - fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; + fib.iota = iota; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); if (cc) @@ -690,6 +689,23 @@ int zpci_enable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_enable_device); +int zpci_reenable_device(struct zpci_dev *zdev) +{ + u8 status; + int rc; + + rc = zpci_enable_device(zdev); + if (rc) + return rc; + + rc = zpci_iommu_register_ioat(zdev, &status); + if (rc) + zpci_disable_device(zdev); + + return rc; +} +EXPORT_SYMBOL_GPL(zpci_reenable_device); + int zpci_disable_device(struct zpci_dev *zdev) { u32 fh = zdev->fh; @@ -739,7 +755,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { - u8 status; int rc; lockdep_assert_held(&zdev->state_lock); @@ -758,19 +773,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) return rc; } - rc = zpci_enable_device(zdev); - if (rc) - return rc; - - if (zdev->dma_table) - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (rc) { - zpci_disable_device(zdev); - return rc; - } + rc = zpci_reenable_device(zdev); - return 0; + return rc; } /** diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 39a481ec4a40..0e725039861f 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -19,6 +19,7 @@ #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> +#include <linux/dma-direct.h> #include <asm/pci_clp.h> #include <asm/pci_dma.h> @@ -283,10 +284,34 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) return zbus; } +static void pci_dma_range_setup(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + struct bus_dma_region *map; + u64 aligned_end; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return; + + map->cpu_start = 0; + map->dma_start = PAGE_ALIGN(zdev->start_dma); + aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1); + if (aligned_end >= map->dma_start) + map->size = aligned_end - map->dma_start; + else + map->size = 0; + WARN_ON_ONCE(map->size == 0); + + pdev->dev.dma_range_map = map; +} + void pcibios_bus_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); + pci_dma_range_setup(pdev); + /* * With pdev->no_vf_scan the common PCI probing code does not * perform PF/VF linking. diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 14bf7e8d06b7..27248686e588 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, zdev->version = response->version; zdev->maxstbl = response->maxstbl; zdev->dtsm = response->dtsm; + zdev->rtr_avail = response->rtr; switch (response->version) { case 1: diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c new file mode 100644 index 000000000000..35688b645098 --- /dev/null +++ b/arch/s390/pci/pci_fixup.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exceptions for specific devices, + * + * Copyright IBM Corp. 2025 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + */ +#include <linux/pci.h> + +static void zpci_ism_bar_no_mmap(struct pci_dev *pdev) +{ + /* + * ISM's BAR is special. Drivers written for ISM know + * how to handle this but others need to be aware of their + * special nature e.g. to prevent attempts to mmap() it. + */ + pdev->non_mappable_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, + PCI_DEVICE_ID_IBM_ISM, + zpci_ism_bar_no_mmap); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 46f99dc164ad..1997d9b7965d 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -175,8 +175,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, args.address = mmio_addr; args.vma = vma; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); @@ -315,14 +319,18 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out_unlock_mmap; ret = -EACCES; - if (!(vma->vm_flags & VM_WRITE)) + if (!(vma->vm_flags & VM_READ)) goto out_unlock_mmap; args.vma = vma; args.address = mmio_addr; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, 0, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 2de1ea6c3a8c..0ecad08e1b1e 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled); static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) { - u8 status; int ret; pci_stop_and_remove_bus_device(pdev); @@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) return ret; } - ret = zpci_enable_device(zdev); - if (ret) - return ret; + ret = zpci_reenable_device(zdev); - if (zdev->dma_table) { - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (ret) - zpci_disable_device(zdev); - } return ret; } |