diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2025-10-18 10:25:43 +0200 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2025-10-18 10:25:43 +0200 |
| commit | 4361f5aa8bfcecbab3fc8db987482b9e08115a6a (patch) | |
| tree | 7fbee6760544a527348a11775c286105e47f73d2 /virt | |
| parent | 5d26eaae15fb5c190164362a3e19081935574efc (diff) | |
| parent | 505f5224b197b77169c977e747cbc18b222f85f9 (diff) | |
Merge tag 'kvm-x86-fixes-6.18-rc2' of https://github.com/kvm-x86/linux into HEAD
KVM x86 fixes for 6.18:
- Expand the KVM_PRE_FAULT_MEMORY selftest to add a regression test for the
bug fixed by commit 3ccbf6f47098 ("KVM: x86/mmu: Return -EAGAIN if userspace
deletes/moves memslot during prefault")
- Don't try to get PMU capabbilities from perf when running a CPU with hybrid
CPUs/PMUs, as perf will rightly WARN.
- Rework KVM_CAP_GUEST_MEMFD_MMAP (newly introduced in 6.18) into a more
generic KVM_CAP_GUEST_MEMFD_FLAGS
- Add a guest_memfd INIT_SHARED flag and require userspace to explicitly set
said flag to initialize memory as SHARED, irrespective of MMAP. The
behavior merged in 6.18 is that enabling mmap() implicitly initializes
memory as SHARED, which would result in an ABI collision for x86 CoCo VMs
as their memory is currently always initialized PRIVATE.
- Allow mmap() on guest_memfd for x86 CoCo VMs, i.e. on VMs with private
memory, to enable testing such setups, i.e. to hopefully flush out any
other lurking ABI issues before 6.18 is officially released.
- Add testcases to the guest_memfd selftest to cover guest_memfd without MMAP,
and host userspace accesses to mmap()'d private memory.
Diffstat (limited to 'virt')
| -rw-r--r-- | virt/kvm/Kconfig | 1 | ||||
| -rw-r--r-- | virt/kvm/guest_memfd.c | 75 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 4 |
3 files changed, 52 insertions, 28 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 0227e13cd8dd..5f0015c5dd95 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,6 +113,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES bool config KVM_GUEST_MEMFD + depends on KVM_GENERIC_MMU_NOTIFIER select XARRAY_MULTI bool diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 94bafd6c558c..fbca8c0972da 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -102,8 +102,17 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) return filemap_grab_folio(inode->i_mapping, index); } -static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode) +{ + if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED) + return KVM_FILTER_SHARED; + + return KVM_FILTER_PRIVATE; +} + +static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end, + enum kvm_gfn_range_filter attr_filter) { bool flush = false, found_memslot = false; struct kvm_memory_slot *slot; @@ -118,8 +127,7 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, .end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff, .slot = slot, .may_block = true, - /* guest memfd is relevant to only private mappings. */ - .attr_filter = KVM_FILTER_PRIVATE, + .attr_filter = attr_filter, }; if (!found_memslot) { @@ -139,8 +147,21 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, KVM_MMU_UNLOCK(kvm); } -static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start, + pgoff_t end) +{ + struct list_head *gmem_list = &inode->i_mapping->i_private_list; + enum kvm_gfn_range_filter attr_filter; + struct kvm_gmem *gmem; + + attr_filter = kvm_gmem_get_invalidate_filter(inode); + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_begin(gmem, start, end, attr_filter); +} + +static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end) { struct kvm *kvm = gmem->kvm; @@ -151,12 +172,20 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, } } -static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start, + pgoff_t end) { struct list_head *gmem_list = &inode->i_mapping->i_private_list; + struct kvm_gmem *gmem; + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_end(gmem, start, end); +} + +static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +{ pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; - struct kvm_gmem *gmem; /* * Bindings must be stable across invalidation to ensure the start+end @@ -164,13 +193,11 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) */ filemap_invalidate_lock(inode->i_mapping); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(inode, start, end); truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(inode, start, end); filemap_invalidate_unlock(inode->i_mapping); @@ -280,8 +307,9 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) * Zap all SPTEs pointed at by this file. Do not free the backing * memory, as its lifetime is associated with the inode, not the file. */ - kvm_gmem_invalidate_begin(gmem, 0, -1ul); - kvm_gmem_invalidate_end(gmem, 0, -1ul); + __kvm_gmem_invalidate_begin(gmem, 0, -1ul, + kvm_gmem_get_invalidate_filter(inode)); + __kvm_gmem_invalidate_end(gmem, 0, -1ul); list_del(&gmem->entry); @@ -328,6 +356,9 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; + if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)) + return VM_FAULT_SIGBUS; + folio = kvm_gmem_get_folio(inode, vmf->pgoff); if (IS_ERR(folio)) { int err = PTR_ERR(folio); @@ -400,8 +431,6 @@ static int kvm_gmem_migrate_folio(struct address_space *mapping, static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio) { - struct list_head *gmem_list = &mapping->i_private_list; - struct kvm_gmem *gmem; pgoff_t start, end; filemap_invalidate_lock_shared(mapping); @@ -409,8 +438,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol start = folio->index; end = start + folio_nr_pages(folio); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(mapping->host, start, end); /* * Do not truncate the range, what action is taken in response to the @@ -421,8 +449,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol * error to userspace. */ - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(mapping->host, start, end); filemap_invalidate_unlock_shared(mapping); @@ -458,7 +485,7 @@ static const struct inode_operations kvm_gmem_iops = { .setattr = kvm_gmem_setattr, }; -bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return true; } @@ -522,12 +549,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) { loff_t size = args->size; u64 flags = args->flags; - u64 valid_flags = 0; - - if (kvm_arch_supports_gmem_mmap(kvm)) - valid_flags |= GUEST_MEMFD_FLAG_MMAP; - if (flags & ~valid_flags) + if (flags & ~kvm_gmem_get_supported_flags(kvm)) return -EINVAL; if (size <= 0 || !PAGE_ALIGNED(size)) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 226faeaa8e56..b7a0ae2a7b20 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4928,8 +4928,8 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CAP_GUEST_MEMFD: return 1; - case KVM_CAP_GUEST_MEMFD_MMAP: - return !kvm || kvm_arch_supports_gmem_mmap(kvm); + case KVM_CAP_GUEST_MEMFD_FLAGS: + return kvm_gmem_get_supported_flags(kvm); #endif default: break; |