| author    | Paolo Bonzini <pbonzini@redhat.com>      | 2025-11-26 09:32:44 +0100 |
|-----------|------------------------------------------|---------------------------|
| committer | Paolo Bonzini <pbonzini@redhat.com>      | 2025-11-26 09:32:44 +0100 |
| commit    | 236831743ced9162a8953aa281b73c65bd68b822 |                           |
| tree      | 0309150780ed361a388d8f4a99cac458aac07180 |                           |
| parent    | 9aca52b5523aab9c9a777a591c0b27acbede9881 |                           |
| parent    | 83e0e12219a402bf7b8fdef067e51f945a92fd26 |                           |
Merge tag 'kvm-x86-gmem-6.19' of https://github.com/kvm-x86/linux into HEAD
KVM guest_memfd changes for 6.19:
- Add NUMA mempolicy support for guest_memfd, and clean up a variety of
rough edges in guest_memfd along the way.
- Define a CLASS to automatically handle get+put when grabbing a guest_memfd
from a memslot to make it harder to leak references (see the sketch after this list).
- Enhance KVM selftests to make it easier to develop and debug selftests like
those added for guest_memfd NUMA support, e.g. where test and/or KVM bugs
often result in hard-to-debug SIGBUS errors.
- Misc cleanups.
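The reference-counting CLASS mentioned above builds on the kernel's scope-based cleanup helpers (linux/cleanup.h): the patch below adds DEFINE_CLASS(gmem_get_file, ...) so that a guest_memfd file taken from a memslot via kvm_gmem_get_file() is automatically fput() when the local variable goes out of scope, which is what removes the explicit fput() calls from kvm_gmem_get_pfn(), kvm_gmem_unbind(), and kvm_gmem_populate(). As a rough, standalone illustration of the same pattern, here is a minimal userspace sketch built directly on the __attribute__((cleanup)) mechanism the kernel macro expands to; the gmem_ref/gmem_get/gmem_put/GMEM_REF_GUARD names are invented for this example and do not appear in the patch.

```c
/*
 * Minimal userspace approximation of the scope-based get+put pattern that
 * the kernel's DEFINE_CLASS()/CLASS() helpers provide.  All names here are
 * illustrative only; the real code pairs kvm_gmem_get_file() with fput().
 */
#include <stdio.h>

struct gmem_ref {
	int refcount;
};

static struct gmem_ref *gmem_get(struct gmem_ref *r)
{
	if (r)
		r->refcount++;
	return r;
}

/* Cleanup callbacks receive a pointer to the variable being cleaned up. */
static void gmem_put(struct gmem_ref **rp)
{
	if (*rp) {
		(*rp)->refcount--;
		printf("put: refcount now %d\n", (*rp)->refcount);
	}
}

/* "CLASS(gmem_get_file, file)(slot)" boils down to a cleanup-attribute local like this. */
#define GMEM_REF_GUARD(var, src) \
	struct gmem_ref *var __attribute__((cleanup(gmem_put))) = gmem_get(src)

int main(void)
{
	struct gmem_ref backing = { .refcount = 1 };

	{
		GMEM_REF_GUARD(ref, &backing);

		if (!ref)
			return 1;
		printf("get: refcount now %d\n", ref->refcount);
		/* use ref here; the put runs automatically when the scope exits */
	}

	printf("final refcount: %d\n", backing.refcount);
	return 0;
}
```

In the actual change, CLASS(gmem_get_file, file)(slot) plays the role of the guard above, pairing kvm_gmem_get_file(slot) with fput() so that early-return paths can no longer leak the file reference.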
| -rw-r--r-- | fs/btrfs/compression.c                                          |   4 |
| -rw-r--r-- | fs/btrfs/verity.c                                               |   2 |
| -rw-r--r-- | fs/erofs/zdata.c                                                |   2 |
| -rw-r--r-- | fs/f2fs/compress.c                                              |   2 |
| -rw-r--r-- | include/linux/pagemap.h                                         |  18 |
| -rw-r--r-- | include/uapi/linux/magic.h                                      |   1 |
| -rw-r--r-- | mm/filemap.c                                                    |  23 |
| -rw-r--r-- | mm/mempolicy.c                                                  |   6 |
| -rw-r--r-- | mm/readahead.c                                                  |   2 |
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_irq.c                    |   2 |
| -rw-r--r-- | tools/testing/selftests/kvm/guest_memfd_test.c                  |  98 |
| -rw-r--r-- | tools/testing/selftests/kvm/include/kvm_syscalls.h              |  81 |
| -rw-r--r-- | tools/testing/selftests/kvm/include/kvm_util.h                  |  39 |
| -rw-r--r-- | tools/testing/selftests/kvm/include/numaif.h                    | 110 |
| -rw-r--r-- | tools/testing/selftests/kvm/kvm_binary_stats_test.c             |   4 |
| -rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c                      | 101 |
| -rw-r--r-- | tools/testing/selftests/kvm/x86/private_mem_conversions_test.c  |   9 |
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xapic_ipi_test.c                |   5 |
| -rw-r--r-- | virt/kvm/guest_memfd.c                                          | 373 |
| -rw-r--r-- | virt/kvm/kvm_main.c                                             |   7 |
| -rw-r--r-- | virt/kvm/kvm_mm.h                                               |   9 |

21 files changed, 645 insertions(+), 253 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bacad18357b3..d927ae32e7d0 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -491,8 +491,8 @@ static noinline int add_ra_bio_pages(struct inode *inode, continue; } - folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, - ~__GFP_FS), 0); + folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, ~__GFP_FS), + 0, NULL); if (!folio) break; diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index 46bd8ca58670..d4523d5debcd 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -742,7 +742,7 @@ again: } folio = filemap_alloc_folio(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS), - 0); + 0, NULL); if (!folio) return ERR_PTR(-ENOMEM); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index bc80cfe482f7..b7369fb4fbe9 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -562,7 +562,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe) * Allocate a managed folio for cached I/O, or it may be * then filled with a file-backed folio for in-place I/O */ - newfolio = filemap_alloc_folio(gfp, 0); + newfolio = filemap_alloc_folio(gfp, 0, NULL); if (!newfolio) continue; newfolio->private = Z_EROFS_PREALLOCATED_FOLIO; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6ad8d3bc6df7..a65e8cd388bc 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1947,7 +1947,7 @@ static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, return; } - cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0); + cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0, NULL); if (!cfolio) return; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 09b581c1d878..a17fabbc0269 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -654,9 +654,11 @@ static inline void *detach_page_private(struct page *page) } #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order); +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *policy); #else -static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) +static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *policy) { return folio_alloc_noprof(gfp, order); } @@ -667,7 +669,7 @@ static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int o static inline struct page *__page_cache_alloc(gfp_t gfp) { - return &filemap_alloc_folio(gfp, 0)->page; + return &filemap_alloc_folio(gfp, 0, NULL)->page; } static inline gfp_t readahead_gfp_mask(struct address_space *x) @@ -753,11 +755,17 @@ static inline fgf_t fgf_set_order(size_t size) } void *filemap_get_entry(struct address_space *mapping, pgoff_t index); -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - fgf_t fgp_flags, gfp_t gfp); +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, + pgoff_t index, fgf_t fgf_flags, gfp_t gfp, struct mempolicy *policy); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); +static inline struct folio *__filemap_get_folio(struct address_space *mapping, + pgoff_t index, fgf_t fgf_flags, gfp_t gfp) +{ + return __filemap_get_folio_mpol(mapping, index, fgf_flags, gfp, NULL); +} + /** * write_begin_get_folio - Get folio for write_begin with flags. * @iocb: The kiocb passed from write_begin (may be NULL). 
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index bb575f3ab45e..638ca21b7a90 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -103,5 +103,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/mm/filemap.c b/mm/filemap.c index 2f1e7e283a51..270485b19a5b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1002,11 +1002,16 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *policy) { int n; struct folio *folio; + if (policy) + return folio_alloc_mpol_noprof(gfp, order, policy, + NO_INTERLEAVE_INDEX, numa_node_id()); + if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { @@ -1923,11 +1928,12 @@ out: } /** - * __filemap_get_folio - Find and get a reference to a folio. + * __filemap_get_folio_mpol - Find and get a reference to a folio. * @mapping: The address_space to search. * @index: The page index. * @fgp_flags: %FGP flags modify how the folio is returned. * @gfp: Memory allocation flags to use if %FGP_CREAT is specified. + * @policy: NUMA memory allocation policy to follow. * * Looks up the page cache entry at @mapping & @index. * @@ -1938,8 +1944,8 @@ out: * * Return: The found folio or an ERR_PTR() otherwise. */ -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - fgf_t fgp_flags, gfp_t gfp) +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, + pgoff_t index, fgf_t fgp_flags, gfp_t gfp, struct mempolicy *policy) { struct folio *folio; @@ -2009,7 +2015,7 @@ no_page: err = -ENOMEM; if (order > min_order) alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN; - folio = filemap_alloc_folio(alloc_gfp, order); + folio = filemap_alloc_folio(alloc_gfp, order, policy); if (!folio) continue; @@ -2056,7 +2062,7 @@ no_page: folio_clear_dropbehind(folio); return folio; } -EXPORT_SYMBOL(__filemap_get_folio); +EXPORT_SYMBOL(__filemap_get_folio_mpol); static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, xa_mark_t mark) @@ -2551,7 +2557,7 @@ static int filemap_create_folio(struct kiocb *iocb, struct folio_batch *fbatch) if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) return -EAGAIN; - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), min_order); + folio = filemap_alloc_folio(mapping_gfp_mask(mapping), min_order, NULL); if (!folio) return -ENOMEM; if (iocb->ki_flags & IOCB_DONTCACHE) @@ -3995,8 +4001,7 @@ static struct folio *do_read_cache_folio(struct address_space *mapping, repeat: folio = filemap_get_folio(mapping, index); if (IS_ERR(folio)) { - folio = filemap_alloc_folio(gfp, - mapping_min_folio_order(mapping)); + folio = filemap_alloc_folio(gfp, mapping_min_folio_order(mapping), NULL); if (!folio) return ERR_PTR(-ENOMEM); index = mapping_align_index(mapping, index); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index eb83cff7db8c..3d797d47a040 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -354,6 +354,7 @@ struct mempolicy *get_task_policy(struct task_struct *p) return &default_policy; } +EXPORT_SYMBOL_FOR_MODULES(get_task_policy, "kvm"); static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t 
*nodes); @@ -487,6 +488,7 @@ void __mpol_put(struct mempolicy *pol) return; kmem_cache_free(policy_cache, pol); } +EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm"); static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes) { @@ -2885,6 +2887,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, read_unlock(&sp->lock); return pol; } +EXPORT_SYMBOL_FOR_MODULES(mpol_shared_policy_lookup, "kvm"); static void sp_free(struct sp_node *n) { @@ -3170,6 +3173,7 @@ put_mpol: mpol_put(mpol); /* drop our incoming ref on sb mpol */ } } +EXPORT_SYMBOL_FOR_MODULES(mpol_shared_policy_init, "kvm"); int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *pol) @@ -3188,6 +3192,7 @@ int mpol_set_shared_policy(struct shared_policy *sp, sp_free(new); return err; } +EXPORT_SYMBOL_FOR_MODULES(mpol_set_shared_policy, "kvm"); /* Free a backing policy store on inode delete. */ void mpol_free_shared_policy(struct shared_policy *sp) @@ -3206,6 +3211,7 @@ void mpol_free_shared_policy(struct shared_policy *sp) } write_unlock(&sp->lock); } +EXPORT_SYMBOL_FOR_MODULES(mpol_free_shared_policy, "kvm"); #ifdef CONFIG_NUMA_BALANCING static int __initdata numabalancing_override; diff --git a/mm/readahead.c b/mm/readahead.c index 3a4b5d58eeb6..b415c9969176 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -186,7 +186,7 @@ static struct folio *ractl_alloc_folio(struct readahead_control *ractl, { struct folio *folio; - folio = filemap_alloc_folio(gfp_mask, order); + folio = filemap_alloc_folio(gfp_mask, order, NULL); if (folio && ractl->dropbehind) __folio_set_dropbehind(folio); diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index 6338f5bbdb70..8d7758f12280 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -636,7 +636,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) - close(fd[f]); + kvm_close(fd[f]); } /* handles the valid case: intid=0xffffffff num=1 */ diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index e7d9aeb418d3..618c937f3c90 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -19,6 +19,7 @@ #include <sys/stat.h> #include "kvm_util.h" +#include "numaif.h" #include "test_util.h" #include "ucall_common.h" @@ -75,6 +76,101 @@ static void test_mmap_supported(int fd, size_t total_size) kvm_munmap(mem, total_size); } +static void test_mbind(int fd, size_t total_size) +{ + const unsigned long nodemask_0 = 1; /* nid: 0 */ + unsigned long nodemask = 0; + unsigned long maxnode = 8; + int policy; + char *mem; + int ret; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + /* Test MPOL_INTERLEAVE policy */ + kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0, + "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_INTERLEAVE, nodemask_0, policy, nodemask); + + /* Test basic MPOL_BIND policy */ + kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_BIND 
&& nodemask == nodemask_0, + "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_BIND, nodemask_0, policy, nodemask); + + /* Test MPOL_DEFAULT policy */ + kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask, + "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx", + MPOL_DEFAULT, policy, nodemask); + + /* Test with invalid policy */ + ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "mbind with invalid policy should fail with EINVAL"); + + kvm_munmap(mem, total_size); +} + +static void test_numa_allocation(int fd, size_t total_size) +{ + unsigned long node0_mask = 1; /* Node 0 */ + unsigned long node1_mask = 2; /* Node 1 */ + unsigned long maxnode = 8; + void *pages[4]; + int status[4]; + char *mem; + int i; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + for (i = 0; i < 4; i++) + pages[i] = (char *)mem + page_size * i; + + /* Set NUMA policy after allocation */ + memset(mem, 0xaa, page_size); + kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size); + + /* Set NUMA policy before allocation */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + /* Validate if pages are allocated on specified NUMA nodes */ + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]); + + /* Punch hole for all pages */ + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size); + + /* Change NUMA policy nodes and reallocate */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]); + + kvm_munmap(mem, total_size); +} + static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) { const char val = 0xaa; @@ -273,11 +369,13 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { gmem_test(mmap_supported, vm, flags); gmem_test(fault_overflow, vm, flags); + gmem_test(numa_allocation, vm, flags); } else { gmem_test(fault_private, vm, flags); } gmem_test(mmap_cow, vm, flags); + gmem_test(mbind, vm, flags); } else { gmem_test(mmap_not_supported, vm, flags); } diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h new file mode 100644 index 000000000000..d4e613162bba --- /dev/null +++ 
b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_SYSCALLS_H +#define SELFTEST_KVM_SYSCALLS_H + +#include <sys/syscall.h> + +#define MAP_ARGS0(m,...) +#define MAP_ARGS1(m,t,a,...) m(t,a) +#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__) +#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__) +#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__) +#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__) +#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__) +#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__) + +#define __DECLARE_ARGS(t, a) t a +#define __UNPACK_ARGS(t, a) a + +#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args) +#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args) + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +/* Define a kvm_<syscall>() API to assert success. */ +#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \ +{ \ + int r; \ + \ + r = name(UNPACK_ARGS(nr_args, args)); \ + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \ +} + +/* + * Macro to define syscall APIs, either because KVM selftests doesn't link to + * the standard library, e.g. libnuma, or because there is no library that yet + * provides the syscall. These + */ +#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline long name(DECLARE_ARGS(nr_args, args)) \ +{ \ + return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \ +} \ +__KVM_SYSCALL_DEFINE(name, nr_args, args) + +/* + * Special case mmap(), as KVM selftest rarely/never specific an address, + * rarely specify an offset, and because the unique return code requires + * special handling anyways. 
+ */ +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline int kvm_dup(int fd) +{ + int new_fd = dup(fd); + + TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd)); + return new_fd; +} + +__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size); +__KVM_SYSCALL_DEFINE(close, 1, int, fd); +__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len); +__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length); + +#endif /* SELFTEST_KVM_SYSCALLS_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index d3f3e455c031..c2481be61434 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -23,6 +23,7 @@ #include <pthread.h> +#include "kvm_syscalls.h" #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -283,34 +284,6 @@ static inline bool kvm_has_cap(long cap) return kvm_check_cap(cap); } -#define __KVM_SYSCALL_ERROR(_name, _ret) \ - "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) - -static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, - off_t offset) -{ - void *mem; - - mem = mmap(NULL, size, prot, flags, fd, offset); - TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", - (int)(unsigned long)MAP_FAILED)); - - return mem; -} - -static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) -{ - return __kvm_mmap(size, prot, flags, fd, 0); -} - -static inline void kvm_munmap(void *mem, size_t size) -{ - int ret; - - ret = munmap(mem, size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); -} - /* * Use the "inner", double-underscore macro when reporting errors from within * other macros so that the name of ioctl() and not its literal numeric value @@ -700,12 +673,12 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag uint32_t guest_memfd, uint64_t guest_memfd_offset); void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); + enum vm_mem_backing_src_type src_type, + uint64_t gpa, uint32_t slot, uint64_t npages, + uint32_t flags); void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); + uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, + int guest_memfd_fd, uint64_t guest_memfd_offset); #ifndef vm_arch_has_protected_memory static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h index b020547403fd..29572a6d789c 100644 --- a/tools/testing/selftests/kvm/include/numaif.h +++ b/tools/testing/selftests/kvm/include/numaif.h @@ -1,55 +1,83 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/numaif.h - * - * Copyright (C) 2020, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. 
- * - * Header file that provides access to NUMA API functions not explicitly - * exported to user space. - */ +/* Copyright (C) 2020, Google LLC. */ #ifndef SELFTEST_KVM_NUMAIF_H #define SELFTEST_KVM_NUMAIF_H -#define __NR_get_mempolicy 239 -#define __NR_migrate_pages 256 +#include <dirent.h> -/* System calls */ -long get_mempolicy(int *policy, const unsigned long *nmask, - unsigned long maxnode, void *addr, int flags) +#include <linux/mempolicy.h> + +#include "kvm_syscalls.h" + +KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask, + unsigned long, maxnode, void *, addr, int, flags); + +KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask, + unsigned long, maxnode); + +KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start, + unsigned long, len, unsigned long, home_node, + unsigned long, flags); + +KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode, + const unsigned long *, frommask, const unsigned long *, tomask); + +KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages, + const int *, nodes, int *, status, int, flags); + +KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode, + const unsigned long *, nodemask, unsigned long, maxnode, + unsigned int, flags); + +static inline int get_max_numa_node(void) { - return syscall(__NR_get_mempolicy, policy, nmask, - maxnode, addr, flags); + struct dirent *de; + int max_node = 0; + DIR *d; + + /* + * Assume there's a single node if the kernel doesn't support NUMA, + * or if no nodes are found. + */ + d = opendir("/sys/devices/system/node"); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + int node_id; + char *endptr; + + if (strncmp(de->d_name, "node", 4) != 0) + continue; + + node_id = strtol(de->d_name + 4, &endptr, 10); + if (*endptr != '\0') + continue; + + if (node_id > max_node) + max_node = node_id; + } + closedir(d); + + return max_node; } -long migrate_pages(int pid, unsigned long maxnode, - const unsigned long *frommask, - const unsigned long *tomask) +static bool is_numa_available(void) { - return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask); + /* + * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall + * fails with ENOSYS, then the kernel was built without NUMA support. + * if the syscall fails with EPERM, then the process/user lacks the + * necessary capabilities (CAP_SYS_NICE). + */ + return !get_mempolicy(NULL, NULL, 0, NULL, 0) || + (errno != ENOSYS && errno != EPERM); } -/* Policies */ -#define MPOL_DEFAULT 0 -#define MPOL_PREFERRED 1 -#define MPOL_BIND 2 -#define MPOL_INTERLEAVE 3 - -#define MPOL_MAX MPOL_INTERLEAVE - -/* Flags for get_mem_policy */ -#define MPOL_F_NODE (1<<0) /* return next il node or node of address */ - /* Warning: MPOL_F_NODE is unsupported and - * subject to change. Don't use. 
- */ -#define MPOL_F_ADDR (1<<1) /* look up vma using address */ -#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */ - -/* Flags for mbind */ -#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ +static inline bool is_multi_numa_node_system(void) +{ + return is_numa_available() && get_max_numa_node() >= 1; +} #endif /* SELFTEST_KVM_NUMAIF_H */ diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index f02355c3c4c2..b7dbde9c0843 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -239,14 +239,14 @@ int main(int argc, char *argv[]) * single stats file works and doesn't cause explosions. */ vm_stats_fds = vm_get_stats_fd(vms[i]); - stats_test(dup(vm_stats_fds)); + stats_test(kvm_dup(vm_stats_fds)); /* Verify userspace can instantiate multiple stats files. */ stats_test(vm_get_stats_fd(vms[i])); for (j = 0; j < max_vcpu; ++j) { vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]); - stats_test(dup(vcpu_stats_fds[j])); + stats_test(kvm_dup(vcpu_stats_fds[j])); stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j])); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1a93d6361671..a334f6ed50d6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -704,8 +704,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) static void kvm_stats_release(struct kvm_binary_stats *stats) { - int ret; - if (stats->fd < 0) return; @@ -714,8 +712,7 @@ static void kvm_stats_release(struct kvm_binary_stats *stats) stats->desc = NULL; } - ret = close(stats->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(stats->fd); stats->fd = -1; } @@ -738,8 +735,6 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) */ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - int ret; - if (vcpu->dirty_gfns) { kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; @@ -747,9 +742,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) kvm_munmap(vcpu->run, vcpu_mmap_sz()); - ret = close(vcpu->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); - + kvm_close(vcpu->fd); kvm_stats_release(&vcpu->stats); list_del(&vcpu->list); @@ -761,16 +754,12 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) void kvm_vm_release(struct kvm_vm *vmp) { struct kvm_vcpu *vcpu, *tmp; - int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) vm_vcpu_rm(vmp, vcpu); - ret = close(vmp->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); - - ret = close(vmp->kvm_fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vmp->fd); + kvm_close(vmp->kvm_fd); /* Free cached stats metadata and close FD */ kvm_stats_release(&vmp->stats); @@ -828,7 +817,7 @@ void kvm_vm_free(struct kvm_vm *vmp) int kvm_memfd_alloc(size_t size, bool hugepages) { int memfd_flags = MFD_CLOEXEC; - int fd, r; + int fd; if (hugepages) memfd_flags |= MFD_HUGETLB; @@ -836,11 +825,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages) fd = memfd_create("kvm_selftest", memfd_flags); TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); - r = ftruncate(fd, size); 
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); - - r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_ftruncate(fd, size); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); return fd; } @@ -957,8 +943,8 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) + uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, + int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; @@ -972,30 +958,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, "Number of guest pages is not compatible with the host. " "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); - TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" - " guest_paddr: 0x%lx vm->page_size: 0x%x", - guest_paddr, vm->page_size); - TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + " gpa: 0x%lx vm->page_size: 0x%x", + gpa, vm->page_size); + TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1) <= vm->max_gfn, "Physical range beyond maximum " "supported physical address,\n" - " guest_paddr: 0x%lx npages: 0x%lx\n" + " gpa: 0x%lx npages: 0x%lx\n" " vm->max_gfn: 0x%lx vm->page_size: 0x%x", - guest_paddr, npages, vm->max_gfn, vm->page_size); + gpa, npages, vm->max_gfn, vm->page_size); /* * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); + vm, gpa, (gpa + npages * vm->page_size) - 1); if (region != NULL) TEST_FAIL("overlapping userspace_mem_region already " "exists\n" - " requested guest_paddr: 0x%lx npages: 0x%lx " - "page_size: 0x%x\n" - " existing guest_paddr: 0x%lx size: 0x%lx", - guest_paddr, npages, vm->page_size, + " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n" + " existing gpa: 0x%lx size: 0x%lx", + gpa, npages, vm->page_size, (uint64_t) region->region.guest_phys_addr, (uint64_t) region->region.memory_size); @@ -1009,8 +994,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, "already exists.\n" " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", - slot, guest_paddr, npages, - region->region.slot, + slot, gpa, npages, region->region.slot, (uint64_t) region->region.guest_phys_addr, (uint64_t) region->region.memory_size); } @@ -1036,7 +1020,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -1084,8 +1068,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, * needing to track if the fd is owned by the framework * or by the caller. 
*/ - guest_memfd = dup(guest_memfd); - TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + guest_memfd = kvm_dup(guest_memfd); } region->region.guest_memfd = guest_memfd; @@ -1097,20 +1080,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->unused_phy_pages = sparsebit_alloc(); if (vm_arch_has_protected_memory(vm)) region->protected_phy_pages = sparsebit_alloc(); - sparsebit_set_num(region->unused_phy_pages, - guest_paddr >> vm->page_shift, npages); + sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages); region->region.slot = slot; region->region.flags = flags; - region->region.guest_phys_addr = guest_paddr; + region->region.guest_phys_addr = gpa; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", - ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size, + " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d", + ret, errno, slot, flags, gpa, region->region.memory_size, region->region.guest_memfd); /* Add to quick lookup data structures */ @@ -1132,10 +1113,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, - uint64_t npages, uint32_t flags) + uint64_t gpa, uint32_t slot, uint64_t npages, + uint32_t flags) { - vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); + vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); } /* @@ -2305,11 +2286,35 @@ __weak void kvm_selftest_arch_init(void) { } +static void report_unexpected_signal(int signum) +{ +#define KVM_CASE_SIGNUM(sig) \ + case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum) + + switch (signum) { + KVM_CASE_SIGNUM(SIGBUS); + KVM_CASE_SIGNUM(SIGSEGV); + KVM_CASE_SIGNUM(SIGILL); + KVM_CASE_SIGNUM(SIGFPE); + default: + TEST_FAIL("Unexpected signal %d\n", signum); + } +} + void __attribute((constructor)) kvm_selftest_init(void) { + struct sigaction sig_sa = { + .sa_handler = report_unexpected_signal, + }; + /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); + sigaction(SIGBUS, &sig_sa, NULL); + sigaction(SIGSEGV, &sig_sa, NULL); + sigaction(SIGILL, &sig_sa, NULL); + sigaction(SIGFPE, &sig_sa, NULL); + guest_random_seed = last_guest_seed = random(); pr_info("Random seed: 0x%x\n", guest_random_seed); diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 82a8d88b5338..1969f4ab9b28 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; pthread_t threads[KVM_MAX_VCPUS]; struct kvm_vm *vm; - int memfd, i, r; + int memfd, i; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, @@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t * should prevent the VM from being fully destroyed until the last * reference to the guest_memfd is also put. 
*/ - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); - - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); close(memfd); } diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index 35cb9de54a82..ae4a4b6c05ca 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -256,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, int nodes = 0; time_t start_time, last_update, now; time_t interval_secs = 1; - int i, r; + int i; int from, to; unsigned long bit; uint64_t hlt_count; @@ -267,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, delay_usecs); /* Get set of first 64 numa nodes available */ - r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 0, MPOL_F_MEMS_ALLOWED); - TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " "(each 1-bit indicates node is present): %#lx\n", diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index ffadc5ee8e04..fdaea3422c30 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -1,18 +1,47 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/anon_inodes.h> #include <linux/backing-dev.h> #include <linux/falloc.h> +#include <linux/fs.h> #include <linux/kvm_host.h> +#include <linux/mempolicy.h> +#include <linux/pseudo_fs.h> #include <linux/pagemap.h> -#include <linux/anon_inodes.h> #include "kvm_mm.h" -struct kvm_gmem { +static struct vfsmount *kvm_gmem_mnt; + +/* + * A guest_memfd instance can be associated multiple VMs, each with its own + * "view" of the underlying physical memory. + * + * The gmem's inode is effectively the raw underlying physical storage, and is + * used to track properties of the physical memory, while each gmem file is + * effectively a single VM's view of that storage, and is used to track assets + * specific to its associated VM, e.g. memslots=>gmem bindings. + */ +struct gmem_file { struct kvm *kvm; struct xarray bindings; struct list_head entry; }; +struct gmem_inode { + struct shared_policy policy; + struct inode vfs_inode; + + u64 flags; +}; + +static __always_inline struct gmem_inode *GMEM_I(struct inode *inode) +{ + return container_of(inode, struct gmem_inode, vfs_inode); +} + +#define kvm_gmem_for_each_file(f, mapping) \ + list_for_each_entry(f, &(mapping)->i_private_list, entry) + /** * folio_file_pfn - like folio_file_page, but return a pfn. * @folio: The folio which contains this index. 
@@ -25,6 +54,11 @@ static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); } +static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) +{ + return gfn - slot->base_gfn + slot->gmem.pgoff; +} + static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, pgoff_t index, struct folio *folio) { @@ -77,9 +111,9 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, * The order will be passed when creating the guest_memfd, and * checked when creating memslots. */ - WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); - index = gfn - slot->base_gfn + slot->gmem.pgoff; - index = ALIGN_DOWN(index, 1 << folio_order(folio)); + WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, folio_nr_pages(folio))); + index = kvm_gmem_get_index(slot, gfn); + index = ALIGN_DOWN(index, folio_nr_pages(folio)); r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); if (!r) kvm_gmem_mark_prepared(folio); @@ -99,27 +133,45 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) { /* TODO: Support huge pages. */ - return filemap_grab_folio(inode->i_mapping, index); + struct mempolicy *policy; + struct folio *folio; + + /* + * Fast-path: See if folio is already present in mapping to avoid + * policy_lookup. + */ + folio = __filemap_get_folio(inode->i_mapping, index, + FGP_LOCK | FGP_ACCESSED, 0); + if (!IS_ERR(folio)) + return folio; + + policy = mpol_shared_policy_lookup(&GMEM_I(inode)->policy, index); + folio = __filemap_get_folio_mpol(inode->i_mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_mask(inode->i_mapping), policy); + mpol_cond_put(policy); + + return folio; } static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode) { - if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED) + if (GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED) return KVM_FILTER_SHARED; return KVM_FILTER_PRIVATE; } -static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, +static void __kvm_gmem_invalidate_begin(struct gmem_file *f, pgoff_t start, pgoff_t end, enum kvm_gfn_range_filter attr_filter) { bool flush = false, found_memslot = false; struct kvm_memory_slot *slot; - struct kvm *kvm = gmem->kvm; + struct kvm *kvm = f->kvm; unsigned long index; - xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) { + xa_for_each_range(&f->bindings, index, slot, start, end - 1) { pgoff_t pgoff = slot->gmem.pgoff; struct kvm_gfn_range gfn_range = { @@ -150,22 +202,21 @@ static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start, pgoff_t end) { - struct list_head *gmem_list = &inode->i_mapping->i_private_list; enum kvm_gfn_range_filter attr_filter; - struct kvm_gmem *gmem; + struct gmem_file *f; attr_filter = kvm_gmem_get_invalidate_filter(inode); - list_for_each_entry(gmem, gmem_list, entry) - __kvm_gmem_invalidate_begin(gmem, start, end, attr_filter); + kvm_gmem_for_each_file(f, inode->i_mapping) + __kvm_gmem_invalidate_begin(f, start, end, attr_filter); } -static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, +static void __kvm_gmem_invalidate_end(struct gmem_file *f, pgoff_t start, pgoff_t end) { - struct kvm *kvm = gmem->kvm; + struct kvm *kvm = f->kvm; - if (xa_find(&gmem->bindings, &start, end - 1, 
XA_PRESENT)) { + if (xa_find(&f->bindings, &start, end - 1, XA_PRESENT)) { KVM_MMU_LOCK(kvm); kvm_mmu_invalidate_end(kvm); KVM_MMU_UNLOCK(kvm); @@ -175,11 +226,10 @@ static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start, pgoff_t end) { - struct list_head *gmem_list = &inode->i_mapping->i_private_list; - struct kvm_gmem *gmem; + struct gmem_file *f; - list_for_each_entry(gmem, gmem_list, entry) - __kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_for_each_file(f, inode->i_mapping) + __kvm_gmem_invalidate_end(f, start, end); } static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) @@ -277,9 +327,9 @@ static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset, static int kvm_gmem_release(struct inode *inode, struct file *file) { - struct kvm_gmem *gmem = file->private_data; + struct gmem_file *f = file->private_data; struct kvm_memory_slot *slot; - struct kvm *kvm = gmem->kvm; + struct kvm *kvm = f->kvm; unsigned long index; /* @@ -299,7 +349,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) filemap_invalidate_lock(inode->i_mapping); - xa_for_each(&gmem->bindings, index, slot) + xa_for_each(&f->bindings, index, slot) WRITE_ONCE(slot->gmem.file, NULL); /* @@ -307,18 +357,18 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) * Zap all SPTEs pointed at by this file. Do not free the backing * memory, as its lifetime is associated with the inode, not the file. */ - __kvm_gmem_invalidate_begin(gmem, 0, -1ul, + __kvm_gmem_invalidate_begin(f, 0, -1ul, kvm_gmem_get_invalidate_filter(inode)); - __kvm_gmem_invalidate_end(gmem, 0, -1ul); + __kvm_gmem_invalidate_end(f, 0, -1ul); - list_del(&gmem->entry); + list_del(&f->entry); filemap_invalidate_unlock(inode->i_mapping); mutex_unlock(&kvm->slots_lock); - xa_destroy(&gmem->bindings); - kfree(gmem); + xa_destroy(&f->bindings); + kfree(f); kvm_put_kvm(kvm); @@ -335,16 +385,12 @@ static inline struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot) return get_file_active(&slot->gmem.file); } -static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) -{ - return gfn - slot->base_gfn + slot->gmem.pgoff; -} +DEFINE_CLASS(gmem_get_file, struct file *, if (_T) fput(_T), + kvm_gmem_get_file(slot), struct kvm_memory_slot *slot); static bool kvm_gmem_supports_mmap(struct inode *inode) { - const u64 flags = (u64)inode->i_private; - - return flags & GUEST_MEMFD_FLAG_MMAP; + return GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_MMAP; } static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) @@ -356,17 +402,15 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; - if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)) + if (!(GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED)) return VM_FAULT_SIGBUS; folio = kvm_gmem_get_folio(inode, vmf->pgoff); if (IS_ERR(folio)) { - int err = PTR_ERR(folio); - - if (err == -EAGAIN) + if (PTR_ERR(folio) == -EAGAIN) return VM_FAULT_RETRY; - return vmf_error(err); + return vmf_error(PTR_ERR(folio)); } if (WARN_ON_ONCE(folio_test_large(folio))) { @@ -390,8 +434,40 @@ out_folio: return ret; } +#ifdef CONFIG_NUMA +static int kvm_gmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) +{ + struct inode *inode = file_inode(vma->vm_file); + + return mpol_set_shared_policy(&GMEM_I(inode)->policy, vma, mpol); +} + 
+static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma, + unsigned long addr, pgoff_t *pgoff) +{ + struct inode *inode = file_inode(vma->vm_file); + + *pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); + + /* + * Return the memory policy for this index, or NULL if none is set. + * + * Returning NULL, e.g. instead of the current task's memory policy, is + * important for the .get_policy kernel ABI: it indicates that no + * explicit policy has been set via mbind() for this memory. The caller + * can then replace NULL with the default memory policy instead of the + * current task's memory policy. + */ + return mpol_shared_policy_lookup(&GMEM_I(inode)->policy, *pgoff); +} +#endif /* CONFIG_NUMA */ + static const struct vm_operations_struct kvm_gmem_vm_ops = { - .fault = kvm_gmem_fault_user_mapping, + .fault = kvm_gmem_fault_user_mapping, +#ifdef CONFIG_NUMA + .get_policy = kvm_gmem_get_policy, + .set_policy = kvm_gmem_set_policy, +#endif }; static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) @@ -416,11 +492,6 @@ static struct file_operations kvm_gmem_fops = { .fallocate = kvm_gmem_fallocate, }; -void kvm_gmem_init(struct module *module) -{ - kvm_gmem_fops.owner = module; -} - static int kvm_gmem_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) @@ -492,8 +563,8 @@ bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm) static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) { - const char *anon_name = "[kvm-gmem]"; - struct kvm_gmem *gmem; + static const char *name = "[kvm-gmem]"; + struct gmem_file *f; struct inode *inode; struct file *file; int fd, err; @@ -502,25 +573,24 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) if (fd < 0) return fd; - gmem = kzalloc(sizeof(*gmem), GFP_KERNEL); - if (!gmem) { + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) { err = -ENOMEM; goto err_fd; } - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem, - O_RDWR, NULL); - if (IS_ERR(file)) { - err = PTR_ERR(file); + /* __fput() will take care of fops_put(). */ + if (!fops_get(&kvm_gmem_fops)) { + err = -ENOENT; goto err_gmem; } - file->f_flags |= O_LARGEFILE; - - inode = file->f_inode; - WARN_ON(file->f_mapping != inode->i_mapping); + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto err_fops; + } - inode->i_private = (void *)(unsigned long)flags; inode->i_op = &kvm_gmem_iops; inode->i_mapping->a_ops = &kvm_gmem_aops; inode->i_mode |= S_IFREG; @@ -530,16 +600,31 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) /* Unmovable mappings are supposed to be marked unevictable as well. 
*/ WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); + GMEM_I(inode)->flags = flags; + + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto err_inode; + } + + file->f_flags |= O_LARGEFILE; + file->private_data = f; + kvm_get_kvm(kvm); - gmem->kvm = kvm; - xa_init(&gmem->bindings); - list_add(&gmem->entry, &inode->i_mapping->i_private_list); + f->kvm = kvm; + xa_init(&f->bindings); + list_add(&f->entry, &inode->i_mapping->i_private_list); fd_install(fd, file); return fd; +err_inode: + iput(inode); +err_fops: + fops_put(&kvm_gmem_fops); err_gmem: - kfree(gmem); + kfree(f); err_fd: put_unused_fd(fd); return err; @@ -564,7 +649,7 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, { loff_t size = slot->npages << PAGE_SHIFT; unsigned long start, end; - struct kvm_gmem *gmem; + struct gmem_file *f; struct inode *inode; struct file *file; int r = -EINVAL; @@ -578,8 +663,8 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, if (file->f_op != &kvm_gmem_fops) goto err; - gmem = file->private_data; - if (gmem->kvm != kvm) + f = file->private_data; + if (f->kvm != kvm) goto err; inode = file_inode(file); @@ -593,8 +678,8 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, start = offset >> PAGE_SHIFT; end = start + slot->npages; - if (!xa_empty(&gmem->bindings) && - xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) { + if (!xa_empty(&f->bindings) && + xa_find(&f->bindings, &start, end - 1, XA_PRESENT)) { filemap_invalidate_unlock(inode->i_mapping); goto err; } @@ -609,7 +694,7 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, if (kvm_gmem_supports_mmap(inode)) slot->flags |= KVM_MEMSLOT_GMEM_ONLY; - xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL); + xa_store_range(&f->bindings, start, end - 1, slot, GFP_KERNEL); filemap_invalidate_unlock(inode->i_mapping); /* @@ -623,12 +708,12 @@ err: return r; } -static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem) +static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct gmem_file *f) { unsigned long start = slot->gmem.pgoff; unsigned long end = start + slot->npages; - xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL); + xa_store_range(&f->bindings, start, end - 1, NULL, GFP_KERNEL); /* * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn() @@ -639,8 +724,6 @@ static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gme void kvm_gmem_unbind(struct kvm_memory_slot *slot) { - struct file *file; - /* * Nothing to do if the underlying file was _already_ closed, as * kvm_gmem_release() invalidates and nullifies all bindings. @@ -648,7 +731,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) if (!slot->gmem.file) return; - file = kvm_gmem_get_file(slot); + CLASS(gmem_get_file, file)(slot); /* * However, if the file is _being_ closed, then the bindings need to be @@ -668,8 +751,6 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) filemap_invalidate_lock(file->f_mapping); __kvm_gmem_unbind(slot, file->private_data); filemap_invalidate_unlock(file->f_mapping); - - fput(file); } /* Returns a locked folio on success. 
*/ @@ -678,18 +759,17 @@ static struct folio *__kvm_gmem_get_pfn(struct file *file, pgoff_t index, kvm_pfn_t *pfn, bool *is_prepared, int *max_order) { - struct file *gmem_file = READ_ONCE(slot->gmem.file); - struct kvm_gmem *gmem = file->private_data; + struct file *slot_file = READ_ONCE(slot->gmem.file); + struct gmem_file *f = file->private_data; struct folio *folio; - if (file != gmem_file) { - WARN_ON_ONCE(gmem_file); + if (file != slot_file) { + WARN_ON_ONCE(slot_file); return ERR_PTR(-EFAULT); } - gmem = file->private_data; - if (xa_load(&gmem->bindings, index) != slot) { - WARN_ON_ONCE(xa_load(&gmem->bindings, index)); + if (xa_load(&f->bindings, index) != slot) { + WARN_ON_ONCE(xa_load(&f->bindings, index)); return ERR_PTR(-EIO); } @@ -716,19 +796,17 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, int *max_order) { pgoff_t index = kvm_gmem_get_index(slot, gfn); - struct file *file = kvm_gmem_get_file(slot); struct folio *folio; bool is_prepared = false; int r = 0; + CLASS(gmem_get_file, file)(slot); if (!file) return -EFAULT; folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order); - if (IS_ERR(folio)) { - r = PTR_ERR(folio); - goto out; - } + if (IS_ERR(folio)) + return PTR_ERR(folio); if (!is_prepared) r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); @@ -740,8 +818,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, else folio_put(folio); -out: - fput(file); return r; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn); @@ -750,7 +826,6 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn); long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { - struct file *file; struct kvm_memory_slot *slot; void __user *p; @@ -766,7 +841,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long if (!kvm_slot_has_gmem(slot)) return -EINVAL; - file = kvm_gmem_get_file(slot); + CLASS(gmem_get_file, file)(slot); if (!file) return -EFAULT; @@ -824,8 +899,118 @@ put_folio_and_exit: filemap_invalidate_unlock(file->f_mapping); - fput(file); return ret && !i ? ret : i; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate); #endif + +static struct kmem_cache *kvm_gmem_inode_cachep; + +static void kvm_gmem_init_inode_once(void *__gi) +{ + struct gmem_inode *gi = __gi; + + /* + * Note! Don't initialize the inode with anything specific to the + * guest_memfd instance, or that might be specific to how the inode is + * used (from the VFS-layer's perspective). This hook is called only + * during the initial slab allocation, i.e. only fields/state that are + * idempotent across _all_ use of the inode _object_ can be initialized + * at this time! 
+ */ + inode_init_once(&gi->vfs_inode); +} + +static struct inode *kvm_gmem_alloc_inode(struct super_block *sb) +{ + struct gmem_inode *gi; + + gi = alloc_inode_sb(sb, kvm_gmem_inode_cachep, GFP_KERNEL); + if (!gi) + return NULL; + + mpol_shared_policy_init(&gi->policy, NULL); + + gi->flags = 0; + return &gi->vfs_inode; +} + +static void kvm_gmem_destroy_inode(struct inode *inode) +{ + mpol_free_shared_policy(&GMEM_I(inode)->policy); +} + +static void kvm_gmem_free_inode(struct inode *inode) +{ + kmem_cache_free(kvm_gmem_inode_cachep, GMEM_I(inode)); +} + +static const struct super_operations kvm_gmem_super_operations = { + .statfs = simple_statfs, + .alloc_inode = kvm_gmem_alloc_inode, + .destroy_inode = kvm_gmem_destroy_inode, + .free_inode = kvm_gmem_free_inode, +}; + +static int kvm_gmem_init_fs_context(struct fs_context *fc) +{ + struct pseudo_fs_context *ctx; + + if (!init_pseudo(fc, GUEST_MEMFD_MAGIC)) + return -ENOMEM; + + fc->s_iflags |= SB_I_NOEXEC; + fc->s_iflags |= SB_I_NODEV; + ctx = fc->fs_private; + ctx->ops = &kvm_gmem_super_operations; + + return 0; +} + +static struct file_system_type kvm_gmem_fs = { + .name = "guest_memfd", + .init_fs_context = kvm_gmem_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static int kvm_gmem_init_mount(void) +{ + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs); + + if (IS_ERR(kvm_gmem_mnt)) + return PTR_ERR(kvm_gmem_mnt); + + kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC; + return 0; +} + +int kvm_gmem_init(struct module *module) +{ + struct kmem_cache_args args = { + .align = 0, + .ctor = kvm_gmem_init_inode_once, + }; + int ret; + + kvm_gmem_fops.owner = module; + kvm_gmem_inode_cachep = kmem_cache_create("kvm_gmem_inode_cache", + sizeof(struct gmem_inode), + &args, SLAB_ACCOUNT); + if (!kvm_gmem_inode_cachep) + return -ENOMEM; + + ret = kvm_gmem_init_mount(); + if (ret) { + kmem_cache_destroy(kvm_gmem_inode_cachep); + return ret; + } + return 0; +} + +void kvm_gmem_exit(void) +{ + kern_unmount(kvm_gmem_mnt); + kvm_gmem_mnt = NULL; + rcu_barrier(); + kmem_cache_destroy(kvm_gmem_inode_cachep); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cde1eddbaa91..a5d9a6f221c2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -6517,7 +6517,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) if (WARN_ON_ONCE(r)) goto err_vfio; - kvm_gmem_init(module); + r = kvm_gmem_init(module); + if (r) + goto err_gmem; r = kvm_init_virtualization(); if (r) @@ -6538,6 +6540,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) err_register: kvm_uninit_virtualization(); err_virt: + kvm_gmem_exit(); +err_gmem: kvm_vfio_ops_exit(); err_vfio: kvm_async_pf_deinit(); @@ -6569,6 +6573,7 @@ void kvm_exit(void) for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); + kvm_gmem_exit(); kvm_vfio_ops_exit(); kvm_async_pf_deinit(); kvm_irqfd_exit(); diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h index 31defb08ccba..9fcc5d5b7f8d 100644 --- a/virt/kvm/kvm_mm.h +++ b/virt/kvm/kvm_mm.h @@ -68,17 +68,18 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, #endif /* HAVE_KVM_PFNCACHE */ #ifdef CONFIG_KVM_GUEST_MEMFD -void kvm_gmem_init(struct module *module); +int kvm_gmem_init(struct module *module); +void kvm_gmem_exit(void); int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned int fd, loff_t offset); void kvm_gmem_unbind(struct 
kvm_memory_slot *slot); #else -static inline void kvm_gmem_init(struct module *module) +static inline int kvm_gmem_init(struct module *module) { - + return 0; } - +static inline void kvm_gmem_exit(void) {}; static inline int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned int fd, loff_t offset) |