diff options
| author | Lorenzo Stoakes <lorenzo.stoakes@oracle.com> | 2025-10-20 13:11:27 +0100 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-11-16 17:28:12 -0800 |
| commit | ac0a3fc9c07df79dc8a4ce9d274df00afc7bf12d (patch) | |
| tree | c1db9e38fd4a636423b5a1f2342ee54eceae86e2 /mm/vma.c | |
| parent | db91b783290e395443151b0fe4b8dc32aceebef5 (diff) | |
mm: add ability to take further action in vm_area_desc
Some drivers/filesystems need to perform additional tasks after the VMA is
set up. This is typically in the form of pre-population.
The forms of pre-population most likely to be performed are a PFN remap
or the insertion of normal folios and PFNs into a mixed map.
We start by implementing the PFN remap functionality, ensuring that we
perform the appropriate actions at the appropriate time - that is setting
flags at the point of .mmap_prepare, and performing the actual remap at the
point at which the VMA is fully established.
This prevents the driver from doing anything too crazy with a VMA at any
stage, and we retain complete control over how the mm functionality is
applied.
Unfortunately callers still do often require some kind of custom action,
so we add an optional success/error _hook to allow the caller to do
something after the action has succeeded or failed.
This is done at the point when the VMA has already been established, so
the harm that can be done is limited.
The error hook can be used to filter errors if necessary.
There may be cases in which the caller absolutely must hold the file rmap
lock until the operation is entirely complete. It is an edge case, but
certainly the hugetlbfs mmap hook requires it.
To accommodate this, we add the hide_from_rmap_until_complete flag to the
mmap_action type. In this case, if a new VMA is allocated, we will hold the
file rmap lock until the operation is entirely completed (including any
success/error hooks).
Note that we do not need to update __compat_vma_mmap() to accommodate this
flag, as this function will be invoked from an .mmap handler whose VMA is
not yet visible, so we implicitly hide it from the rmap.
If any error arises on these final actions, we simply unmap the VMA
altogether.
Also update the stacked filesystem compatibility layer to utilise the
action behaviour, and update the VMA tests accordingly.
While we're here, rename __compat_vma_mmap_prepare() to __compat_vma_mmap()
as we are now performing actions invoked by the mmap_prepare in addition to
just the mmap_prepare hook.
Link: https://lkml.kernel.org/r/2601199a7b2eaeadfcd8ab6e199c6d1706650c94.1760959442.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Robin Murohy <robin.murphy@arm.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/vma.c')
| -rw-r--r-- | mm/vma.c | 113 |
1 files changed, 85 insertions, 28 deletions
@@ -34,7 +34,9 @@ struct mmap_state { struct maple_tree mt_detach; /* Determine if we can check KSM flags early in mmap() logic. */ - bool check_ksm_early; + bool check_ksm_early :1; + /* If we map new, hold the file rmap lock on mapping. */ + bool hold_file_rmap_lock :1; }; #define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \ @@ -1754,7 +1756,7 @@ void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb) unlink_file_vma_batch_process(vb); } -static void vma_link_file(struct vm_area_struct *vma) +static void vma_link_file(struct vm_area_struct *vma, bool hold_rmap_lock) { struct file *file = vma->vm_file; struct address_space *mapping; @@ -1763,7 +1765,8 @@ static void vma_link_file(struct vm_area_struct *vma) mapping = file->f_mapping; i_mmap_lock_write(mapping); __vma_link_file(vma, mapping); - i_mmap_unlock_write(mapping); + if (!hold_rmap_lock) + i_mmap_unlock_write(mapping); } } @@ -1777,7 +1780,7 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma) vma_start_write(vma); vma_iter_store_new(&vmi, vma); - vma_link_file(vma); + vma_link_file(vma, /* hold_rmap_lock= */false); mm->map_count++; validate_mm(mm); return 0; @@ -2311,17 +2314,33 @@ static void update_ksm_flags(struct mmap_state *map) map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags); } +static void set_desc_from_map(struct vm_area_desc *desc, + const struct mmap_state *map) +{ + desc->start = map->addr; + desc->end = map->end; + + desc->pgoff = map->pgoff; + desc->vm_file = map->file; + desc->vm_flags = map->vm_flags; + desc->page_prot = map->page_prot; +} + /* * __mmap_setup() - Prepare to gather any overlapping VMAs that need to be * unmapped once the map operation is completed, check limits, account mapping * and clean up any pre-existing VMAs. * + * As a result it sets up the @map and @desc objects. + * * @map: Mapping state. + * @desc: VMA descriptor * @uf: Userfaultfd context list. * * Returns: 0 on success, error code otherwise. */ -static int __mmap_setup(struct mmap_state *map, struct list_head *uf) +static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc, + struct list_head *uf) { int error; struct vma_iterator *vmi = map->vmi; @@ -2378,6 +2397,7 @@ static int __mmap_setup(struct mmap_state *map, struct list_head *uf) */ vms_clean_up_area(vms, &map->mas_detach); + set_desc_from_map(desc, map); return 0; } @@ -2479,7 +2499,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap) vma_start_write(vma); vma_iter_store_new(vmi, vma); map->mm->map_count++; - vma_link_file(vma); + vma_link_file(vma, map->hold_file_rmap_lock); /* * vma_merge_new_range() calls khugepaged_enter_vma() too, the below @@ -2539,6 +2559,17 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) vma_set_page_prot(vma); } +static void call_action_prepare(struct mmap_state *map, + struct vm_area_desc *desc) +{ + struct mmap_action *action = &desc->action; + + mmap_action_prepare(action, desc); + + if (action->hide_from_rmap_until_complete) + map->hold_file_rmap_lock = true; +} + /* * Invoke the f_op->mmap_prepare() callback for a file-backed mapping that * specifies it. @@ -2550,34 +2581,26 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) * * Returns 0 on success, or an error code otherwise. */ -static int call_mmap_prepare(struct mmap_state *map) +static int call_mmap_prepare(struct mmap_state *map, + struct vm_area_desc *desc) { int err; - struct vm_area_desc desc = { - .mm = map->mm, - .file = map->file, - .start = map->addr, - .end = map->end, - - .pgoff = map->pgoff, - .vm_file = map->file, - .vm_flags = map->vm_flags, - .page_prot = map->page_prot, - }; /* Invoke the hook. */ - err = vfs_mmap_prepare(map->file, &desc); + err = vfs_mmap_prepare(map->file, desc); if (err) return err; + call_action_prepare(map, desc); + /* Update fields permitted to be changed. */ - map->pgoff = desc.pgoff; - map->file = desc.vm_file; - map->vm_flags = desc.vm_flags; - map->page_prot = desc.page_prot; + map->pgoff = desc->pgoff; + map->file = desc->vm_file; + map->vm_flags = desc->vm_flags; + map->page_prot = desc->page_prot; /* User-defined fields. */ - map->vm_ops = desc.vm_ops; - map->vm_private_data = desc.private_data; + map->vm_ops = desc->vm_ops; + map->vm_private_data = desc->private_data; return 0; } @@ -2619,22 +2642,48 @@ static bool can_set_ksm_flags_early(struct mmap_state *map) return false; } +static int call_action_complete(struct mmap_state *map, + struct vm_area_desc *desc, + struct vm_area_struct *vma) +{ + struct mmap_action *action = &desc->action; + int ret; + + ret = mmap_action_complete(action, vma); + + /* If we held the file rmap we need to release it. */ + if (map->hold_file_rmap_lock) { + struct file *file = vma->vm_file; + + i_mmap_unlock_write(file->f_mapping); + } + return ret; +} + static unsigned long __mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; - int error; bool have_mmap_prepare = file && file->f_op->mmap_prepare; VMA_ITERATOR(vmi, mm, addr); MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file); + struct vm_area_desc desc = { + .mm = mm, + .file = file, + .action = { + .type = MMAP_NOTHING, /* Default to no further action. */ + }, + }; + bool allocated_new = false; + int error; map.check_ksm_early = can_set_ksm_flags_early(&map); - error = __mmap_setup(&map, uf); + error = __mmap_setup(&map, &desc, uf); if (!error && have_mmap_prepare) - error = call_mmap_prepare(&map); + error = call_mmap_prepare(&map, &desc); if (error) goto abort_munmap; @@ -2653,6 +2702,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, error = __mmap_new_vma(&map, &vma); if (error) goto unacct_error; + allocated_new = true; } if (have_mmap_prepare) @@ -2660,6 +2710,13 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, __mmap_complete(&map, vma); + if (have_mmap_prepare && allocated_new) { + error = call_action_complete(&map, &desc, vma); + + if (error) + return error; + } + return addr; /* Accounting was done by __mmap_setup(). */ |