| field | value | date |
|---|---|---|
| author | Lorenzo Stoakes <lorenzo.stoakes@oracle.com> | 2025-10-20 13:11:29 +0100 |
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-11-16 17:28:13 -0800 |
| commit | ea52cb24cd3fb121283754ab82b2cb3044609359 | |
| tree | 7e41c9ce16637481ea3fd08220821966281b059e /mm/hugetlb.c | |
| parent | da003453dce728857bea2e3de74132a90c9c78e7 | |
mm/hugetlbfs: update hugetlbfs to use mmap_prepare
Since we can now perform actions after the VMA is established via
mmap_prepare, use desc->action_success_hook to set up the hugetlb lock
once the VMA has been set up.
We also make changes throughout hugetlbfs to make this possible.
Note that we must hide newly established hugetlb VMAs from the rmap until
the operation is entirely complete: we establish the hugetlb lock during
VMA setup, and rmap users can race against that establishment.
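
As a rough illustration of the shape this gives hugetlbfs, consider the
sketch below. It is not the merged code: the hook signature, the
hugetlbfs_file_mmap_prepare name, and the elided setup steps are
assumptions based on the commit text (desc->action_success_hook) and on
the diff further down (hugetlb_vma_lock_alloc() now returns int).

```c
/*
 * Illustrative sketch only: names and signatures are assumed from the
 * commit text, not copied from the upstream implementation.
 */
static int hugetlb_vma_setup(struct vm_area_struct *vma)
{
	/*
	 * Runs only once the VMA is fully established, so it is safe
	 * to allocate and publish the hugetlb VMA lock here.
	 */
	return hugetlb_vma_lock_alloc(vma);
}

static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
{
	/* ... size checks, vm_flags and reservation setup elided ... */

	/*
	 * Defer lock setup until the VMA exists; until this hook has
	 * run, the VMA stays hidden from the rmap so rmap walkers
	 * cannot race with lock establishment.
	 */
	desc->action_success_hook = hugetlb_vma_setup;
	return 0;
}
```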
Link: https://lkml.kernel.org/r/b1afa16d3cfa585a03df9ae215ae9f905b3f0ed7.1760959442.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Chatre, Reinette" <reinette.chatre@intel.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | mm/hugetlb.c | 77 |

1 file changed, 45 insertions(+), 32 deletions(-)
```diff
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7774c286b3b7..86e672fcb305 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -119,7 +119,6 @@ struct mutex *hugetlb_fault_mutex_table __ro_after_init;
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
-static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, bool take_locks);
@@ -438,17 +437,21 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
 	}
 }
 
-static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+/*
+ * vma specific semaphore used for pmd sharing and fault/truncation
+ * synchronization
+ */
+int hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
 {
 	struct hugetlb_vma_lock *vma_lock;
 
 	/* Only establish in (flags) sharable vmas */
 	if (!vma || !(vma->vm_flags & VM_MAYSHARE))
-		return;
+		return 0;
 
 	/* Should never get here with non-NULL vm_private_data */
 	if (vma->vm_private_data)
-		return;
+		return -EINVAL;
 
 	vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
 	if (!vma_lock) {
@@ -463,13 +466,15 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
 		 * allocation failure.
 		 */
 		pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
-		return;
+		return -EINVAL;
 	}
 
 	kref_init(&vma_lock->refs);
 	init_rwsem(&vma_lock->rw_sema);
 	vma_lock->vma = vma;
 	vma->vm_private_data = vma_lock;
+
+	return 0;
 }
 
 /* Helper that removes a struct file_region from the resv_map cache and returns
@@ -1201,20 +1206,28 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 	}
 }
 
-static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
+static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 {
-	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
-	VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
+	VM_WARN_ON_ONCE_VMA(!is_vm_hugetlb_page(vma), vma);
+	VM_WARN_ON_ONCE_VMA(vma->vm_flags & VM_MAYSHARE, vma);
 
-	set_vma_private_data(vma, (unsigned long)map);
+	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
 }
 
-static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
+static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
 {
-	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
-	VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
+	VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
+	VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
 
-	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
+	desc->private_data = map;
+}
+
+static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
+{
+	VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
+	VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
+
+	desc->private_data = (void *)((unsigned long)desc->private_data | flags);
 }
 
 static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
@@ -1224,6 +1237,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 	return (get_vma_private_data(vma) & flag) != 0;
 }
 
+static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag)
+{
+	VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
+
+	return ((unsigned long)desc->private_data) & flag;
+}
+
 bool __vma_private_lock(struct vm_area_struct *vma)
 {
 	return !(vma->vm_flags & VM_MAYSHARE) &&
@@ -7270,9 +7290,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
  */
 long hugetlb_reserve_pages(struct inode *inode,
-				long from, long to,
-				struct vm_area_struct *vma,
-				vm_flags_t vm_flags)
+				long from, long to,
+				struct vm_area_desc *desc,
+				vm_flags_t vm_flags)
 {
 	long chg = -1, add = -1, spool_resv, gbl_resv;
 	struct hstate *h = hstate_inode(inode);
@@ -7288,12 +7308,6 @@ long hugetlb_reserve_pages(struct inode *inode,
 	}
 
 	/*
-	 * vma specific semaphore used for pmd sharing and fault/truncation
-	 * synchronization
-	 */
-	hugetlb_vma_lock_alloc(vma);
-
-	/*
 	 * Only apply hugepage reservation if asked. At fault time, an
 	 * attempt will be made for VM_NORESERVE to allocate a page
 	 * without using reserves
@@ -7305,9 +7319,9 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * Shared mappings base their reservation on the number of pages that
 	 * are already allocated on behalf of the file. Private mappings need
 	 * to reserve the full area even if read-only as mprotect() may be
-	 * called to make the mapping read-write. Assume !vma is a shm mapping
+	 * called to make the mapping read-write. Assume !desc is a shm mapping
 	 */
-	if (!vma || vma->vm_flags & VM_MAYSHARE) {
+	if (!desc || desc->vm_flags & VM_MAYSHARE) {
 		/*
 		 * resv_map can not be NULL as hugetlb_reserve_pages is only
 		 * called for inodes for which resv_maps were created (see
@@ -7324,8 +7338,8 @@ long hugetlb_reserve_pages(struct inode *inode,
 
 		chg = to - from;
 
-		set_vma_resv_map(vma, resv_map);
-		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
+		set_vma_desc_resv_map(desc, resv_map);
+		set_vma_desc_resv_flags(desc, HPAGE_RESV_OWNER);
 	}
 
 	if (chg < 0)
@@ -7335,7 +7349,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 			chg * pages_per_huge_page(h), &h_cg) < 0)
 		goto out_err;
 
-	if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+	if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) {
 		/* For private mappings, the hugetlb_cgroup uncharge info hangs
 		 * of the resv_map.
 		 */
@@ -7369,7 +7383,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * consumed reservations are stored in the map. Hence, nothing
 	 * else has to be done for private mappings here
 	 */
-	if (!vma || vma->vm_flags & VM_MAYSHARE) {
+	if (!desc || desc->vm_flags & VM_MAYSHARE) {
 		add = region_add(resv_map, from, to, regions_needed, h, h_cg);
 
 		if (unlikely(add < 0)) {
@@ -7423,16 +7437,15 @@ out_uncharge_cgroup:
 	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
 		chg * pages_per_huge_page(h), h_cg);
 out_err:
-	hugetlb_vma_lock_free(vma);
-	if (!vma || vma->vm_flags & VM_MAYSHARE)
+	if (!desc || desc->vm_flags & VM_MAYSHARE)
 		/* Only call region_abort if the region_chg succeeded but the
 		 * region_add failed or didn't run.
 		 */
 		if (chg >= 0 && add < 0)
 			region_abort(resv_map, from, to, regions_needed);
-	if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+	if (desc && is_vma_desc_resv_set(desc, HPAGE_RESV_OWNER)) {
 		kref_put(&resv_map->refs, resv_map_release);
-		set_vma_resv_map(vma, NULL);
+		set_vma_desc_resv_map(desc, NULL);
 	}
 	return chg < 0 ? chg : add < 0 ? add : -EINVAL;
 }
```
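
One detail worth noting in the hunks above: the new desc-based helpers keep
the existing trick of packing reservation flags (such as HPAGE_RESV_OWNER)
into the low bits of the private-data word alongside the resv_map pointer.
Below is a standalone sketch of that encoding; the flag value is
illustrative, not the kernel's definition.

```c
/*
 * Standalone demo of the pointer-tagging scheme used by the
 * set_vma_desc_resv_map()/set_vma_desc_resv_flags() helpers above:
 * flag bits live in the low bits of the private-data word, which is
 * safe because a resv_map allocation is at least word-aligned.
 * HPAGE_RESV_OWNER_DEMO is a made-up stand-in for the kernel flag.
 */
#include <stdio.h>

#define HPAGE_RESV_OWNER_DEMO	(1UL << 0)

struct resv_map { long dummy; };

int main(void)
{
	static struct resv_map map;
	void *private_data;

	/* set_vma_desc_resv_map(): store the bare pointer */
	private_data = &map;

	/* set_vma_desc_resv_flags(): OR a flag into the low bits */
	private_data = (void *)((unsigned long)private_data |
				HPAGE_RESV_OWNER_DEMO);

	/* is_vma_desc_resv_set(): test the flag bit */
	printf("owner flag set: %d\n",
	       !!((unsigned long)private_data & HPAGE_RESV_OWNER_DEMO));

	/* recover the pointer by masking the flag bits back off */
	printf("map recovered: %d\n",
	       (struct resv_map *)((unsigned long)private_data &
				   ~HPAGE_RESV_OWNER_DEMO) == &map);
	return 0;
}
```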