summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/dax.c2
-rw-r--r--fs/hugetlbfs/inode.c49
-rw-r--r--fs/ntfs3/file.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/task_mmu.c314
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/resctrl/pseudo_lock.c20
-rw-r--r--fs/userfaultfd.c117
8 files changed, 290 insertions, 218 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 38fae11ee419..289e6254aa30 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,7 +24,7 @@
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <linux/rmap.h>
-#include <asm/pgalloc.h>
+#include <linux/pgalloc.h>
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index f42548ee9083..3919fca56553 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -96,8 +96,15 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
#define PGOFF_LOFFT_MAX \
(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
-static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int hugetlb_file_mmap_prepare_success(const struct vm_area_struct *vma)
{
+ /* Unfortunate we have to reassign vma->vm_private_data. */
+ return hugetlb_vma_lock_alloc((struct vm_area_struct *)vma);
+}
+
+static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
+{
+ struct file *file = desc->file;
struct inode *inode = file_inode(file);
loff_t len, vma_len;
int ret;
@@ -112,8 +119,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
- vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
- vma->vm_ops = &hugetlb_vm_ops;
+ desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+ desc->vm_ops = &hugetlb_vm_ops;
/*
* page based offset in vm_pgoff could be sufficiently large to
@@ -122,16 +129,16 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* sizeof(unsigned long). So, only check in those instances.
*/
if (sizeof(unsigned long) == sizeof(loff_t)) {
- if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ if (desc->pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
}
/* must be huge page aligned */
- if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+ if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
- vma_len = (loff_t)(vma->vm_end - vma->vm_start);
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ vma_len = (loff_t)vma_desc_size(desc);
+ len = vma_len + ((loff_t)desc->pgoff << PAGE_SHIFT);
/* check for overflow */
if (len < vma_len)
return -EINVAL;
@@ -141,7 +148,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = -ENOMEM;
- vm_flags = vma->vm_flags;
+ vm_flags = desc->vm_flags;
/*
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
* reserving here. Note: only for SHM hugetlbfs file, the inode
@@ -151,17 +158,30 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags |= VM_NORESERVE;
if (hugetlb_reserve_pages(inode,
- vma->vm_pgoff >> huge_page_order(h),
- len >> huge_page_shift(h), vma,
- vm_flags) < 0)
+ desc->pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), desc,
+ vm_flags) < 0)
goto out;
ret = 0;
- if (vma->vm_flags & VM_WRITE && inode->i_size < len)
+ if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
i_size_write(inode, len);
out:
inode_unlock(inode);
+ if (!ret) {
+ /* Allocate the VMA lock after we set it up. */
+ desc->action.success_hook = hugetlb_file_mmap_prepare_success;
+ /*
+ * We cannot permit the rmap finding this VMA in the time
+ * between the VMA being inserted into the VMA tree and the
+ * completion/success hook being invoked.
+ *
+ * This is because we establish a per-VMA hugetlb lock which can
+ * be raced by rmap.
+ */
+ desc->action.hide_from_rmap_until_complete = true;
+ }
return ret;
}
@@ -184,8 +204,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr)
addr0 = ALIGN(addr, huge_page_size(h));
- return mm_get_unmapped_area_vmflags(current->mm, file, addr0, len, pgoff,
- flags, 0);
+ return mm_get_unmapped_area_vmflags(file, addr0, len, pgoff, flags, 0);
}
/*
@@ -1221,7 +1240,7 @@ static void init_once(void *foo)
static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
- .mmap = hugetlbfs_file_mmap,
+ .mmap_prepare = hugetlbfs_file_mmap_prepare,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 5016bccc2ac5..2e7b2e566ebe 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -379,7 +379,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
if (rw) {
u64 to = min_t(loff_t, i_size_read(inode),
- from + desc->end - desc->start);
+ from + vma_desc_size(desc));
if (is_sparsed(ni)) {
/* Allocate clusters for rw map. */
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d9b7ef122343..2d3425cfa94b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -443,7 +443,7 @@ pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned lo
return pde->proc_ops->proc_get_unmapped_area(file, orig_addr, len, pgoff, flags);
#ifdef CONFIG_MMU
- return mm_get_unmapped_area(current->mm, file, orig_addr, len, pgoff, flags);
+ return mm_get_unmapped_area(file, orig_addr, len, pgoff, flags);
#endif
return orig_addr;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fc35a0543f01..81dfc26bfae8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,7 +14,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
@@ -1017,14 +1017,16 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
young = pte_young(ptent);
dirty = pte_dirty(ptent);
present = true;
- } else if (is_swap_pte(ptent)) {
- swp_entry_t swpent = pte_to_swp_entry(ptent);
+ } else if (pte_none(ptent)) {
+ smaps_pte_hole_lookup(addr, walk);
+ } else {
+ const softleaf_t entry = softleaf_from_pte(ptent);
- if (!non_swap_entry(swpent)) {
+ if (softleaf_is_swap(entry)) {
int mapcount;
mss->swap += PAGE_SIZE;
- mapcount = swp_swapcount(swpent);
+ mapcount = swp_swapcount(entry);
if (mapcount >= 2) {
u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
@@ -1033,14 +1035,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
} else {
mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
}
- } else if (is_pfn_swap_entry(swpent)) {
- if (is_device_private_entry(swpent))
+ } else if (softleaf_has_pfn(entry)) {
+ if (softleaf_is_device_private(entry))
present = true;
- page = pfn_swap_entry_to_page(swpent);
+ page = softleaf_to_page(entry);
}
- } else {
- smaps_pte_hole_lookup(addr, walk);
- return;
}
if (!page)
@@ -1060,14 +1059,16 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
bool present = false;
struct folio *folio;
+ if (pmd_none(*pmd))
+ return;
if (pmd_present(*pmd)) {
page = vm_normal_page_pmd(vma, addr, *pmd);
present = true;
- } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
- swp_entry_t entry = pmd_to_swp_entry(*pmd);
+ } else if (unlikely(thp_migration_supported())) {
+ const softleaf_t entry = softleaf_from_pmd(*pmd);
- if (is_pfn_swap_entry(entry))
- page = pfn_swap_entry_to_page(entry);
+ if (softleaf_has_pfn(entry))
+ page = softleaf_to_page(entry);
}
if (IS_ERR_OR_NULL(page))
return;
@@ -1146,6 +1147,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_MAYSHARE)] = "ms",
[ilog2(VM_GROWSDOWN)] = "gd",
[ilog2(VM_PFNMAP)] = "pf",
+ [ilog2(VM_MAYBE_GUARD)] = "gu",
[ilog2(VM_LOCKED)] = "lo",
[ilog2(VM_IO)] = "io",
[ilog2(VM_SEQ_READ)] = "sr",
@@ -1181,10 +1183,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_PKEY_BIT0)] = "",
[ilog2(VM_PKEY_BIT1)] = "",
[ilog2(VM_PKEY_BIT2)] = "",
-#if VM_PKEY_BIT3
+#if CONFIG_ARCH_PKEY_BITS > 3
[ilog2(VM_PKEY_BIT3)] = "",
#endif
-#if VM_PKEY_BIT4
+#if CONFIG_ARCH_PKEY_BITS > 4
[ilog2(VM_PKEY_BIT4)] = "",
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
@@ -1230,11 +1232,11 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
present = true;
- } else if (is_swap_pte(ptent)) {
- swp_entry_t swpent = pte_to_swp_entry(ptent);
+ } else {
+ const softleaf_t entry = softleaf_from_pte(ptent);
- if (is_pfn_swap_entry(swpent))
- folio = pfn_swap_entry_folio(swpent);
+ if (softleaf_has_pfn(entry))
+ folio = softleaf_to_folio(entry);
}
if (folio) {
@@ -1582,8 +1584,6 @@ struct clear_refs_private {
enum clear_refs_types type;
};
-#ifdef CONFIG_MEM_SOFT_DIRTY
-
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
struct folio *folio;
@@ -1603,6 +1603,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
static inline void clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ if (!pgtable_supports_soft_dirty())
+ return;
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
@@ -1611,6 +1613,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
*/
pte_t ptent = ptep_get(pte);
+ if (pte_none(ptent))
+ return;
+
if (pte_present(ptent)) {
pte_t old_pte;
@@ -1620,24 +1625,21 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
+ } else {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
-#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
- unsigned long addr, pte_t *pte)
-{
-}
-#endif
-#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ if (!pgtable_supports_soft_dirty())
+ return;
+
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
@@ -1650,7 +1652,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
pmd = pmd_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
- } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ } else if (pmd_is_migration_entry(pmd)) {
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
@@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
struct page *page = NULL;
struct folio *folio;
+ if (pte_none(pte))
+ goto out;
+
if (pte_present(pte)) {
if (pm->show_pfn)
frame = pte_pfn(pte);
@@ -1932,32 +1937,34 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SOFT_DIRTY;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
- } else if (is_swap_pte(pte)) {
- swp_entry_t entry;
+ } else {
+ softleaf_t entry;
+
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
- entry = pte_to_swp_entry(pte);
+ entry = softleaf_from_pte(pte);
if (pm->show_pfn) {
pgoff_t offset;
+
/*
* For PFN swap offsets, keeping the offset field
* to be PFN only to be compatible with old smaps.
*/
- if (is_pfn_swap_entry(entry))
- offset = swp_offset_pfn(entry);
+ if (softleaf_has_pfn(entry))
+ offset = softleaf_to_pfn(entry);
else
offset = swp_offset(entry);
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
}
flags |= PM_SWAP;
- if (is_pfn_swap_entry(entry))
- page = pfn_swap_entry_to_page(entry);
- if (pte_marker_entry_uffd_wp(entry))
+ if (softleaf_has_pfn(entry))
+ page = softleaf_to_page(entry);
+ if (softleaf_is_uffd_wp_marker(entry))
flags |= PM_UFFD_WP;
- if (is_guard_swp_entry(entry))
+ if (softleaf_is_guard_marker(entry))
flags |= PM_GUARD_REGION;
}
@@ -1969,96 +1976,110 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
__folio_page_mapped_exclusively(folio, page))
flags |= PM_MMAP_EXCLUSIVE;
}
+
+out:
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
return make_pme(frame, flags);
}
-static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, struct vm_area_struct *vma,
+ struct pagemapread *pm)
{
- struct vm_area_struct *vma = walk->vma;
- struct pagemapread *pm = walk->private;
- spinlock_t *ptl;
- pte_t *pte, *orig_pte;
+ unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
+ u64 flags = 0, frame = 0;
+ pmd_t pmd = *pmdp;
+ struct page *page = NULL;
+ struct folio *folio = NULL;
int err = 0;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- ptl = pmd_trans_huge_lock(pmdp, vma);
- if (ptl) {
- unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
- u64 flags = 0, frame = 0;
- pmd_t pmd = *pmdp;
- struct page *page = NULL;
- struct folio *folio = NULL;
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ flags |= PM_SOFT_DIRTY;
- if (vma->vm_flags & VM_SOFTDIRTY)
- flags |= PM_SOFT_DIRTY;
+ if (pmd_none(pmd))
+ goto populate_pagemap;
- if (pmd_present(pmd)) {
- page = pmd_page(pmd);
+ if (pmd_present(pmd)) {
+ page = pmd_page(pmd);
- flags |= PM_PRESENT;
- if (pmd_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
- if (pmd_uffd_wp(pmd))
- flags |= PM_UFFD_WP;
- if (pm->show_pfn)
- frame = pmd_pfn(pmd) + idx;
- }
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
- else if (is_swap_pmd(pmd)) {
- swp_entry_t entry = pmd_to_swp_entry(pmd);
- unsigned long offset;
-
- if (pm->show_pfn) {
- if (is_pfn_swap_entry(entry))
- offset = swp_offset_pfn(entry) + idx;
- else
- offset = swp_offset(entry) + idx;
- frame = swp_type(entry) |
- (offset << MAX_SWAPFILES_SHIFT);
- }
- flags |= PM_SWAP;
- if (pmd_swp_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
- if (pmd_swp_uffd_wp(pmd))
- flags |= PM_UFFD_WP;
- VM_BUG_ON(!is_pmd_migration_entry(pmd));
- page = pfn_swap_entry_to_page(entry);
- }
-#endif
+ flags |= PM_PRESENT;
+ if (pmd_soft_dirty(pmd))
+ flags |= PM_SOFT_DIRTY;
+ if (pmd_uffd_wp(pmd))
+ flags |= PM_UFFD_WP;
+ if (pm->show_pfn)
+ frame = pmd_pfn(pmd) + idx;
+ } else if (thp_migration_supported()) {
+ const softleaf_t entry = softleaf_from_pmd(pmd);
+ unsigned long offset;
- if (page) {
- folio = page_folio(page);
- if (!folio_test_anon(folio))
- flags |= PM_FILE;
+ if (pm->show_pfn) {
+ if (softleaf_has_pfn(entry))
+ offset = softleaf_to_pfn(entry) + idx;
+ else
+ offset = swp_offset(entry) + idx;
+ frame = swp_type(entry) |
+ (offset << MAX_SWAPFILES_SHIFT);
}
+ flags |= PM_SWAP;
+ if (pmd_swp_soft_dirty(pmd))
+ flags |= PM_SOFT_DIRTY;
+ if (pmd_swp_uffd_wp(pmd))
+ flags |= PM_UFFD_WP;
+ VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd));
+ page = softleaf_to_page(entry);
+ }
+
+ if (page) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio))
+ flags |= PM_FILE;
+ }
- for (; addr != end; addr += PAGE_SIZE, idx++) {
- u64 cur_flags = flags;
- pagemap_entry_t pme;
+populate_pagemap:
+ for (; addr != end; addr += PAGE_SIZE, idx++) {
+ u64 cur_flags = flags;
+ pagemap_entry_t pme;
- if (folio && (flags & PM_PRESENT) &&
- __folio_page_mapped_exclusively(folio, page))
- cur_flags |= PM_MMAP_EXCLUSIVE;
+ if (folio && (flags & PM_PRESENT) &&
+ __folio_page_mapped_exclusively(folio, page))
+ cur_flags |= PM_MMAP_EXCLUSIVE;
- pme = make_pme(frame, cur_flags);
- err = add_to_pagemap(&pme, pm);
- if (err)
- break;
- if (pm->show_pfn) {
- if (flags & PM_PRESENT)
- frame++;
- else if (flags & PM_SWAP)
- frame += (1 << MAX_SWAPFILES_SHIFT);
- }
+ pme = make_pme(frame, cur_flags);
+ err = add_to_pagemap(&pme, pm);
+ if (err)
+ break;
+ if (pm->show_pfn) {
+ if (flags & PM_PRESENT)
+ frame++;
+ else if (flags & PM_SWAP)
+ frame += (1 << MAX_SWAPFILES_SHIFT);
}
+ }
+ return err;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+ struct pagemapread *pm = walk->private;
+ spinlock_t *ptl;
+ pte_t *pte, *orig_pte;
+ int err = 0;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
+ err = pagemap_pmd_range_thp(pmdp, addr, end, vma, pm);
spin_unlock(ptl);
return err;
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
/*
* We can assume that @vma always points to a valid one and @end never
@@ -2310,12 +2331,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
struct vm_area_struct *vma,
unsigned long addr, pte_t pte)
{
- unsigned long categories = 0;
+ unsigned long categories;
+
+ if (pte_none(pte))
+ return 0;
if (pte_present(pte)) {
struct page *page;
- categories |= PAGE_IS_PRESENT;
+ categories = PAGE_IS_PRESENT;
+
if (!pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
@@ -2329,19 +2354,20 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
- swp_entry_t swp;
+ } else {
+ softleaf_t entry;
+
+ categories = PAGE_IS_SWAPPED;
- categories |= PAGE_IS_SWAPPED;
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
- swp = pte_to_swp_entry(pte);
- if (is_guard_swp_entry(swp))
+ entry = softleaf_from_pte(pte);
+ if (softleaf_is_guard_marker(entry))
categories |= PAGE_IS_GUARD;
else if ((p->masks_of_interest & PAGE_IS_FILE) &&
- is_pfn_swap_entry(swp) &&
- !folio_test_anon(pfn_swap_entry_folio(swp)))
+ softleaf_has_pfn(entry) &&
+ !folio_test_anon(softleaf_to_folio(entry)))
categories |= PAGE_IS_FILE;
if (pte_swp_soft_dirty(pte))
@@ -2360,12 +2386,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
old_pte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_mkuffd_wp(old_pte);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
- ptent = pte_swp_mkuffd_wp(ptent);
- set_pte_at(vma->vm_mm, addr, pte, ptent);
- } else {
+ } else if (pte_none(ptent)) {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
+ } else {
+ ptent = pte_swp_mkuffd_wp(ptent);
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
@@ -2376,6 +2402,9 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pmd_none(pmd))
+ return categories;
+
if (pmd_present(pmd)) {
struct page *page;
@@ -2393,9 +2422,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pmd(pmd)) {
- swp_entry_t swp;
-
+ } else {
categories |= PAGE_IS_SWAPPED;
if (!pmd_swp_uffd_wp(pmd))
categories |= PAGE_IS_WRITTEN;
@@ -2403,9 +2430,10 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_SOFT_DIRTY;
if (p->masks_of_interest & PAGE_IS_FILE) {
- swp = pmd_to_swp_entry(pmd);
- if (is_pfn_swap_entry(swp) &&
- !folio_test_anon(pfn_swap_entry_folio(swp)))
+ const softleaf_t entry = softleaf_from_pmd(pmd);
+
+ if (softleaf_has_pfn(entry) &&
+ !folio_test_anon(softleaf_to_folio(entry)))
categories |= PAGE_IS_FILE;
}
}
@@ -2422,7 +2450,7 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma,
old = pmdp_invalidate_ad(vma, addr, pmdp);
pmd = pmd_mkuffd_wp(old);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
- } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ } else if (pmd_is_migration_entry(pmd)) {
pmd = pmd_swp_mkuffd_wp(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
@@ -2434,6 +2462,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pte_none(pte))
+ return categories;
+
/*
* According to pagemap_hugetlb_range(), file-backed HugeTLB
* page cannot be swapped. So PAGE_IS_FILE is not checked for
@@ -2441,6 +2472,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
*/
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
+
if (!huge_pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
@@ -2449,8 +2481,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
categories |= PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
if (pte_swp_soft_dirty(pte))
@@ -2464,22 +2497,25 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t ptent)
{
- unsigned long psize;
+ const unsigned long psize = huge_page_size(hstate_vma(vma));
+ softleaf_t entry;
- if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent))
+ if (huge_pte_none(ptent)) {
+ set_huge_pte_at(vma->vm_mm, addr, ptep,
+ make_pte_marker(PTE_MARKER_UFFD_WP), psize);
return;
+ }
- psize = huge_page_size(hstate_vma(vma));
+ entry = softleaf_from_pte(ptent);
+ if (softleaf_is_hwpoison(entry) || softleaf_is_marker(entry))
+ return;
- if (is_hugetlb_entry_migration(ptent))
+ if (softleaf_is_migration(entry))
set_huge_pte_at(vma->vm_mm, addr, ptep,
pte_swp_mkuffd_wp(ptent), psize);
- else if (!huge_pte_none(ptent))
+ else
huge_ptep_modify_prot_commit(vma, addr, ptep, ptent,
huge_pte_mkuffd_wp(ptent));
- else
- set_huge_pte_at(vma->vm_mm, addr, ptep,
- make_pte_marker(PTE_MARKER_UFFD_WP), psize);
}
#endif /* CONFIG_HUGETLB_PAGE */
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b11f5b20b78b..c3ed1c5117b2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -35,7 +35,7 @@ static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(file, addr, len, pgoff, flags);
}
const struct file_operations ramfs_file_operations = {
diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c
index 87bbc2605de1..0bfc13c5b96d 100644
--- a/fs/resctrl/pseudo_lock.c
+++ b/fs/resctrl/pseudo_lock.c
@@ -995,10 +995,11 @@ static const struct vm_operations_struct pseudo_mmap_ops = {
.mremap = pseudo_lock_dev_mremap,
};
-static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
{
- unsigned long vsize = vma->vm_end - vma->vm_start;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long off = desc->pgoff << PAGE_SHIFT;
+ unsigned long vsize = vma_desc_size(desc);
+ struct file *filp = desc->file;
struct pseudo_lock_region *plr;
struct rdtgroup *rdtgrp;
unsigned long physical;
@@ -1043,7 +1044,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* Ensure changes are carried directly to the memory being mapped,
* do not allow copy-on-write mapping.
*/
- if (!(vma->vm_flags & VM_SHARED)) {
+ if (!(desc->vm_flags & VM_SHARED)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
@@ -1055,12 +1056,9 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
memset(plr->kmem + off, 0, vsize);
- if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
- vsize, vma->vm_page_prot)) {
- mutex_unlock(&rdtgroup_mutex);
- return -EAGAIN;
- }
- vma->vm_ops = &pseudo_mmap_ops;
+ desc->vm_ops = &pseudo_mmap_ops;
+ mmap_action_remap_full(desc, physical + desc->pgoff);
+
mutex_unlock(&rdtgroup_mutex);
return 0;
}
@@ -1071,7 +1069,7 @@ static const struct file_operations pseudo_lock_dev_fops = {
.write = NULL,
.open = pseudo_lock_dev_open,
.release = pseudo_lock_dev_release,
- .mmap = pseudo_lock_dev_mmap,
+ .mmap_prepare = pseudo_lock_dev_mmap_prepare,
};
int rdt_pseudo_lock_init(void)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e6e74b384087..c5ba1f4487bd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -29,7 +29,7 @@
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
#include <linux/miscdevice.h>
#include <linux/uio.h>
@@ -233,40 +233,48 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
{
struct vm_area_struct *vma = vmf->vma;
pte_t *ptep, pte;
- bool ret = true;
assert_fault_locked(vmf);
ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma));
if (!ptep)
- goto out;
+ return true;
- ret = false;
pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep);
/*
* Lockless access: we're in a wait_event so it's ok if it
- * changes under us. PTE markers should be handled the same as none
- * ptes here.
+ * changes under us.
+ */
+
+ /* Entry is still missing, wait for userspace to resolve the fault. */
+ if (huge_pte_none(pte))
+ return true;
+ /* UFFD PTE markers require userspace to resolve the fault. */
+ if (pte_is_uffd_marker(pte))
+ return true;
+ /*
+ * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to
+ * resolve the fault.
*/
- if (huge_pte_none_mostly(pte))
- ret = true;
if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
- ret = true;
-out:
- return ret;
+ return true;
+
+ return false;
}
#else
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
struct vm_fault *vmf,
unsigned long reason)
{
- return false; /* should never get here */
+ /* Should never get here. */
+ VM_WARN_ON_ONCE(1);
+ return false;
}
#endif /* CONFIG_HUGETLB_PAGE */
/*
- * Verify the pagetables are still not ok after having reigstered into
+ * Verify the pagetables are still not ok after having registered into
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
* userfault that has already been resolved, if userfaultfd_read_iter and
* UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
@@ -284,53 +292,63 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pmd_t *pmd, _pmd;
pte_t *pte;
pte_t ptent;
- bool ret = true;
+ bool ret;
assert_fault_locked(vmf);
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
- goto out;
+ return true;
p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
- goto out;
+ return true;
pud = pud_offset(p4d, address);
if (!pud_present(*pud))
- goto out;
+ return true;
pmd = pmd_offset(pud, address);
again:
_pmd = pmdp_get_lockless(pmd);
if (pmd_none(_pmd))
- goto out;
+ return true;
- ret = false;
+ /*
+ * A race could arise which would result in a softleaf entry such as
+ * migration entry unexpectedly being present in the PMD, so explicitly
+ * check for this and bail out if so.
+ */
if (!pmd_present(_pmd))
- goto out;
+ return false;
- if (pmd_trans_huge(_pmd)) {
- if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
- ret = true;
- goto out;
- }
+ if (pmd_trans_huge(_pmd))
+ return !pmd_write(_pmd) && (reason & VM_UFFD_WP);
pte = pte_offset_map(pmd, address);
- if (!pte) {
- ret = true;
+ if (!pte)
goto again;
- }
+
/*
* Lockless access: we're in a wait_event so it's ok if it
- * changes under us. PTE markers should be handled the same as none
- * ptes here.
+ * changes under us.
*/
ptent = ptep_get(pte);
- if (pte_none_mostly(ptent))
- ret = true;
+
+ ret = true;
+ /* Entry is still missing, wait for userspace to resolve the fault. */
+ if (pte_none(ptent))
+ goto out;
+ /* UFFD PTE markers require userspace to resolve the fault. */
+ if (pte_is_uffd_marker(ptent))
+ goto out;
+ /*
+ * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to
+ * resolve the fault.
+ */
if (!pte_write(ptent) && (reason & VM_UFFD_WP))
- ret = true;
- pte_unmap(pte);
+ goto out;
+ ret = false;
out:
+ pte_unmap(pte);
return ret;
}
@@ -490,12 +508,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
set_current_state(blocking_state);
spin_unlock_irq(&ctx->fault_pending_wqh.lock);
- if (!is_vm_hugetlb_page(vma))
- must_wait = userfaultfd_must_wait(ctx, vmf, reason);
- else
+ if (is_vm_hugetlb_page(vma)) {
must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason);
- if (is_vm_hugetlb_page(vma))
hugetlb_vma_unlock_read(vma);
+ } else {
+ must_wait = userfaultfd_must_wait(ctx, vmf, reason);
+ }
+
release_fault_lock(vmf);
if (likely(must_wait && !READ_ONCE(ctx->released))) {
@@ -1270,9 +1289,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
vm_flags |= VM_UFFD_MISSING;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
-#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
- goto out;
-#endif
+ if (!pgtable_supports_uffd_wp())
+ goto out;
+
vm_flags |= VM_UFFD_WP;
}
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) {
@@ -1980,14 +1999,14 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &=
~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
#endif
-#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
- uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
-#endif
-#ifndef CONFIG_PTE_MARKER_UFFD_WP
- uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
- uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
- uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
-#endif
+ if (!pgtable_supports_uffd_wp())
+ uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+
+ if (!uffd_supports_wp_marker()) {
+ uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
+ }
ret = -EINVAL;
if (features & ~uffdio_api.features)