author     Jakub Kicinski <kuba@kernel.org>    2025-11-27 12:16:40 -0800
committer  Jakub Kicinski <kuba@kernel.org>    2025-11-27 12:19:08 -0800
commit     db4029859d6fd03f0622d394f4cdb1be86d7ec62 (patch)
tree       e2e33b9c1f6bb9aea36df6ce974d698f0aaf177c /mm
parent     73f784b2c938e17e4af90aff4cdcaafe4ca06a5f (diff)
parent     1f5e808aa63af61ec0d6a14909056d6668813e86 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Conflicts:

net/xdp/xsk.c
  0ebc27a4c67d ("xsk: avoid data corruption on cq descriptor number")
  8da7bea7db69 ("xsk: add indirect call for xsk_destruct_skb")
  30ed05adca4a ("xsk: use a smaller new lock for shared pool case")
https://lore.kernel.org/20251127105450.4a1665ec@canb.auug.org.au
https://lore.kernel.org/eb4eee14-7e24-4d1b-b312-e9ea738fefee@kernel.org

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--    mm/filemap.c        27
-rw-r--r--    mm/huge_memory.c    22
-rw-r--r--    mm/memfd.c          27
-rw-r--r--    mm/mempool.c        32
-rw-r--r--    mm/mmap_lock.c       1
-rw-r--r--    mm/swapfile.c        4
6 files changed, 79 insertions(+), 34 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 2f1e7e283a51..024b71da5224 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3682,8 +3682,9 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
unsigned long *rss, unsigned short *mmap_miss,
- bool can_map_large)
+ pgoff_t file_end)
{
+ struct address_space *mapping = folio->mapping;
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
struct page *page = folio_page(folio, start);
@@ -3692,12 +3693,16 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
unsigned long addr0;
/*
- * Map the large folio fully where possible.
+ * Map the large folio fully where possible:
*
- * The folio must not cross VMA or page table boundary.
+ * - The folio is fully within the size of the file or belongs
+ * to shmem/tmpfs;
+ * - The folio doesn't cross VMA boundary;
+ * - The folio doesn't cross page table boundary;
*/
addr0 = addr - start * PAGE_SIZE;
- if (can_map_large && folio_within_vma(folio, vmf->vma) &&
+ if ((file_end >= folio_next_index(folio) || shmem_mapping(mapping)) &&
+ folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3812,7 +3817,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
unsigned long rss = 0;
unsigned int nr_pages = 0, folio_type;
unsigned short mmap_miss = 0, mmap_miss_saved;
- bool can_map_large;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
@@ -3823,16 +3827,14 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
end_pgoff = min(end_pgoff, file_end);
/*
- * Do not allow to map with PTEs beyond i_size and with PMD
- * across i_size to preserve SIGBUS semantics.
+ * Do not allow to map with PMD across i_size to preserve
+ * SIGBUS semantics.
*
* Make an exception for shmem/tmpfs that for long time
* intentionally mapped with PMDs across i_size.
*/
- can_map_large = shmem_mapping(mapping) ||
- file_end >= folio_next_index(folio);
-
- if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
+ if ((file_end >= folio_next_index(folio) || shmem_mapping(mapping)) &&
+ filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3861,8 +3863,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss,
- can_map_large);
+ nr_pages, &rss, &mmap_miss, file_end);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
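For context, a minimal user-space model of the size check that this hunk moves into filemap_map_folio_range(): a large folio may be mapped in full only when it lies entirely within i_size, or when the mapping is shmem/tmpfs, which has long been mapped with PMDs across i_size. The struct and helper below are illustrative stand-ins, not kernel APIs; file_end models the last page index implied by i_size, mirroring the condition in the hunk.

#include <stdbool.h>

struct folio_model {
	unsigned long index;	/* first page-cache index covered by the folio */
	unsigned long nr_pages;	/* number of pages in the folio */
	bool shmem;		/* backed by shmem/tmpfs? */
};

static bool may_map_folio_fully(const struct folio_model *f, unsigned long file_end)
{
	/* folio_next_index() analogue: first index past the folio. */
	unsigned long next_index = f->index + f->nr_pages;

	/* shmem/tmpfs is exempt from the i_size restriction. */
	return f->shmem || file_end >= next_index;
}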
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2f2a521e5d68..6cba1cb14b23 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3619,6 +3619,16 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (folio != page_folio(split_at) || folio != page_folio(lock_at))
return -EINVAL;
+ /*
+ * Folios that just got truncated cannot get split. Signal to the
+ * caller that there was a race.
+ *
+ * TODO: this will also currently refuse shmem folios that are in the
+ * swapcache.
+ */
+ if (!is_anon && !folio->mapping)
+ return -EBUSY;
+
if (new_order >= folio_order(folio))
return -EINVAL;
@@ -3659,18 +3669,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
gfp_t gfp;
mapping = folio->mapping;
-
- /* Truncated ? */
- /*
- * TODO: add support for large shmem folio in swap cache.
- * When shmem is in swap cache, mapping is NULL and
- * folio_test_swapcache() is true.
- */
- if (!mapping) {
- ret = -EBUSY;
- goto out;
- }
-
min_order = mapping_min_folio_order(folio->mapping);
if (new_order < min_order) {
ret = -EINVAL;
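The hunks above only move the truncation-race check earlier in __folio_split(), so it now runs before the order validation. A simplified sketch of the resulting order of checks, with illustrative stand-in types rather than kernel code:

#include <errno.h>
#include <stdbool.h>

struct folio_stub {
	bool anon;		/* anonymous memory? */
	void *mapping;		/* NULL once the folio has been truncated */
	unsigned int order;
};

static int split_prechecks(const struct folio_stub *folio, unsigned int new_order)
{
	/* A file-backed folio that lost its mapping raced with truncation. */
	if (!folio->anon && !folio->mapping)
		return -EBUSY;

	/* Splitting to an equal or larger order is invalid. */
	if (new_order >= folio->order)
		return -EINVAL;

	return 0;
}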
diff --git a/mm/memfd.c b/mm/memfd.c
index 1d109c1acf21..a405eaa451ee 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -96,9 +96,36 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
NULL,
gfp_mask);
if (folio) {
+ u32 hash;
+
+ /*
+ * Zero the folio to prevent information leaks to userspace.
+ * Use folio_zero_user() which is optimized for huge/gigantic
+ * pages. Pass 0 as addr_hint since this is not a faulting path
+ * and we don't have a user virtual address yet.
+ */
+ folio_zero_user(folio, 0);
+
+ /*
+ * Mark the folio uptodate before adding to page cache,
+ * as required by filemap.c and other hugetlb paths.
+ */
+ __folio_mark_uptodate(folio);
+
+ /*
+ * Serialize hugepage allocation and instantiation to prevent
+ * races with concurrent allocations, as required by all other
+ * callers of hugetlb_add_to_page_cache().
+ */
+ hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
err = hugetlb_add_to_page_cache(folio,
memfd->f_mapping,
idx);
+
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+
if (err) {
folio_put(folio);
goto err_unresv;
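The serialization added above hashes the (mapping, index) pair to one mutex in a fixed table, so racing allocations for the same index serialize while different indexes proceed in parallel. A user-space sketch of that pattern follows; the table size and hash function are illustrative, not the kernel's hugetlb_fault_mutex_hash().

#include <pthread.h>
#include <stdint.h>

#define FAULT_MUTEX_SLOTS 64

static pthread_mutex_t fault_mutex_table[FAULT_MUTEX_SLOTS] = {
	[0 ... FAULT_MUTEX_SLOTS - 1] = PTHREAD_MUTEX_INITIALIZER
};

/* Hash a (mapping, index) pair to a slot; any reasonable mix works here. */
static uint32_t fault_mutex_hash(const void *mapping, unsigned long idx)
{
	uint64_t key = (uint64_t)(uintptr_t)mapping ^
		       ((uint64_t)idx * 0x9e3779b97f4a7c15ULL);

	return (uint32_t)(key % FAULT_MUTEX_SLOTS);
}

/* Usage: take the slot's mutex around the insert, mirroring the hunk above. */
static int add_to_cache_serialized(const void *mapping, unsigned long idx,
				   int (*insert)(const void *, unsigned long))
{
	uint32_t hash = fault_mutex_hash(mapping, idx);
	int err;

	pthread_mutex_lock(&fault_mutex_table[hash]);
	err = insert(mapping, idx);
	pthread_mutex_unlock(&fault_mutex_table[hash]);
	return err;
}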
diff --git a/mm/mempool.c b/mm/mempool.c
index 1c38e873e546..d7bbf1189db9 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -68,10 +68,20 @@ static void check_element(mempool_t *pool, void *element)
} else if (pool->free == mempool_free_pages) {
/* Mempools backed by page allocator */
int order = (int)(long)pool->pool_data;
- void *addr = kmap_local_page((struct page *)element);
- __check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
- kunmap_local(addr);
+#ifdef CONFIG_HIGHMEM
+ for (int i = 0; i < (1 << order); i++) {
+ struct page *page = (struct page *)element;
+ void *addr = kmap_local_page(page + i);
+
+ __check_element(pool, addr, PAGE_SIZE);
+ kunmap_local(addr);
+ }
+#else
+ void *addr = page_address((struct page *)element);
+
+ __check_element(pool, addr, PAGE_SIZE << order);
+#endif
}
}
@@ -97,10 +107,20 @@ static void poison_element(mempool_t *pool, void *element)
} else if (pool->alloc == mempool_alloc_pages) {
/* Mempools backed by page allocator */
int order = (int)(long)pool->pool_data;
- void *addr = kmap_local_page((struct page *)element);
- __poison_element(addr, 1UL << (PAGE_SHIFT + order));
- kunmap_local(addr);
+#ifdef CONFIG_HIGHMEM
+ for (int i = 0; i < (1 << order); i++) {
+ struct page *page = (struct page *)element;
+ void *addr = kmap_local_page(page + i);
+
+ __poison_element(addr, PAGE_SIZE);
+ kunmap_local(addr);
+ }
+#else
+ void *addr = page_address((struct page *)element);
+
+ __poison_element(addr, PAGE_SIZE << order);
+#endif
}
}
#else /* CONFIG_SLUB_DEBUG_ON */
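The mempool hunks replace a single kmap_local_page() of the first page, which was used to touch 1UL << (PAGE_SHIFT + order) bytes, with a per-page loop under CONFIG_HIGHMEM, since kmap_local_page() only maps one page at a time; without HIGHMEM, page_address() covers the whole contiguous range. A user-space analogue of the two access patterns, with stand-in names rather than kernel APIs:

#include <stddef.h>
#include <string.h>

#define PAGE_SIZE	4096UL
#define POISON_BYTE	0x5a

/*
 * Poison an order-N buffer one page-sized window at a time, mirroring the
 * CONFIG_HIGHMEM branch where each page must be mapped individually with
 * kmap_local_page(); the window comments are illustrative stand-ins.
 */
static void poison_high_order(unsigned char *base, unsigned int order)
{
	for (unsigned long i = 0; i < (1UL << order); i++) {
		unsigned char *addr = base + i * PAGE_SIZE;	/* "map" one page */

		memset(addr, POISON_BYTE, PAGE_SIZE);
		/* the temporary per-page mapping would be dropped here */
	}
}

/* Without HIGHMEM the whole allocation is linearly mapped, so one pass covers it. */
static void poison_linear(unsigned char *base, unsigned int order)
{
	memset(base, POISON_BYTE, PAGE_SIZE << order);
}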
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index 0a0db5849b8e..42e3dde73e74 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -241,6 +241,7 @@ retry:
if (PTR_ERR(vma) == -EAGAIN) {
count_vm_vma_lock_event(VMA_LOCK_MISS);
/* The area was replaced with another one */
+ mas_set(&mas, address);
goto retry;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 10760240a3a2..a1b4b9d80e3b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2005,10 +2005,8 @@ swp_entry_t get_swap_page_of_type(int type)
local_lock(&percpu_swap_cluster.lock);
offset = cluster_alloc_swap_entry(si, 0, 1);
local_unlock(&percpu_swap_cluster.lock);
- if (offset) {
+ if (offset)
entry = swp_entry(si->type, offset);
- atomic_long_dec(&nr_swap_pages);
- }
}
put_swap_device(si);
}
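The swapfile hunk drops the second decrement of nr_swap_pages in get_swap_page_of_type(); assuming the allocation path already accounts for the entry it hands out, decrementing again in the caller would double-count. A toy C illustration of that accounting shape, with made-up counter and function names:

#include <stdatomic.h>

static atomic_long nr_free_entries;	/* stand-in for nr_swap_pages */

/* Allocator: hands out one entry and does the accounting itself. */
static long alloc_entry(void)
{
	atomic_fetch_sub(&nr_free_entries, 1);
	return 42;	/* some allocated offset */
}

static long get_entry(void)
{
	long offset = alloc_entry();

	/*
	 * No second atomic_fetch_sub() here: the caller leaves the
	 * accounting to the allocator, which is the shape the removed
	 * atomic_long_dec() call had broken.
	 */
	return offset;
}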