author	Linus Torvalds <torvalds@linux-foundation.org>	2025-10-02 15:58:05 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-10-02 15:58:05 -0700
commit	24d9e8b3c9c8a6f72c8b4c196a703e144928d919 (patch)
tree	81d9a41265b30c776a2a70a517fddb5e5da62ed0 /mm/page_alloc.c
parent	07fdad3a93756b872da7b53647715c48d0f4a2d0 (diff)
parent	ca74b8cadaad4b179f77f1f4dc3d288be9a580f1 (diff)
Merge tag 'slab-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka:

 - A new layer for caching objects for allocation and free via percpu
   arrays called sheaves. The aim is to combine the good parts of SLAB
   (lower-overhead and simpler percpu caching, compared to SLUB) without
   the past issues with arrays for freeing remote NUMA node objects and
   their flushing.

   It also allows more efficient kfree_rcu(), and cheaper object
   preallocations for cases where the exact number of objects is
   unknown, but an upper bound is.

   Currently VMAs and maple nodes are using this new caching, with a
   plan to enable it for all caches and remove the complex SLUB fastpath
   based on cpu (partial) slabs and this_cpu_cmpxchg_double().
   (Vlastimil Babka, with Liam Howlett and Pedro Falcato for the maple
   tree changes)

 - Re-entrant kmalloc_nolock(), which allows opportunistic allocations
   from NMI and tracing/kprobe contexts. Building on prior page
   allocator and memcg changes, it will result in removing BPF-specific
   caches on top of slab (Alexei Starovoitov)

 - Various fixes and cleanups. (Kuan-Wei Chiu, Matthew Wilcox, Suren
   Baghdasaryan, Ye Liu)

* tag 'slab-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (40 commits)
  slab: Introduce kmalloc_nolock() and kfree_nolock().
  slab: Reuse first bit for OBJEXTS_ALLOC_FAIL
  slab: Make slub local_(try)lock more precise for LOCKDEP
  mm: Introduce alloc_frozen_pages_nolock()
  mm: Allow GFP_ACCOUNT to be used in alloc_pages_nolock().
  locking/local_lock: Introduce local_lock_is_locked().
  maple_tree: Convert forking to use the sheaf interface
  maple_tree: Add single node allocation support to maple state
  maple_tree: Prefilled sheaf conversion and testing
  tools/testing: Add support for prefilled slab sheafs
  maple_tree: Replace mt_free_one() with kfree()
  maple_tree: Use kfree_rcu in ma_free_rcu
  testing/radix-tree/maple: Hack around kfree_rcu not existing
  tools/testing: include maple-shim.c in maple.c
  maple_tree: use percpu sheaves for maple_node_cache
  mm, vma: use percpu sheaves for vm_area_struct cache
  tools/testing: Add support for changes to slab for sheaves
  slab: allow NUMA restricted allocations to use percpu sheaves
  tools/testing/vma: Implement vm_refcnt reset
  slab: skip percpu sheaves for remote object freeing
  ...
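As a rough illustration of the re-entrant interface named in the commit list above, here is a minimal sketch of an opportunistic allocation from a tracing-style context. The kmalloc_nolock()/kfree_nolock() names come from the commits themselves; the parameter order (size, gfp flags, NUMA node), the accepted flags, and the helper around them are assumptions about this series, not taken from the diff below.

#include <linux/slab.h>

/*
 * Sketch only: an opportunistic allocation from a context where taking
 * locks (and therefore a regular GFP_ATOMIC allocation) is not safe.
 */
static void record_sample(size_t len)
{
	/*
	 * Assumed signature: kmalloc_nolock(size, gfp_flags, node).
	 * Best effort: it may return NULL even when memory is available,
	 * because it only trylocks the slab/page-allocator locks.
	 */
	void *buf = kmalloc_nolock(len, 0, NUMA_NO_NODE);

	if (!buf)
		return;		/* callers must tolerate failure */

	/* ... fill buf from the NMI/tracepoint/kprobe handler ... */

	kfree_nolock(buf);	/* assumed safe from the same contexts */
}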
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	55
1 file changed, 32 insertions(+), 23 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1d037f97c5f..5a40e2b7d148 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7478,22 +7478,7 @@ static bool __free_unaccepted(struct page *page)
#endif /* CONFIG_UNACCEPTED_MEMORY */
-/**
- * alloc_pages_nolock - opportunistic reentrant allocation from any context
- * @nid: node to allocate from
- * @order: allocation order size
- *
- * Allocates pages of a given order from the given node. This is safe to
- * call from any context (from atomic, NMI, and also reentrant
- * allocator -> tracepoint -> alloc_pages_nolock_noprof).
- * Allocation is best effort and to be expected to fail easily so nobody should
- * rely on the success. Failures are not reported via warn_alloc().
- * See always fail conditions below.
- *
- * Return: allocated page or NULL on failure. NULL does not mean EBUSY or EAGAIN.
- * It means ENOMEM. There is no reason to call it again and expect !NULL.
- */
-struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
+struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order)
{
/*
* Do not specify __GFP_DIRECT_RECLAIM, since direct reclaim is not allowed.
@@ -7515,12 +7500,13 @@ struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
* specify it here to highlight that alloc_pages_nolock()
* doesn't want to deplete reserves.
*/
- gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC
- | __GFP_ACCOUNT;
+ gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC | __GFP_COMP
+ | gfp_flags;
unsigned int alloc_flags = ALLOC_TRYLOCK;
struct alloc_context ac = { };
struct page *page;
+ VM_WARN_ON_ONCE(gfp_flags & ~__GFP_ACCOUNT);
/*
* In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is
* unsafe in NMI. If spin_trylock() is called from hard IRQ the current
@@ -7555,15 +7541,38 @@ struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
/* Unlike regular alloc_pages() there is no __alloc_pages_slowpath(). */
- if (page)
- set_page_refcounted(page);
-
- if (memcg_kmem_online() && page &&
+ if (memcg_kmem_online() && page && (gfp_flags & __GFP_ACCOUNT) &&
unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) {
- free_pages_nolock(page, order);
+ __free_frozen_pages(page, order, FPI_TRYLOCK);
page = NULL;
}
trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
kmsan_alloc_page(page, order, alloc_gfp);
return page;
}
+/**
+ * alloc_pages_nolock - opportunistic reentrant allocation from any context
+ * @gfp_flags: GFP flags. Only __GFP_ACCOUNT allowed.
+ * @nid: node to allocate from
+ * @order: allocation order size
+ *
+ * Allocates pages of a given order from the given node. This is safe to
+ * call from any context (from atomic, NMI, and also reentrant
+ * allocator -> tracepoint -> alloc_pages_nolock_noprof).
+ * Allocation is best effort and to be expected to fail easily so nobody should
+ * rely on the success. Failures are not reported via warn_alloc().
+ * See always fail conditions below.
+ *
+ * Return: allocated page or NULL on failure. NULL does not mean EBUSY or EAGAIN.
+ * It means ENOMEM. There is no reason to call it again and expect !NULL.
+ */
+struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order)
+{
+ struct page *page;
+
+ page = alloc_frozen_pages_nolock_noprof(gfp_flags, nid, order);
+ if (page)
+ set_page_refcounted(page);
+ return page;
+}
+EXPORT_SYMBOL_GPL(alloc_pages_nolock_noprof);
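For context on the signature change above, a hedged usage sketch: the diff adds a gfp_flags argument to alloc_pages_nolock_noprof() and its kernel-doc states that only __GFP_ACCOUNT is accepted. The alloc_pages_nolock() spelling below is assumed to be the usual alloc-tag wrapper around the _noprof symbol, and free_pages_nolock() is the pairing free visible in the removed hunk; neither assumption comes from this diff itself.

#include <linux/gfp.h>

/* Sketch only: exercises the signature introduced by this diff. */
static struct page *grab_scratch_page(int nid)
{
	/*
	 * Only __GFP_ACCOUNT may be passed in; the implementation sets
	 * __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC | __GFP_COMP itself
	 * (see alloc_gfp above) and VM_WARN_ON_ONCE()s any other bit.
	 */
	struct page *page = alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);

	/* NULL means ENOMEM here, not EBUSY/EAGAIN: retrying will not help. */
	return page;
}

/* Later, from any context: free_pages_nolock(page, 0); */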