Diffstat (limited to 'mm')
-rw-r--r--  mm/damon/core.c      7
-rw-r--r--  mm/damon/sysfs.c     7
-rw-r--r--  mm/huge_memory.c     3
-rw-r--r--  mm/hugetlb.c         5
-rw-r--r--  mm/kasan/common.c   12
-rw-r--r--  mm/kfence/core.c    14
-rw-r--r--  mm/memcontrol.c     40
-rw-r--r--  mm/migrate.c         3
-rw-r--r--  mm/mremap.c         15
-rw-r--r--  mm/page_owner.c      3
-rw-r--r--  mm/slab.h           60
-rw-r--r--  mm/slab_common.c    29
-rw-r--r--  mm/slub.c          489
-rw-r--r--  mm/usercopy.c       24
14 files changed, 354 insertions, 357 deletions
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 93848b4c6944..109b050c795a 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -452,6 +452,9 @@ void damon_destroy_scheme(struct damos *s)
damos_for_each_filter_safe(f, next, s)
damos_destroy_filter(f);
+ damos_for_each_ops_filter_safe(f, next, s)
+ damos_destroy_filter(f);
+
kfree(s->migrate_dests.node_id_arr);
kfree(s->migrate_dests.weight_arr);
damon_del_scheme(s);
@@ -832,7 +835,7 @@ int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src)
src_goal->metric, src_goal->target_value);
if (!new_goal)
return -ENOMEM;
- damos_commit_quota_goal_union(new_goal, src_goal);
+ damos_commit_quota_goal(new_goal, src_goal);
damos_add_quota_goal(dst, new_goal);
}
return 0;
@@ -1450,7 +1453,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
INIT_LIST_HEAD(&control->list);
mutex_lock(&ctx->call_controls_lock);
- list_add_tail(&ctx->call_controls, &control->list);
+ list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
if (!damon_is_running(ctx))
return -EINVAL;
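For context on the list_add_tail() fix above: list_add_tail(new, head) adds @new at the tail of the list headed by @head, so the per-call control node must come first and the context's list head second; the old call had the arguments swapped. A minimal hedged sketch of the corrected pattern follows (not part of the patch; example_queue_control() is a hypothetical helper built only from the fields visible in the hunk above).

/* Hedged sketch, not part of the patch: correct list_add_tail() order. */
static void example_queue_control(struct damon_ctx *ctx,
				  struct damon_call_control *control)
{
	INIT_LIST_HEAD(&control->list);
	mutex_lock(&ctx->call_controls_lock);
	/* new entry first, list head second */
	list_add_tail(&control->list, &ctx->call_controls);
	mutex_unlock(&ctx->call_controls_lock);
}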
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 2fc722f998f8..cd6815ecc04e 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1473,13 +1473,14 @@ static int damon_sysfs_commit_input(void *data)
if (IS_ERR(param_ctx))
return PTR_ERR(param_ctx);
test_ctx = damon_new_ctx();
+ if (!test_ctx)
+ return -ENOMEM;
err = damon_commit_ctx(test_ctx, param_ctx);
- if (err) {
- damon_destroy_ctx(test_ctx);
+ if (err)
goto out;
- }
err = damon_commit_ctx(kdamond->damon_ctx, param_ctx);
out:
+ damon_destroy_ctx(test_ctx);
damon_destroy_ctx(param_ctx);
return err;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1b81680b4225..1d1b74950332 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4109,6 +4109,9 @@ static bool thp_underused(struct folio *folio)
if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1)
return false;
+ if (folio_contain_hwpoisoned_page(folio))
+ return false;
+
for (i = 0; i < folio_nr_pages(folio); i++) {
if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) {
if (++num_zero_pages > khugepaged_max_ptes_none)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 795ee393eac0..0455119716ec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7614,13 +7614,12 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
- i_mmap_assert_write_locked(vma->vm_file->f_mapping);
- hugetlb_vma_assert_locked(vma);
if (sz != PMD_SIZE)
return 0;
if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep)))
return 0;
-
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ hugetlb_vma_assert_locked(vma);
pud_clear(pud);
/*
* Once our caller drops the rmap lock, some other process might be
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index d4c14359feaf..38e8bb0bf326 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -520,24 +520,20 @@ void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order,
bool __kasan_mempool_poison_object(void *ptr, unsigned long ip)
{
- struct folio *folio = virt_to_folio(ptr);
+ struct page *page = virt_to_page(ptr);
struct slab *slab;
- /*
- * This function can be called for large kmalloc allocation that get
- * their memory from page_alloc. Thus, the folio might not be a slab.
- */
- if (unlikely(!folio_test_slab(folio))) {
+ if (unlikely(PageLargeKmalloc(page))) {
if (check_page_allocation(ptr, ip))
return false;
- kasan_poison(ptr, folio_size(folio), KASAN_PAGE_FREE, false);
+ kasan_poison(ptr, page_size(page), KASAN_PAGE_FREE, false);
return true;
}
if (is_kfence_address(ptr))
return true;
- slab = folio_slab(folio);
+ slab = page_slab(page);
if (check_slab_allocation(slab->slab_cache, ptr, ip))
return false;
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 727c20c94ac5..e62b5516bf48 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -612,14 +612,15 @@ static unsigned long kfence_init_pool(void)
* enters __slab_free() slow-path.
*/
for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
- struct slab *slab;
+ struct page *page;
if (!i || (i % 2))
continue;
- slab = page_slab(pfn_to_page(start_pfn + i));
- __folio_set_slab(slab_folio(slab));
+ page = pfn_to_page(start_pfn + i);
+ __SetPageSlab(page);
#ifdef CONFIG_MEMCG
+ struct slab *slab = page_slab(page);
slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts |
MEMCG_DATA_OBJEXTS;
#endif
@@ -665,16 +666,17 @@ static unsigned long kfence_init_pool(void)
reset_slab:
for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
- struct slab *slab;
+ struct page *page;
if (!i || (i % 2))
continue;
- slab = page_slab(pfn_to_page(start_pfn + i));
+ page = pfn_to_page(start_pfn + i);
#ifdef CONFIG_MEMCG
+ struct slab *slab = page_slab(page);
slab->obj_exts = 0;
#endif
- __folio_clear_slab(slab_folio(slab));
+ __ClearPageSlab(page);
}
return addr;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4deda33625f4..b46356da6c0e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2557,38 +2557,25 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
}
static __always_inline
-struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
+struct mem_cgroup *mem_cgroup_from_obj_slab(struct slab *slab, void *p)
{
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* slab->obj_exts.
*/
- if (folio_test_slab(folio)) {
- struct slabobj_ext *obj_exts;
- struct slab *slab;
- unsigned int off;
-
- slab = folio_slab(folio);
- obj_exts = slab_obj_exts(slab);
- if (!obj_exts)
- return NULL;
-
- off = obj_to_index(slab->slab_cache, slab, p);
- if (obj_exts[off].objcg)
- return obj_cgroup_memcg(obj_exts[off].objcg);
+ struct slabobj_ext *obj_exts;
+ unsigned int off;
+ obj_exts = slab_obj_exts(slab);
+ if (!obj_exts)
return NULL;
- }
- /*
- * folio_memcg_check() is used here, because in theory we can encounter
- * a folio where the slab flag has been cleared already, but
- * slab->obj_exts has not been freed yet
- * folio_memcg_check() will guarantee that a proper memory
- * cgroup pointer or NULL will be returned.
- */
- return folio_memcg_check(folio);
+ off = obj_to_index(slab->slab_cache, slab, p);
+ if (obj_exts[off].objcg)
+ return obj_cgroup_memcg(obj_exts[off].objcg);
+
+ return NULL;
}
/*
@@ -2602,10 +2589,15 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
*/
struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
{
+ struct slab *slab;
+
if (mem_cgroup_disabled())
return NULL;
- return mem_cgroup_from_obj_folio(virt_to_folio(p), p);
+ slab = virt_to_slab(p);
+ if (slab)
+ return mem_cgroup_from_obj_slab(slab, p);
+ return folio_memcg_check(virt_to_folio(p));
}
static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
diff --git a/mm/migrate.c b/mm/migrate.c
index e3065c9edb55..c0e9f15be2a2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -301,8 +301,9 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
struct page *page = folio_page(folio, idx);
pte_t newpte;
- if (PageCompound(page))
+ if (PageCompound(page) || PageHWPoison(page))
return false;
+
VM_BUG_ON_PAGE(!PageAnon(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(pte_present(old_pte), page);
diff --git a/mm/mremap.c b/mm/mremap.c
index 35de0a7b910e..bd7314898ec5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1237,10 +1237,10 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
}
/*
- * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() and
- * account flags on remaining VMA by convention (it cannot be mlock()'d any
- * longer, as pages in range are no longer mapped), and removing anon_vma_chain
- * links from it (if the entire VMA was copied over).
+ * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() flag on
+ * remaining VMA by convention (it cannot be mlock()'d any longer, as pages in
+ * range are no longer mapped), and removing anon_vma_chain links from it if the
+ * entire VMA was copied over.
*/
static void dontunmap_complete(struct vma_remap_struct *vrm,
struct vm_area_struct *new_vma)
@@ -1250,11 +1250,8 @@ static void dontunmap_complete(struct vma_remap_struct *vrm,
unsigned long old_start = vrm->vma->vm_start;
unsigned long old_end = vrm->vma->vm_end;
- /*
- * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old
- * vma.
- */
- vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT);
+ /* We always clear VM_LOCKED[ONFAULT] on the old VMA. */
+ vm_flags_clear(vrm->vma, VM_LOCKED_MASK);
/*
* anon_vma links of the old vma is no longer needed after its page
diff --git a/mm/page_owner.c b/mm/page_owner.c
index c3ca21132c2c..589ec37c94aa 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -168,6 +168,9 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
unsigned long flags;
struct stack *stack;
+ if (!gfpflags_allow_spinning(gfp_mask))
+ return;
+
set_current_in_page_owner();
stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask));
if (!stack) {
diff --git a/mm/slab.h b/mm/slab.h
index 42627b87d50c..f730e012553c 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -114,19 +114,6 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist
#endif
/**
- * folio_slab - Converts from folio to slab.
- * @folio: The folio.
- *
- * Currently struct slab is a different representation of a folio where
- * folio_test_slab() is true.
- *
- * Return: The slab which contains this folio.
- */
-#define folio_slab(folio) (_Generic((folio), \
- const struct folio *: (const struct slab *)(folio), \
- struct folio *: (struct slab *)(folio)))
-
-/**
* slab_folio - The folio allocated for a slab
* @s: The slab.
*
@@ -142,20 +129,24 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist
struct slab *: (struct folio *)s))
/**
- * page_slab - Converts from first struct page to slab.
- * @p: The first (either head of compound or single) page of slab.
+ * page_slab - Converts from struct page to its slab.
+ * @page: A page which may or may not belong to a slab.
*
- * A temporary wrapper to convert struct page to struct slab in situations where
- * we know the page is the compound head, or single order-0 page.
- *
- * Long-term ideally everything would work with struct slab directly or go
- * through folio to struct slab.
- *
- * Return: The slab which contains this page
+ * Return: The slab which contains this page or NULL if the page does
+ * not belong to a slab. This includes pages returned from large kmalloc.
*/
-#define page_slab(p) (_Generic((p), \
- const struct page *: (const struct slab *)(p), \
- struct page *: (struct slab *)(p)))
+static inline struct slab *page_slab(const struct page *page)
+{
+ unsigned long head;
+
+ head = READ_ONCE(page->compound_head);
+ if (head & 1)
+ page = (struct page *)(head - 1);
+ if (data_race(page->page_type >> 24) != PGTY_slab)
+ page = NULL;
+
+ return (struct slab *)page;
+}
/**
* slab_page - The first struct page allocated for a slab
@@ -184,12 +175,7 @@ static inline pg_data_t *slab_pgdat(const struct slab *slab)
static inline struct slab *virt_to_slab(const void *addr)
{
- struct folio *folio = virt_to_folio(addr);
-
- if (!folio_test_slab(folio))
- return NULL;
-
- return folio_slab(folio);
+ return page_slab(virt_to_page(addr));
}
static inline int slab_order(const struct slab *slab)
@@ -232,10 +218,8 @@ struct kmem_cache_order_objects {
* Slab cache management.
*/
struct kmem_cache {
-#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
struct lock_class_key lock_key;
-#endif
struct slub_percpu_sheaves __percpu *cpu_sheaves;
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
@@ -597,6 +581,16 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
return s->size;
}
+static inline unsigned int large_kmalloc_order(const struct page *page)
+{
+ return page[1].flags.f & 0xff;
+}
+
+static inline size_t large_kmalloc_size(const struct page *page)
+{
+ return PAGE_SIZE << large_kmalloc_order(page);
+}
+
#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else
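To summarize the lookup pattern the reworked page_slab() enables (and which the mm/slab_common.c and mm/slub.c hunks below follow), here is a hedged sketch that is not part of the patch. example_classify() is a hypothetical helper; it assumes ptr is the address originally returned by the allocator and relies on the PageLargeKmalloc() page type used elsewhere in this series.

/* Hedged sketch: the three cases callers of the new page_slab() distinguish. */
static const char *example_classify(const void *ptr)
{
	struct page *page = virt_to_page(ptr);
	struct slab *slab = page_slab(page);	/* NULL unless the page type is PGTY_slab */

	if (slab)
		return "slab object";			/* cache via slab->slab_cache */
	if (PageLargeKmalloc(page))
		return "large kmalloc";			/* size via large_kmalloc_size(page) */
	return "not a slab/kmalloc allocation";		/* e.g. a plain page allocator page */
}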
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 932d13ada36c..84dfff4f7b1f 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -997,26 +997,27 @@ void __init create_kmalloc_caches(void)
*/
size_t __ksize(const void *object)
{
- struct folio *folio;
+ const struct page *page;
+ const struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
- folio = virt_to_folio(object);
+ page = virt_to_page(object);
- if (unlikely(!folio_test_slab(folio))) {
- if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE))
- return 0;
- if (WARN_ON(object != folio_address(folio)))
- return 0;
- return folio_size(folio);
- }
+ if (unlikely(PageLargeKmalloc(page)))
+ return large_kmalloc_size(page);
+
+ slab = page_slab(page);
+ /* Delete this after we're sure there are no users */
+ if (WARN_ON(!slab))
+ return page_size(page);
#ifdef CONFIG_SLUB_DEBUG
- skip_orig_size_check(folio_slab(folio)->slab_cache, object);
+ skip_orig_size_check(slab->slab_cache, object);
#endif
- return slab_ksize(folio_slab(folio)->slab_cache);
+ return slab_ksize(slab->slab_cache);
}
gfp_t kmalloc_fix_flags(gfp_t flags)
@@ -1614,17 +1615,15 @@ static void kfree_rcu_work(struct work_struct *work)
static bool kfree_rcu_sheaf(void *obj)
{
struct kmem_cache *s;
- struct folio *folio;
struct slab *slab;
if (is_vmalloc_addr(obj))
return false;
- folio = virt_to_folio(obj);
- if (unlikely(!folio_test_slab(folio)))
+ slab = virt_to_slab(obj);
+ if (unlikely(!slab))
return false;
- slab = folio_slab(folio);
s = slab->slab_cache;
if (s->cpu_sheaves) {
if (likely(!IS_ENABLED(CONFIG_NUMA) ||
diff --git a/mm/slub.c b/mm/slub.c
index ddd71f4937fa..785e25a14999 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -410,7 +410,6 @@ enum stat_item {
NR_SLUB_STAT_ITEMS
};
-#ifndef CONFIG_SLUB_TINY
struct freelist_tid {
union {
struct {
@@ -436,7 +435,6 @@ struct kmem_cache_cpu {
unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
-#endif /* CONFIG_SLUB_TINY */
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
@@ -473,7 +471,10 @@ struct slab_sheaf {
struct rcu_head rcu_head;
struct list_head barn_list;
/* only used for prefilled sheafs */
- unsigned int capacity;
+ struct {
+ unsigned int capacity;
+ bool pfmemalloc;
+ };
};
struct kmem_cache *cache;
unsigned int size;
@@ -598,12 +599,10 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
return freelist_ptr_decode(s, p, ptr_addr);
}
-#ifndef CONFIG_SLUB_TINY
static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
prefetchw(object + s->offset);
}
-#endif
/*
* When running under KMSAN, get_freepointer_safe() may return an uninitialized
@@ -715,10 +714,12 @@ static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
return s->cpu_partial_slabs;
}
#else
+#ifdef SLAB_SUPPORTS_SYSFS
static inline void
slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
}
+#endif
static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
{
@@ -971,7 +972,7 @@ static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
static slab_flags_t slub_debug;
#endif
-static char *slub_debug_string;
+static const char *slub_debug_string __ro_after_init;
static int disable_higher_order_debug;
/*
@@ -1778,8 +1779,8 @@ static inline int free_consistency_checks(struct kmem_cache *s,
*
* returns the start of next block if there's any, or NULL
*/
-static char *
-parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
+static const char *
+parse_slub_debug_flags(const char *str, slab_flags_t *flags, const char **slabs, bool init)
{
bool higher_order_disable = false;
@@ -1856,17 +1857,17 @@ check_slabs:
return NULL;
}
-static int __init setup_slub_debug(char *str)
+static int __init setup_slub_debug(const char *str, const struct kernel_param *kp)
{
slab_flags_t flags;
slab_flags_t global_flags;
- char *saved_str;
- char *slab_list;
+ const char *saved_str;
+ const char *slab_list;
bool global_slub_debug_changed = false;
bool slab_list_specified = false;
global_flags = DEBUG_DEFAULT_FLAGS;
- if (*str++ != '=' || !*str)
+ if (!str || !*str)
/*
* No options specified. Switch on full debugging.
*/
@@ -1910,11 +1911,15 @@ out:
static_branch_unlikely(&init_on_free)) &&
(slub_debug & SLAB_POISON))
pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
- return 1;
+ return 0;
}
-__setup("slab_debug", setup_slub_debug);
-__setup_param("slub_debug", slub_debug, setup_slub_debug, 0);
+static const struct kernel_param_ops param_ops_slab_debug __initconst = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = setup_slub_debug,
+};
+__core_param_cb(slab_debug, &param_ops_slab_debug, NULL, 0);
+__core_param_cb(slub_debug, &param_ops_slab_debug, NULL, 0);
/*
* kmem_cache_flags - apply debugging options to the cache
@@ -1928,9 +1933,9 @@ __setup_param("slub_debug", slub_debug, setup_slub_debug, 0);
*/
slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
- char *iter;
+ const char *iter;
size_t len;
- char *next_block;
+ const char *next_block;
slab_flags_t block_flags;
slab_flags_t slub_debug_local = slub_debug;
@@ -1954,7 +1959,7 @@ slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
continue;
/* Found a block that has a slab list, search it */
while (*iter) {
- char *end, *glob;
+ const char *end, *glob;
size_t cmplen;
end = strchrnul(iter, ',');
@@ -2016,15 +2021,21 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
-#ifndef CONFIG_SLUB_TINY
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
}
-#endif
#endif /* CONFIG_SLUB_DEBUG */
+/*
+ * The allocated objcg pointers array is not accounted directly.
+ * Moreover, it should not come from DMA buffer and is not readily
+ * reclaimable. So those GFP bits should be masked off.
+ */
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
+ __GFP_ACCOUNT | __GFP_NOFAIL)
+
#ifdef CONFIG_SLAB_OBJ_EXT
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
@@ -2075,14 +2086,6 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
-/*
- * The allocated objcg pointers array is not accounted directly.
- * Moreover, it should not come from DMA buffer and is not readily
- * reclaimable. So those GFP bits should be masked off.
- */
-#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
- __GFP_ACCOUNT | __GFP_NOFAIL)
-
static inline void init_slab_obj_exts(struct slab *slab)
{
slab->obj_exts = 0;
@@ -2362,33 +2365,34 @@ bool memcg_slab_post_charge(void *p, gfp_t flags)
{
struct slabobj_ext *slab_exts;
struct kmem_cache *s;
- struct folio *folio;
+ struct page *page;
struct slab *slab;
unsigned long off;
- folio = virt_to_folio(p);
- if (!folio_test_slab(folio)) {
+ page = virt_to_page(p);
+ if (PageLargeKmalloc(page)) {
+ unsigned int order;
int size;
- if (folio_memcg_kmem(folio))
+ if (PageMemcgKmem(page))
return true;
- if (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
- folio_order(folio)))
+ order = large_kmalloc_order(page);
+ if (__memcg_kmem_charge_page(page, flags, order))
return false;
/*
- * This folio has already been accounted in the global stats but
+ * This page has already been accounted in the global stats but
* not in the memcg stats. So, subtract from the global and use
* the interface which adds to both global and memcg stats.
*/
- size = folio_size(folio);
- node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size);
- lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size);
+ size = PAGE_SIZE << order;
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, -size);
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, size);
return true;
}
- slab = folio_slab(folio);
+ slab = page_slab(page);
s = slab->slab_cache;
/*
@@ -2590,8 +2594,24 @@ static void *setup_object(struct kmem_cache *s, void *object)
static struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp)
{
- struct slab_sheaf *sheaf = kzalloc(struct_size(sheaf, objects,
- s->sheaf_capacity), gfp);
+ struct slab_sheaf *sheaf;
+ size_t sheaf_size;
+
+ if (gfp & __GFP_NO_OBJ_EXT)
+ return NULL;
+
+ gfp &= ~OBJCGS_CLEAR_MASK;
+
+ /*
+ * Prevent recursion to the same cache, or a deep stack of kmallocs of
+ * varying sizes (sheaf capacity might differ for each kmalloc size
+ * bucket)
+ */
+ if (s->flags & SLAB_KMALLOC)
+ gfp |= __GFP_NO_OBJ_EXT;
+
+ sheaf_size = struct_size(sheaf, objects, s->sheaf_capacity);
+ sheaf = kzalloc(sheaf_size, gfp);
if (unlikely(!sheaf))
return NULL;
@@ -2644,7 +2664,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
if (!sheaf)
return NULL;
- if (refill_sheaf(s, sheaf, gfp)) {
+ if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC)) {
free_empty_sheaf(s, sheaf);
return NULL;
}
@@ -2722,12 +2742,13 @@ static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf)
sheaf->size = 0;
}
-static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
+static bool __rcu_free_sheaf_prepare(struct kmem_cache *s,
struct slab_sheaf *sheaf)
{
bool init = slab_want_init_on_free(s);
void **p = &sheaf->objects[0];
unsigned int i = 0;
+ bool pfmemalloc = false;
while (i < sheaf->size) {
struct slab *slab = virt_to_slab(p[i]);
@@ -2740,8 +2761,13 @@ static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
continue;
}
+ if (slab_test_pfmemalloc(slab))
+ pfmemalloc = true;
+
i++;
}
+
+ return pfmemalloc;
}
static void rcu_free_sheaf_nobarn(struct rcu_head *head)
@@ -3004,14 +3030,11 @@ static void barn_init(struct node_barn *barn)
static void barn_shrink(struct kmem_cache *s, struct node_barn *barn)
{
- struct list_head empty_list;
- struct list_head full_list;
+ LIST_HEAD(empty_list);
+ LIST_HEAD(full_list);
struct slab_sheaf *sheaf, *sheaf2;
unsigned long flags;
- INIT_LIST_HEAD(&empty_list);
- INIT_LIST_HEAD(&full_list);
-
spin_lock_irqsave(&barn->lock, flags);
list_splice_init(&barn->sheaves_full, &full_list);
@@ -3037,24 +3060,24 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
struct kmem_cache_order_objects oo,
bool allow_spin)
{
- struct folio *folio;
+ struct page *page;
struct slab *slab;
unsigned int order = oo_order(oo);
if (unlikely(!allow_spin))
- folio = (struct folio *)alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */,
+ page = alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */,
node, order);
else if (node == NUMA_NO_NODE)
- folio = (struct folio *)alloc_frozen_pages(flags, order);
+ page = alloc_frozen_pages(flags, order);
else
- folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL);
+ page = __alloc_frozen_pages(flags, order, node, NULL);
- if (!folio)
+ if (!page)
return NULL;
- slab = folio_slab(folio);
- __folio_set_slab(folio);
- if (folio_is_pfmemalloc(folio))
+ __SetPageSlab(page);
+ slab = page_slab(page);
+ if (page_is_pfmemalloc(page))
slab_set_pfmemalloc(slab);
return slab;
@@ -3278,16 +3301,16 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
static void __free_slab(struct kmem_cache *s, struct slab *slab)
{
- struct folio *folio = slab_folio(slab);
- int order = folio_order(folio);
+ struct page *page = slab_page(slab);
+ int order = compound_order(page);
int pages = 1 << order;
__slab_clear_pfmemalloc(slab);
- folio->mapping = NULL;
- __folio_clear_slab(folio);
+ page->mapping = NULL;
+ __ClearPageSlab(page);
mm_account_reclaimed_pages(pages);
unaccount_slab(slab, order, s);
- free_frozen_pages(&folio->page, order);
+ free_frozen_pages(page, order);
}
static void rcu_free_slab(struct rcu_head *h)
@@ -3607,8 +3630,6 @@ static struct slab *get_partial(struct kmem_cache *s, int node,
return get_any_partial(s, pc);
}
-#ifndef CONFIG_SLUB_TINY
-
#ifdef CONFIG_PREEMPTION
/*
* Calculate the next globally unique transaction for disambiguation
@@ -4004,12 +4025,6 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s)
return c->slab || slub_percpu_partial(c);
}
-#else /* CONFIG_SLUB_TINY */
-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
-static inline bool has_cpu_slab(int cpu, struct kmem_cache *s) { return false; }
-static inline void flush_this_cpu_slab(struct kmem_cache *s) { }
-#endif /* CONFIG_SLUB_TINY */
-
static bool has_pcs_used(int cpu, struct kmem_cache *s)
{
struct slub_percpu_sheaves *pcs;
@@ -4350,7 +4365,6 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
return true;
}
-#ifndef CONFIG_SLUB_TINY
static inline bool
__update_cpu_freelist_fast(struct kmem_cache *s,
void *freelist_old, void *freelist_new,
@@ -4607,7 +4621,7 @@ new_objects:
pc.orig_size = orig_size;
slab = get_partial(s, node, &pc);
if (slab) {
- if (kmem_cache_debug(s)) {
+ if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
freelist = pc.object;
/*
* For debug caches here we had to go through
@@ -4645,7 +4659,7 @@ new_objects:
stat(s, ALLOC_SLAB);
- if (kmem_cache_debug(s)) {
+ if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags);
if (unlikely(!freelist)) {
@@ -4857,32 +4871,6 @@ redo:
return object;
}
-#else /* CONFIG_SLUB_TINY */
-static void *__slab_alloc_node(struct kmem_cache *s,
- gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
-{
- struct partial_context pc;
- struct slab *slab;
- void *object;
-
- pc.flags = gfpflags;
- pc.orig_size = orig_size;
- slab = get_partial(s, node, &pc);
-
- if (slab)
- return pc.object;
-
- slab = new_slab(s, gfpflags, node);
- if (unlikely(!slab)) {
- slab_out_of_memory(s, gfpflags, node);
- return NULL;
- }
-
- object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags);
-
- return object;
-}
-#endif /* CONFIG_SLUB_TINY */
/*
* If the object has been wiped upon free, make sure it's fully initialized by
@@ -5023,7 +5011,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
return NULL;
if (empty) {
- if (!refill_sheaf(s, empty, gfp)) {
+ if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC)) {
full = empty;
} else {
/*
@@ -5134,7 +5122,7 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
* be false because of cpu migration during an unlocked part of
* the current allocation or previous freeing process.
*/
- if (folio_nid(virt_to_folio(object)) != node) {
+ if (page_to_nid(virt_to_page(object)) != node) {
local_unlock(&s->cpu_sheaves->lock);
return NULL;
}
@@ -5323,6 +5311,26 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int nod
}
EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
+static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s,
+ struct slab_sheaf *sheaf, gfp_t gfp)
+{
+ int ret = 0;
+
+ ret = refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC);
+
+ if (likely(!ret || !gfp_pfmemalloc_allowed(gfp)))
+ return ret;
+
+ /*
+ * if we are allowed to, refill sheaf with pfmemalloc but then remember
+ * it for when it's returned
+ */
+ ret = refill_sheaf(s, sheaf, gfp);
+ sheaf->pfmemalloc = true;
+
+ return ret;
+}
+
/*
* returns a sheaf that has at least the requested size
* when prefilling is needed, do so with given gfp flags
@@ -5357,6 +5365,10 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
sheaf->cache = s;
sheaf->capacity = size;
+ /*
+ * we do not need to care about pfmemalloc here because oversize
+ * sheaves are always flushed and freed when returned
+ */
if (!__kmem_cache_alloc_bulk(s, gfp, size,
&sheaf->objects[0])) {
kfree(sheaf);
@@ -5393,17 +5405,18 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
if (!sheaf)
sheaf = alloc_empty_sheaf(s, gfp);
- if (sheaf && sheaf->size < size) {
- if (refill_sheaf(s, sheaf, gfp)) {
+ if (sheaf) {
+ sheaf->capacity = s->sheaf_capacity;
+ sheaf->pfmemalloc = false;
+
+ if (sheaf->size < size &&
+ __prefill_sheaf_pfmemalloc(s, sheaf, gfp)) {
sheaf_flush_unused(s, sheaf);
free_empty_sheaf(s, sheaf);
sheaf = NULL;
}
}
- if (sheaf)
- sheaf->capacity = s->sheaf_capacity;
-
return sheaf;
}
@@ -5423,7 +5436,8 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slub_percpu_sheaves *pcs;
struct node_barn *barn;
- if (unlikely(sheaf->capacity != s->sheaf_capacity)) {
+ if (unlikely((sheaf->capacity != s->sheaf_capacity)
+ || sheaf->pfmemalloc)) {
sheaf_flush_unused(s, sheaf);
kfree(sheaf);
return;
@@ -5489,7 +5503,7 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
if (likely(sheaf->capacity >= size)) {
if (likely(sheaf->capacity == s->sheaf_capacity))
- return refill_sheaf(s, sheaf, gfp);
+ return __prefill_sheaf_pfmemalloc(s, sheaf, gfp);
if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size,
&sheaf->objects[sheaf->size])) {
@@ -5522,6 +5536,9 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
*
* The gfp parameter is meant only to specify __GFP_ZERO or __GFP_ACCOUNT
* memcg charging is forced over limit if necessary, to avoid failure.
+ *
+ * It is possible that the allocation comes from kfence and then the sheaf
+ * size is not decreased.
*/
void *
kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
@@ -5533,7 +5550,10 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
if (sheaf->size == 0)
goto out;
- ret = sheaf->objects[--sheaf->size];
+ ret = kfence_alloc(s, s->object_size, gfp);
+
+ if (likely(!ret))
+ ret = sheaf->objects[--sheaf->size];
init = slab_want_init_on_alloc(gfp, s);
@@ -5556,7 +5576,7 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
*/
static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
- struct folio *folio;
+ struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
@@ -5566,15 +5586,15 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
flags |= __GFP_COMP;
if (node == NUMA_NO_NODE)
- folio = (struct folio *)alloc_frozen_pages_noprof(flags, order);
+ page = alloc_frozen_pages_noprof(flags, order);
else
- folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL);
+ page = __alloc_frozen_pages_noprof(flags, order, node, NULL);
- if (folio) {
- ptr = folio_address(folio);
- lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
+ if (page) {
+ ptr = page_address(page);
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
- __folio_set_large_kmalloc(folio);
+ __SetPageLargeKmalloc(page);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
@@ -5701,9 +5721,7 @@ retry:
* it did local_lock_irqsave(&s->cpu_slab->lock, flags).
* In this case fast path with __update_cpu_freelist_fast() is not safe.
*/
-#ifndef CONFIG_SLUB_TINY
if (!in_nmi() || !local_lock_is_locked(&s->cpu_slab->lock))
-#endif
ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);
if (PTR_ERR(ret) == -EBUSY) {
@@ -6192,8 +6210,12 @@ static void rcu_free_sheaf(struct rcu_head *head)
* handles it fine. The only downside is that sheaf will serve fewer
* allocations when reused. It only happens due to debugging, which is a
* performance hit anyway.
+ *
+ * If it returns true, there was at least one object from a pfmemalloc
+ * slab, so simply flush everything.
*/
- __rcu_free_sheaf_prepare(s, sheaf);
+ if (__rcu_free_sheaf_prepare(s, sheaf))
+ goto flush;
n = get_node(s, sheaf->node);
if (!n)
@@ -6348,7 +6370,8 @@ next_remote_batch:
continue;
}
- if (unlikely(IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)) {
+ if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)
+ || slab_test_pfmemalloc(slab))) {
remote_objects[remote_nr] = p[i];
p[i] = p[--size];
if (++remote_nr >= PCS_BATCH_MAX)
@@ -6490,14 +6513,10 @@ static void free_deferred_objects(struct irq_work *work)
llist_for_each_safe(pos, t, llnode) {
struct slab *slab = container_of(pos, struct slab, llnode);
-#ifdef CONFIG_SLUB_TINY
- free_slab(slab->slab_cache, slab);
-#else
if (slab->frozen)
deactivate_slab(slab->slab_cache, slab, slab->flush_freelist);
else
free_slab(slab->slab_cache, slab);
-#endif
}
}
@@ -6533,7 +6552,6 @@ void defer_free_barrier(void)
irq_work_sync(&per_cpu_ptr(&defer_free_objects, cpu)->work);
}
-#ifndef CONFIG_SLUB_TINY
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
* can perform fastpath freeing without additional function calls.
@@ -6626,14 +6644,6 @@ redo:
}
stat_add(s, FREE_FASTPATH, cnt);
}
-#else /* CONFIG_SLUB_TINY */
-static void do_slab_free(struct kmem_cache *s,
- struct slab *slab, void *head, void *tail,
- int cnt, unsigned long addr)
-{
- __slab_free(s, slab, head, tail, cnt, addr);
-}
-#endif /* CONFIG_SLUB_TINY */
static __fastpath_inline
void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
@@ -6646,7 +6656,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
return;
if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
- slab_nid(slab) == numa_mem_id())) {
+ slab_nid(slab) == numa_mem_id())
+ && likely(!slab_test_pfmemalloc(slab))) {
if (likely(free_to_pcs(s, object)))
return;
}
@@ -6756,12 +6767,12 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
}
EXPORT_SYMBOL(kmem_cache_free);
-static void free_large_kmalloc(struct folio *folio, void *object)
+static void free_large_kmalloc(struct page *page, void *object)
{
- unsigned int order = folio_order(folio);
+ unsigned int order = compound_order(page);
- if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) {
- dump_page(&folio->page, "Not a kmalloc allocation");
+ if (WARN_ON_ONCE(!PageLargeKmalloc(page))) {
+ dump_page(page, "Not a kmalloc allocation");
return;
}
@@ -6772,10 +6783,10 @@ static void free_large_kmalloc(struct folio *folio, void *object)
kasan_kfree_large(object);
kmsan_kfree_large(object);
- lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
- __folio_clear_large_kmalloc(folio);
- free_frozen_pages(&folio->page, order);
+ __ClearPageLargeKmalloc(page);
+ free_frozen_pages(page, order);
}
/*
@@ -6785,7 +6796,7 @@ static void free_large_kmalloc(struct folio *folio, void *object)
void kvfree_rcu_cb(struct rcu_head *head)
{
void *obj = head;
- struct folio *folio;
+ struct page *page;
struct slab *slab;
struct kmem_cache *s;
void *slab_addr;
@@ -6796,20 +6807,20 @@ void kvfree_rcu_cb(struct rcu_head *head)
return;
}
- folio = virt_to_folio(obj);
- if (!folio_test_slab(folio)) {
+ page = virt_to_page(obj);
+ slab = page_slab(page);
+ if (!slab) {
/*
* rcu_head offset can be only less than page size so no need to
- * consider folio order
+ * consider allocation order
*/
obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
- free_large_kmalloc(folio, obj);
+ free_large_kmalloc(page, obj);
return;
}
- slab = folio_slab(folio);
s = slab->slab_cache;
- slab_addr = folio_address(folio);
+ slab_addr = slab_address(slab);
if (is_kfence_address(obj)) {
obj = kfence_object_start(obj);
@@ -6831,7 +6842,7 @@ void kvfree_rcu_cb(struct rcu_head *head)
*/
void kfree(const void *object)
{
- struct folio *folio;
+ struct page *page;
struct slab *slab;
struct kmem_cache *s;
void *x = (void *)object;
@@ -6841,13 +6852,13 @@ void kfree(const void *object)
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
- folio = virt_to_folio(object);
- if (unlikely(!folio_test_slab(folio))) {
- free_large_kmalloc(folio, (void *)object);
+ page = virt_to_page(object);
+ slab = page_slab(page);
+ if (!slab) {
+ free_large_kmalloc(page, (void *)object);
return;
}
- slab = folio_slab(folio);
s = slab->slab_cache;
slab_free(s, slab, x, _RET_IP_);
}
@@ -6864,7 +6875,6 @@ EXPORT_SYMBOL(kfree);
*/
void kfree_nolock(const void *object)
{
- struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
void *x = (void *)object;
@@ -6872,13 +6882,12 @@ void kfree_nolock(const void *object)
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
- folio = virt_to_folio(object);
- if (unlikely(!folio_test_slab(folio))) {
+ slab = virt_to_slab(object);
+ if (unlikely(!slab)) {
WARN_ONCE(1, "large_kmalloc is not supported by kfree_nolock()");
return;
}
- slab = folio_slab(folio);
s = slab->slab_cache;
memcg_slab_free_hook(s, slab, &x, 1);
@@ -6910,11 +6919,7 @@ void kfree_nolock(const void *object)
* since kasan quarantine takes locks and not supported from NMI.
*/
kasan_slab_free(s, x, false, false, /* skip quarantine */true);
-#ifndef CONFIG_SLUB_TINY
do_slab_free(s, slab, x, x, 0, _RET_IP_);
-#else
- defer_free(s, x);
-#endif
}
EXPORT_SYMBOL_GPL(kfree_nolock);
@@ -6946,16 +6951,16 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags,
if (is_kfence_address(p)) {
ks = orig_size = kfence_ksize(p);
} else {
- struct folio *folio;
+ struct page *page = virt_to_page(p);
+ struct slab *slab = page_slab(page);
- folio = virt_to_folio(p);
- if (unlikely(!folio_test_slab(folio))) {
+ if (!slab) {
/* Big kmalloc object */
- WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
- WARN_ON(p != folio_address(folio));
- ks = folio_size(folio);
+ ks = page_size(page);
+ WARN_ON(ks <= KMALLOC_MAX_CACHE_SIZE);
+ WARN_ON(p != page_address(page));
} else {
- s = folio_slab(folio)->slab_cache;
+ s = slab->slab_cache;
orig_size = get_orig_size(s, (void *)p);
ks = s->object_size;
}
@@ -7259,23 +7264,25 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
{
int lookahead = 3;
void *object;
- struct folio *folio;
+ struct page *page;
+ struct slab *slab;
size_t same;
object = p[--size];
- folio = virt_to_folio(object);
+ page = virt_to_page(object);
+ slab = page_slab(page);
if (!s) {
/* Handle kalloc'ed objects */
- if (unlikely(!folio_test_slab(folio))) {
- free_large_kmalloc(folio, object);
+ if (!slab) {
+ free_large_kmalloc(page, object);
df->slab = NULL;
return size;
}
/* Derive kmem_cache from object */
- df->slab = folio_slab(folio);
- df->s = df->slab->slab_cache;
+ df->slab = slab;
+ df->s = slab->slab_cache;
} else {
- df->slab = folio_slab(folio);
+ df->slab = slab;
df->s = cache_from_obj(s, object); /* Support for memcg */
}
@@ -7364,7 +7371,6 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
-#ifndef CONFIG_SLUB_TINY
static inline
int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
@@ -7382,14 +7388,8 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
local_lock_irqsave(&s->cpu_slab->lock, irqflags);
for (i = 0; i < size; i++) {
- void *object = kfence_alloc(s, s->object_size, flags);
-
- if (unlikely(object)) {
- p[i] = object;
- continue;
- }
+ void *object = c->freelist;
- object = c->freelist;
if (unlikely(!object)) {
/*
* We may have removed an object from c->freelist using
@@ -7435,41 +7435,13 @@ error:
return 0;
}
-#else /* CONFIG_SLUB_TINY */
-static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
- size_t size, void **p)
-{
- int i;
-
- for (i = 0; i < size; i++) {
- void *object = kfence_alloc(s, s->object_size, flags);
-
- if (unlikely(object)) {
- p[i] = object;
- continue;
- }
-
- p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
- _RET_IP_, s->object_size);
- if (unlikely(!p[i]))
- goto error;
-
- maybe_wipe_obj_freeptr(s, p[i]);
- }
-
- return i;
-
-error:
- __kmem_cache_free_bulk(s, i, p);
- return 0;
-}
-#endif /* CONFIG_SLUB_TINY */
/* Note that interrupts must be enabled when calling this function. */
int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
unsigned int i = 0;
+ void *kfence_obj;
if (!size)
return 0;
@@ -7478,6 +7450,20 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(!s))
return 0;
+ /*
+ * to make things simpler, only assume at most one kfence-allocated
+ * object per bulk allocation and choose its index randomly
+ */
+ kfence_obj = kfence_alloc(s, s->object_size, flags);
+
+ if (unlikely(kfence_obj)) {
+ if (unlikely(size == 1)) {
+ p[0] = kfence_obj;
+ goto out;
+ }
+ size--;
+ }
+
if (s->cpu_sheaves)
i = alloc_from_pcs_bulk(s, size, p);
@@ -7489,10 +7475,23 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) {
if (i > 0)
__kmem_cache_free_bulk(s, i, p);
+ if (kfence_obj)
+ __kfence_free(kfence_obj);
return 0;
}
}
+ if (unlikely(kfence_obj)) {
+ int idx = get_random_u32_below(size + 1);
+
+ if (idx != size)
+ p[size] = p[idx];
+ p[idx] = kfence_obj;
+
+ size++;
+ }
+
+out:
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
@@ -7654,7 +7653,6 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
barn_init(barn);
}
-#ifndef CONFIG_SLUB_TINY
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
@@ -7675,12 +7673,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
return 1;
}
-#else
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
-{
- return 1;
-}
-#endif /* CONFIG_SLUB_TINY */
static int init_percpu_sheaves(struct kmem_cache *s)
{
@@ -7770,13 +7762,11 @@ void __kmem_cache_release(struct kmem_cache *s)
cache_random_seq_destroy(s);
if (s->cpu_sheaves)
pcs_destroy(s);
-#ifndef CONFIG_SLUB_TINY
#ifdef CONFIG_PREEMPT_RT
if (s->cpu_slab)
lockdep_unregister_key(&s->lock_key);
#endif
free_percpu(s->cpu_slab);
-#endif
free_kmem_cache_nodes(s);
}
@@ -8142,46 +8132,53 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
* Kmalloc subsystem
*******************************************************************/
-static int __init setup_slub_min_order(char *str)
+static int __init setup_slub_min_order(const char *str, const struct kernel_param *kp)
{
- get_option(&str, (int *)&slub_min_order);
+ int ret;
+
+ ret = kstrtouint(str, 0, &slub_min_order);
+ if (ret)
+ return ret;
if (slub_min_order > slub_max_order)
slub_max_order = slub_min_order;
- return 1;
+ return 0;
}
-__setup("slab_min_order=", setup_slub_min_order);
-__setup_param("slub_min_order=", slub_min_order, setup_slub_min_order, 0);
-
+static const struct kernel_param_ops param_ops_slab_min_order __initconst = {
+ .set = setup_slub_min_order,
+};
+__core_param_cb(slab_min_order, &param_ops_slab_min_order, &slub_min_order, 0);
+__core_param_cb(slub_min_order, &param_ops_slab_min_order, &slub_min_order, 0);
-static int __init setup_slub_max_order(char *str)
+static int __init setup_slub_max_order(const char *str, const struct kernel_param *kp)
{
- get_option(&str, (int *)&slub_max_order);
+ int ret;
+
+ ret = kstrtouint(str, 0, &slub_max_order);
+ if (ret)
+ return ret;
+
slub_max_order = min_t(unsigned int, slub_max_order, MAX_PAGE_ORDER);
if (slub_min_order > slub_max_order)
slub_min_order = slub_max_order;
- return 1;
+ return 0;
}
-__setup("slab_max_order=", setup_slub_max_order);
-__setup_param("slub_max_order=", slub_max_order, setup_slub_max_order, 0);
-
-static int __init setup_slub_min_objects(char *str)
-{
- get_option(&str, (int *)&slub_min_objects);
-
- return 1;
-}
+static const struct kernel_param_ops param_ops_slab_max_order __initconst = {
+ .set = setup_slub_max_order,
+};
+__core_param_cb(slab_max_order, &param_ops_slab_max_order, &slub_max_order, 0);
+__core_param_cb(slub_max_order, &param_ops_slab_max_order, &slub_max_order, 0);
-__setup("slab_min_objects=", setup_slub_min_objects);
-__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
+core_param(slab_min_objects, slub_min_objects, uint, 0);
+core_param(slub_min_objects, slub_min_objects, uint, 0);
#ifdef CONFIG_NUMA
-static int __init setup_slab_strict_numa(char *str)
+static int __init setup_slab_strict_numa(const char *str, const struct kernel_param *kp)
{
if (nr_node_ids > 1) {
static_branch_enable(&strict_numa);
@@ -8190,10 +8187,14 @@ static int __init setup_slab_strict_numa(char *str)
pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
}
- return 1;
+ return 0;
}
-__setup("slab_strict_numa", setup_slab_strict_numa);
+static const struct kernel_param_ops param_ops_slab_strict_numa __initconst = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = setup_slab_strict_numa,
+};
+__core_param_cb(slab_strict_numa, &param_ops_slab_strict_numa, NULL, 0);
#endif
@@ -8519,10 +8520,8 @@ void __init kmem_cache_init(void)
void __init kmem_cache_init_late(void)
{
-#ifndef CONFIG_SLUB_TINY
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
WARN_ON(!flushwq);
-#endif
}
struct kmem_cache *
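A hedged sketch (not part of the patch) of the uniform-insertion step that kmem_cache_alloc_bulk_noprof() now uses to place its single KFENCE object: after the bulk allocation of n objects, an index in [0, n] is drawn, the displaced pointer is moved to the tail, and the KFENCE object ends up in any of the n + 1 slots with equal probability. insert_at_random_index() is a hypothetical stand-alone helper; get_random_u32_below() is the kernel RNG helper the real code calls.

/* Hedged sketch: insert one extra object at a uniformly random index. */
static unsigned int insert_at_random_index(void **p, unsigned int n, void *obj)
{
	unsigned int idx = get_random_u32_below(n + 1);	/* uniform in [0, n] */

	if (idx != n)
		p[n] = p[idx];	/* displaced pointer goes to the new tail slot */
	p[idx] = obj;		/* obj lands in each slot with probability 1/(n+1) */

	return n + 1;		/* array now holds n + 1 objects */
}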
diff --git a/mm/usercopy.c b/mm/usercopy.c
index dbdcc43964fb..5de7a518b1b1 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -164,7 +164,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
{
unsigned long addr = (unsigned long)ptr;
unsigned long offset;
- struct folio *folio;
+ struct page *page;
+ struct slab *slab;
if (is_kmap_addr(ptr)) {
offset = offset_in_page(ptr);
@@ -189,16 +190,23 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
if (!virt_addr_valid(ptr))
return;
- folio = virt_to_folio(ptr);
-
- if (folio_test_slab(folio)) {
+ page = virt_to_page(ptr);
+ slab = page_slab(page);
+ if (slab) {
/* Check slab allocator for flags and size. */
- __check_heap_object(ptr, n, folio_slab(folio), to_user);
- } else if (folio_test_large(folio)) {
- offset = ptr - folio_address(folio);
- if (n > folio_size(folio) - offset)
+ __check_heap_object(ptr, n, slab, to_user);
+ } else if (PageCompound(page)) {
+ page = compound_head(page);
+ offset = ptr - page_address(page);
+ if (n > page_size(page) - offset)
usercopy_abort("page alloc", NULL, to_user, offset, n);
}
+
+ /*
+ * We cannot check non-compound pages. They might be part of
+ * a large allocation, in which case crossing a page boundary
+ * is fine.
+ */
}
DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_HARDENED_USERCOPY_DEFAULT_ON,