Diffstat (limited to 'mm')
-rw-r--r--  mm/damon/core.c    |   7
-rw-r--r--  mm/damon/sysfs.c   |   7
-rw-r--r--  mm/huge_memory.c   |   3
-rw-r--r--  mm/hugetlb.c       |   5
-rw-r--r--  mm/kasan/common.c  |  12
-rw-r--r--  mm/kfence/core.c   |  14
-rw-r--r--  mm/memcontrol.c    |  40
-rw-r--r--  mm/migrate.c       |   3
-rw-r--r--  mm/mremap.c        |  15
-rw-r--r--  mm/page_owner.c    |   3
-rw-r--r--  mm/slab.h          |  60
-rw-r--r--  mm/slab_common.c   |  29
-rw-r--r--  mm/slub.c          | 489
-rw-r--r--  mm/usercopy.c      |  24
14 files changed, 354 insertions(+), 357 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c index 93848b4c6944..109b050c795a 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -452,6 +452,9 @@ void damon_destroy_scheme(struct damos *s) damos_for_each_filter_safe(f, next, s) damos_destroy_filter(f); + damos_for_each_ops_filter_safe(f, next, s) + damos_destroy_filter(f); + kfree(s->migrate_dests.node_id_arr); kfree(s->migrate_dests.weight_arr); damon_del_scheme(s); @@ -832,7 +835,7 @@ int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src) src_goal->metric, src_goal->target_value); if (!new_goal) return -ENOMEM; - damos_commit_quota_goal_union(new_goal, src_goal); + damos_commit_quota_goal(new_goal, src_goal); damos_add_quota_goal(dst, new_goal); } return 0; @@ -1450,7 +1453,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) INIT_LIST_HEAD(&control->list); mutex_lock(&ctx->call_controls_lock); - list_add_tail(&ctx->call_controls, &control->list); + list_add_tail(&control->list, &ctx->call_controls); mutex_unlock(&ctx->call_controls_lock); if (!damon_is_running(ctx)) return -EINVAL; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 2fc722f998f8..cd6815ecc04e 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1473,13 +1473,14 @@ static int damon_sysfs_commit_input(void *data) if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); test_ctx = damon_new_ctx(); + if (!test_ctx) + return -ENOMEM; err = damon_commit_ctx(test_ctx, param_ctx); - if (err) { - damon_destroy_ctx(test_ctx); + if (err) goto out; - } err = damon_commit_ctx(kdamond->damon_ctx, param_ctx); out: + damon_destroy_ctx(test_ctx); damon_destroy_ctx(param_ctx); return err; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1b81680b4225..1d1b74950332 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -4109,6 +4109,9 @@ static bool thp_underused(struct folio *folio) if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) return false; + if (folio_contain_hwpoisoned_page(folio)) + return false; + for (i = 0; i < folio_nr_pages(folio); i++) { if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) { if (++num_zero_pages > khugepaged_max_ptes_none) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 795ee393eac0..0455119716ec 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7614,13 +7614,12 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); - i_mmap_assert_write_locked(vma->vm_file->f_mapping); - hugetlb_vma_assert_locked(vma); if (sz != PMD_SIZE) return 0; if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep))) return 0; - + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + hugetlb_vma_assert_locked(vma); pud_clear(pud); /* * Once our caller drops the rmap lock, some other process might be diff --git a/mm/kasan/common.c b/mm/kasan/common.c index d4c14359feaf..38e8bb0bf326 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -520,24 +520,20 @@ void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order, bool __kasan_mempool_poison_object(void *ptr, unsigned long ip) { - struct folio *folio = virt_to_folio(ptr); + struct page *page = virt_to_page(ptr); struct slab *slab; - /* - * This function can be called for large kmalloc allocation that get - * their memory from page_alloc. Thus, the folio might not be a slab. 
- */ - if (unlikely(!folio_test_slab(folio))) { + if (unlikely(PageLargeKmalloc(page))) { if (check_page_allocation(ptr, ip)) return false; - kasan_poison(ptr, folio_size(folio), KASAN_PAGE_FREE, false); + kasan_poison(ptr, page_size(page), KASAN_PAGE_FREE, false); return true; } if (is_kfence_address(ptr)) return true; - slab = folio_slab(folio); + slab = page_slab(page); if (check_slab_allocation(slab->slab_cache, ptr, ip)) return false; diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 727c20c94ac5..e62b5516bf48 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -612,14 +612,15 @@ static unsigned long kfence_init_pool(void) * enters __slab_free() slow-path. */ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { - struct slab *slab; + struct page *page; if (!i || (i % 2)) continue; - slab = page_slab(pfn_to_page(start_pfn + i)); - __folio_set_slab(slab_folio(slab)); + page = pfn_to_page(start_pfn + i); + __SetPageSlab(page); #ifdef CONFIG_MEMCG + struct slab *slab = page_slab(page); slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts | MEMCG_DATA_OBJEXTS; #endif @@ -665,16 +666,17 @@ static unsigned long kfence_init_pool(void) reset_slab: for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { - struct slab *slab; + struct page *page; if (!i || (i % 2)) continue; - slab = page_slab(pfn_to_page(start_pfn + i)); + page = pfn_to_page(start_pfn + i); #ifdef CONFIG_MEMCG + struct slab *slab = page_slab(page); slab->obj_exts = 0; #endif - __folio_clear_slab(slab_folio(slab)); + __ClearPageSlab(page); } return addr; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4deda33625f4..b46356da6c0e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2557,38 +2557,25 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, } static __always_inline -struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) +struct mem_cgroup *mem_cgroup_from_obj_slab(struct slab *slab, void *p) { /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in * slab->obj_exts. */ - if (folio_test_slab(folio)) { - struct slabobj_ext *obj_exts; - struct slab *slab; - unsigned int off; - - slab = folio_slab(folio); - obj_exts = slab_obj_exts(slab); - if (!obj_exts) - return NULL; - - off = obj_to_index(slab->slab_cache, slab, p); - if (obj_exts[off].objcg) - return obj_cgroup_memcg(obj_exts[off].objcg); + struct slabobj_ext *obj_exts; + unsigned int off; + obj_exts = slab_obj_exts(slab); + if (!obj_exts) return NULL; - } - /* - * folio_memcg_check() is used here, because in theory we can encounter - * a folio where the slab flag has been cleared already, but - * slab->obj_exts has not been freed yet - * folio_memcg_check() will guarantee that a proper memory - * cgroup pointer or NULL will be returned. 
- */ - return folio_memcg_check(folio); + off = obj_to_index(slab->slab_cache, slab, p); + if (obj_exts[off].objcg) + return obj_cgroup_memcg(obj_exts[off].objcg); + + return NULL; } /* @@ -2602,10 +2589,15 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) */ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) { + struct slab *slab; + if (mem_cgroup_disabled()) return NULL; - return mem_cgroup_from_obj_folio(virt_to_folio(p), p); + slab = virt_to_slab(p); + if (slab) + return mem_cgroup_from_obj_slab(slab, p); + return folio_memcg_check(virt_to_folio(p)); } static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg) diff --git a/mm/migrate.c b/mm/migrate.c index e3065c9edb55..c0e9f15be2a2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -301,8 +301,9 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, struct page *page = folio_page(folio, idx); pte_t newpte; - if (PageCompound(page)) + if (PageCompound(page) || PageHWPoison(page)) return false; + VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(pte_present(old_pte), page); diff --git a/mm/mremap.c b/mm/mremap.c index 35de0a7b910e..bd7314898ec5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1237,10 +1237,10 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, } /* - * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() and - * account flags on remaining VMA by convention (it cannot be mlock()'d any - * longer, as pages in range are no longer mapped), and removing anon_vma_chain - * links from it (if the entire VMA was copied over). + * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() flag on + * remaining VMA by convention (it cannot be mlock()'d any longer, as pages in + * range are no longer mapped), and removing anon_vma_chain links from it if the + * entire VMA was copied over. */ static void dontunmap_complete(struct vma_remap_struct *vrm, struct vm_area_struct *new_vma) @@ -1250,11 +1250,8 @@ static void dontunmap_complete(struct vma_remap_struct *vrm, unsigned long old_start = vrm->vma->vm_start; unsigned long old_end = vrm->vma->vm_end; - /* - * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old - * vma. - */ - vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT); + /* We always clear VM_LOCKED[ONFAULT] on the old VMA. */ + vm_flags_clear(vrm->vma, VM_LOCKED_MASK); /* * anon_vma links of the old vma is no longer needed after its page diff --git a/mm/page_owner.c b/mm/page_owner.c index c3ca21132c2c..589ec37c94aa 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -168,6 +168,9 @@ static void add_stack_record_to_list(struct stack_record *stack_record, unsigned long flags; struct stack *stack; + if (!gfpflags_allow_spinning(gfp_mask)) + return; + set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask)); if (!stack) { diff --git a/mm/slab.h b/mm/slab.h index 42627b87d50c..f730e012553c 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -114,19 +114,6 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist #endif /** - * folio_slab - Converts from folio to slab. - * @folio: The folio. - * - * Currently struct slab is a different representation of a folio where - * folio_test_slab() is true. - * - * Return: The slab which contains this folio. 
- */ -#define folio_slab(folio) (_Generic((folio), \ - const struct folio *: (const struct slab *)(folio), \ - struct folio *: (struct slab *)(folio))) - -/** * slab_folio - The folio allocated for a slab * @s: The slab. * @@ -142,20 +129,24 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist struct slab *: (struct folio *)s)) /** - * page_slab - Converts from first struct page to slab. - * @p: The first (either head of compound or single) page of slab. + * page_slab - Converts from struct page to its slab. + * @page: A page which may or may not belong to a slab. * - * A temporary wrapper to convert struct page to struct slab in situations where - * we know the page is the compound head, or single order-0 page. - * - * Long-term ideally everything would work with struct slab directly or go - * through folio to struct slab. - * - * Return: The slab which contains this page + * Return: The slab which contains this page or NULL if the page does + * not belong to a slab. This includes pages returned from large kmalloc. */ -#define page_slab(p) (_Generic((p), \ - const struct page *: (const struct slab *)(p), \ - struct page *: (struct slab *)(p))) +static inline struct slab *page_slab(const struct page *page) +{ + unsigned long head; + + head = READ_ONCE(page->compound_head); + if (head & 1) + page = (struct page *)(head - 1); + if (data_race(page->page_type >> 24) != PGTY_slab) + page = NULL; + + return (struct slab *)page; +} /** * slab_page - The first struct page allocated for a slab @@ -184,12 +175,7 @@ static inline pg_data_t *slab_pgdat(const struct slab *slab) static inline struct slab *virt_to_slab(const void *addr) { - struct folio *folio = virt_to_folio(addr); - - if (!folio_test_slab(folio)) - return NULL; - - return folio_slab(folio); + return page_slab(virt_to_page(addr)); } static inline int slab_order(const struct slab *slab) @@ -232,10 +218,8 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { -#ifndef CONFIG_SLUB_TINY struct kmem_cache_cpu __percpu *cpu_slab; struct lock_class_key lock_key; -#endif struct slub_percpu_sheaves __percpu *cpu_sheaves; /* Used for retrieving partial slabs, etc. 
*/ slab_flags_t flags; @@ -597,6 +581,16 @@ static inline size_t slab_ksize(const struct kmem_cache *s) return s->size; } +static inline unsigned int large_kmalloc_order(const struct page *page) +{ + return page[1].flags.f & 0xff; +} + +static inline size_t large_kmalloc_size(const struct page *page) +{ + return PAGE_SIZE << large_kmalloc_order(page); +} + #ifdef CONFIG_SLUB_DEBUG void dump_unreclaimable_slab(void); #else diff --git a/mm/slab_common.c b/mm/slab_common.c index 932d13ada36c..84dfff4f7b1f 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -997,26 +997,27 @@ void __init create_kmalloc_caches(void) */ size_t __ksize(const void *object) { - struct folio *folio; + const struct page *page; + const struct slab *slab; if (unlikely(object == ZERO_SIZE_PTR)) return 0; - folio = virt_to_folio(object); + page = virt_to_page(object); - if (unlikely(!folio_test_slab(folio))) { - if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE)) - return 0; - if (WARN_ON(object != folio_address(folio))) - return 0; - return folio_size(folio); - } + if (unlikely(PageLargeKmalloc(page))) + return large_kmalloc_size(page); + + slab = page_slab(page); + /* Delete this after we're sure there are no users */ + if (WARN_ON(!slab)) + return page_size(page); #ifdef CONFIG_SLUB_DEBUG - skip_orig_size_check(folio_slab(folio)->slab_cache, object); + skip_orig_size_check(slab->slab_cache, object); #endif - return slab_ksize(folio_slab(folio)->slab_cache); + return slab_ksize(slab->slab_cache); } gfp_t kmalloc_fix_flags(gfp_t flags) @@ -1614,17 +1615,15 @@ static void kfree_rcu_work(struct work_struct *work) static bool kfree_rcu_sheaf(void *obj) { struct kmem_cache *s; - struct folio *folio; struct slab *slab; if (is_vmalloc_addr(obj)) return false; - folio = virt_to_folio(obj); - if (unlikely(!folio_test_slab(folio))) + slab = virt_to_slab(obj); + if (unlikely(!slab)) return false; - slab = folio_slab(folio); s = slab->slab_cache; if (s->cpu_sheaves) { if (likely(!IS_ENABLED(CONFIG_NUMA) || diff --git a/mm/slub.c b/mm/slub.c index ddd71f4937fa..785e25a14999 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -410,7 +410,6 @@ enum stat_item { NR_SLUB_STAT_ITEMS }; -#ifndef CONFIG_SLUB_TINY struct freelist_tid { union { struct { @@ -436,7 +435,6 @@ struct kmem_cache_cpu { unsigned int stat[NR_SLUB_STAT_ITEMS]; #endif }; -#endif /* CONFIG_SLUB_TINY */ static inline void stat(const struct kmem_cache *s, enum stat_item si) { @@ -473,7 +471,10 @@ struct slab_sheaf { struct rcu_head rcu_head; struct list_head barn_list; /* only used for prefilled sheafs */ - unsigned int capacity; + struct { + unsigned int capacity; + bool pfmemalloc; + }; }; struct kmem_cache *cache; unsigned int size; @@ -598,12 +599,10 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) return freelist_ptr_decode(s, p, ptr_addr); } -#ifndef CONFIG_SLUB_TINY static void prefetch_freepointer(const struct kmem_cache *s, void *object) { prefetchw(object + s->offset); } -#endif /* * When running under KMSAN, get_freepointer_safe() may return an uninitialized @@ -715,10 +714,12 @@ static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) return s->cpu_partial_slabs; } #else +#ifdef SLAB_SUPPORTS_SYSFS static inline void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) { } +#endif static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) { @@ -971,7 +972,7 @@ static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; static slab_flags_t slub_debug; #endif -static char *slub_debug_string; +static 
const char *slub_debug_string __ro_after_init; static int disable_higher_order_debug; /* @@ -1778,8 +1779,8 @@ static inline int free_consistency_checks(struct kmem_cache *s, * * returns the start of next block if there's any, or NULL */ -static char * -parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init) +static const char * +parse_slub_debug_flags(const char *str, slab_flags_t *flags, const char **slabs, bool init) { bool higher_order_disable = false; @@ -1856,17 +1857,17 @@ check_slabs: return NULL; } -static int __init setup_slub_debug(char *str) +static int __init setup_slub_debug(const char *str, const struct kernel_param *kp) { slab_flags_t flags; slab_flags_t global_flags; - char *saved_str; - char *slab_list; + const char *saved_str; + const char *slab_list; bool global_slub_debug_changed = false; bool slab_list_specified = false; global_flags = DEBUG_DEFAULT_FLAGS; - if (*str++ != '=' || !*str) + if (!str || !*str) /* * No options specified. Switch on full debugging. */ @@ -1910,11 +1911,15 @@ out: static_branch_unlikely(&init_on_free)) && (slub_debug & SLAB_POISON)) pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n"); - return 1; + return 0; } -__setup("slab_debug", setup_slub_debug); -__setup_param("slub_debug", slub_debug, setup_slub_debug, 0); +static const struct kernel_param_ops param_ops_slab_debug __initconst = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = setup_slub_debug, +}; +__core_param_cb(slab_debug, ¶m_ops_slab_debug, NULL, 0); +__core_param_cb(slub_debug, ¶m_ops_slab_debug, NULL, 0); /* * kmem_cache_flags - apply debugging options to the cache @@ -1928,9 +1933,9 @@ __setup_param("slub_debug", slub_debug, setup_slub_debug, 0); */ slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name) { - char *iter; + const char *iter; size_t len; - char *next_block; + const char *next_block; slab_flags_t block_flags; slab_flags_t slub_debug_local = slub_debug; @@ -1954,7 +1959,7 @@ slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name) continue; /* Found a block that has a slab list, search it */ while (*iter) { - char *end, *glob; + const char *end, *glob; size_t cmplen; end = strchrnul(iter, ','); @@ -2016,15 +2021,21 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) {} static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} -#ifndef CONFIG_SLUB_TINY static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, void **freelist, void *nextfree) { return false; } -#endif #endif /* CONFIG_SLUB_DEBUG */ +/* + * The allocated objcg pointers array is not accounted directly. + * Moreover, it should not come from DMA buffer and is not readily + * reclaimable. So those GFP bits should be masked off. + */ +#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ + __GFP_ACCOUNT | __GFP_NOFAIL) + #ifdef CONFIG_SLAB_OBJ_EXT #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG @@ -2075,14 +2086,6 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ -/* - * The allocated objcg pointers array is not accounted directly. - * Moreover, it should not come from DMA buffer and is not readily - * reclaimable. So those GFP bits should be masked off. 
- */ -#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ - __GFP_ACCOUNT | __GFP_NOFAIL) - static inline void init_slab_obj_exts(struct slab *slab) { slab->obj_exts = 0; @@ -2362,33 +2365,34 @@ bool memcg_slab_post_charge(void *p, gfp_t flags) { struct slabobj_ext *slab_exts; struct kmem_cache *s; - struct folio *folio; + struct page *page; struct slab *slab; unsigned long off; - folio = virt_to_folio(p); - if (!folio_test_slab(folio)) { + page = virt_to_page(p); + if (PageLargeKmalloc(page)) { + unsigned int order; int size; - if (folio_memcg_kmem(folio)) + if (PageMemcgKmem(page)) return true; - if (__memcg_kmem_charge_page(folio_page(folio, 0), flags, - folio_order(folio))) + order = large_kmalloc_order(page); + if (__memcg_kmem_charge_page(page, flags, order)) return false; /* - * This folio has already been accounted in the global stats but + * This page has already been accounted in the global stats but * not in the memcg stats. So, subtract from the global and use * the interface which adds to both global and memcg stats. */ - size = folio_size(folio); - node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size); + size = PAGE_SIZE << order; + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, -size); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, size); return true; } - slab = folio_slab(folio); + slab = page_slab(page); s = slab->slab_cache; /* @@ -2590,8 +2594,24 @@ static void *setup_object(struct kmem_cache *s, void *object) static struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp) { - struct slab_sheaf *sheaf = kzalloc(struct_size(sheaf, objects, - s->sheaf_capacity), gfp); + struct slab_sheaf *sheaf; + size_t sheaf_size; + + if (gfp & __GFP_NO_OBJ_EXT) + return NULL; + + gfp &= ~OBJCGS_CLEAR_MASK; + + /* + * Prevent recursion to the same cache, or a deep stack of kmallocs of + * varying sizes (sheaf capacity might differ for each kmalloc size + * bucket) + */ + if (s->flags & SLAB_KMALLOC) + gfp |= __GFP_NO_OBJ_EXT; + + sheaf_size = struct_size(sheaf, objects, s->sheaf_capacity); + sheaf = kzalloc(sheaf_size, gfp); if (unlikely(!sheaf)) return NULL; @@ -2644,7 +2664,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) if (!sheaf) return NULL; - if (refill_sheaf(s, sheaf, gfp)) { + if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC)) { free_empty_sheaf(s, sheaf); return NULL; } @@ -2722,12 +2742,13 @@ static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf) sheaf->size = 0; } -static void __rcu_free_sheaf_prepare(struct kmem_cache *s, +static bool __rcu_free_sheaf_prepare(struct kmem_cache *s, struct slab_sheaf *sheaf) { bool init = slab_want_init_on_free(s); void **p = &sheaf->objects[0]; unsigned int i = 0; + bool pfmemalloc = false; while (i < sheaf->size) { struct slab *slab = virt_to_slab(p[i]); @@ -2740,8 +2761,13 @@ static void __rcu_free_sheaf_prepare(struct kmem_cache *s, continue; } + if (slab_test_pfmemalloc(slab)) + pfmemalloc = true; + i++; } + + return pfmemalloc; } static void rcu_free_sheaf_nobarn(struct rcu_head *head) @@ -3004,14 +3030,11 @@ static void barn_init(struct node_barn *barn) static void barn_shrink(struct kmem_cache *s, struct node_barn *barn) { - struct list_head empty_list; - struct list_head full_list; + LIST_HEAD(empty_list); + LIST_HEAD(full_list); struct slab_sheaf *sheaf, *sheaf2; unsigned long flags; - INIT_LIST_HEAD(&empty_list); - INIT_LIST_HEAD(&full_list); - 
spin_lock_irqsave(&barn->lock, flags); list_splice_init(&barn->sheaves_full, &full_list); @@ -3037,24 +3060,24 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node, struct kmem_cache_order_objects oo, bool allow_spin) { - struct folio *folio; + struct page *page; struct slab *slab; unsigned int order = oo_order(oo); if (unlikely(!allow_spin)) - folio = (struct folio *)alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */, + page = alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */, node, order); else if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_frozen_pages(flags, order); + page = alloc_frozen_pages(flags, order); else - folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL); + page = __alloc_frozen_pages(flags, order, node, NULL); - if (!folio) + if (!page) return NULL; - slab = folio_slab(folio); - __folio_set_slab(folio); - if (folio_is_pfmemalloc(folio)) + __SetPageSlab(page); + slab = page_slab(page); + if (page_is_pfmemalloc(page)) slab_set_pfmemalloc(slab); return slab; @@ -3278,16 +3301,16 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) static void __free_slab(struct kmem_cache *s, struct slab *slab) { - struct folio *folio = slab_folio(slab); - int order = folio_order(folio); + struct page *page = slab_page(slab); + int order = compound_order(page); int pages = 1 << order; __slab_clear_pfmemalloc(slab); - folio->mapping = NULL; - __folio_clear_slab(folio); + page->mapping = NULL; + __ClearPageSlab(page); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); - free_frozen_pages(&folio->page, order); + free_frozen_pages(page, order); } static void rcu_free_slab(struct rcu_head *h) @@ -3607,8 +3630,6 @@ static struct slab *get_partial(struct kmem_cache *s, int node, return get_any_partial(s, pc); } -#ifndef CONFIG_SLUB_TINY - #ifdef CONFIG_PREEMPTION /* * Calculate the next globally unique transaction for disambiguation @@ -4004,12 +4025,6 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s) return c->slab || slub_percpu_partial(c); } -#else /* CONFIG_SLUB_TINY */ -static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { } -static inline bool has_cpu_slab(int cpu, struct kmem_cache *s) { return false; } -static inline void flush_this_cpu_slab(struct kmem_cache *s) { } -#endif /* CONFIG_SLUB_TINY */ - static bool has_pcs_used(int cpu, struct kmem_cache *s) { struct slub_percpu_sheaves *pcs; @@ -4350,7 +4365,6 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) return true; } -#ifndef CONFIG_SLUB_TINY static inline bool __update_cpu_freelist_fast(struct kmem_cache *s, void *freelist_old, void *freelist_new, @@ -4607,7 +4621,7 @@ new_objects: pc.orig_size = orig_size; slab = get_partial(s, node, &pc); if (slab) { - if (kmem_cache_debug(s)) { + if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { freelist = pc.object; /* * For debug caches here we had to go through @@ -4645,7 +4659,7 @@ new_objects: stat(s, ALLOC_SLAB); - if (kmem_cache_debug(s)) { + if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); if (unlikely(!freelist)) { @@ -4857,32 +4871,6 @@ redo: return object; } -#else /* CONFIG_SLUB_TINY */ -static void *__slab_alloc_node(struct kmem_cache *s, - gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) -{ - struct partial_context pc; - struct slab *slab; - void *object; - - pc.flags = gfpflags; - pc.orig_size = orig_size; - slab = get_partial(s, node, &pc); - - 
if (slab) - return pc.object; - - slab = new_slab(s, gfpflags, node); - if (unlikely(!slab)) { - slab_out_of_memory(s, gfpflags, node); - return NULL; - } - - object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); - - return object; -} -#endif /* CONFIG_SLUB_TINY */ /* * If the object has been wiped upon free, make sure it's fully initialized by @@ -5023,7 +5011,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; if (empty) { - if (!refill_sheaf(s, empty, gfp)) { + if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC)) { full = empty; } else { /* @@ -5134,7 +5122,7 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node) * be false because of cpu migration during an unlocked part of * the current allocation or previous freeing process. */ - if (folio_nid(virt_to_folio(object)) != node) { + if (page_to_nid(virt_to_page(object)) != node) { local_unlock(&s->cpu_sheaves->lock); return NULL; } @@ -5323,6 +5311,26 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int nod } EXPORT_SYMBOL(kmem_cache_alloc_node_noprof); +static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s, + struct slab_sheaf *sheaf, gfp_t gfp) +{ + int ret = 0; + + ret = refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC); + + if (likely(!ret || !gfp_pfmemalloc_allowed(gfp))) + return ret; + + /* + * if we are allowed to, refill sheaf with pfmemalloc but then remember + * it for when it's returned + */ + ret = refill_sheaf(s, sheaf, gfp); + sheaf->pfmemalloc = true; + + return ret; +} + /* * returns a sheaf that has at least the requested size * when prefilling is needed, do so with given gfp flags @@ -5357,6 +5365,10 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) sheaf->cache = s; sheaf->capacity = size; + /* + * we do not need to care about pfmemalloc here because oversize + * sheaves area always flushed and freed when returned + */ if (!__kmem_cache_alloc_bulk(s, gfp, size, &sheaf->objects[0])) { kfree(sheaf); @@ -5393,17 +5405,18 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) if (!sheaf) sheaf = alloc_empty_sheaf(s, gfp); - if (sheaf && sheaf->size < size) { - if (refill_sheaf(s, sheaf, gfp)) { + if (sheaf) { + sheaf->capacity = s->sheaf_capacity; + sheaf->pfmemalloc = false; + + if (sheaf->size < size && + __prefill_sheaf_pfmemalloc(s, sheaf, gfp)) { sheaf_flush_unused(s, sheaf); free_empty_sheaf(s, sheaf); sheaf = NULL; } } - if (sheaf) - sheaf->capacity = s->sheaf_capacity; - return sheaf; } @@ -5423,7 +5436,8 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, struct slub_percpu_sheaves *pcs; struct node_barn *barn; - if (unlikely(sheaf->capacity != s->sheaf_capacity)) { + if (unlikely((sheaf->capacity != s->sheaf_capacity) + || sheaf->pfmemalloc)) { sheaf_flush_unused(s, sheaf); kfree(sheaf); return; @@ -5489,7 +5503,7 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, if (likely(sheaf->capacity >= size)) { if (likely(sheaf->capacity == s->sheaf_capacity)) - return refill_sheaf(s, sheaf, gfp); + return __prefill_sheaf_pfmemalloc(s, sheaf, gfp); if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size, &sheaf->objects[sheaf->size])) { @@ -5522,6 +5536,9 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, * * The gfp parameter is meant only to specify __GFP_ZERO or __GFP_ACCOUNT * memcg charging is forced over limit if necessary, to avoid failure. 
+ * + * It is possible that the allocation comes from kfence and then the sheaf + * size is not decreased. */ void * kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp, @@ -5533,7 +5550,10 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp, if (sheaf->size == 0) goto out; - ret = sheaf->objects[--sheaf->size]; + ret = kfence_alloc(s, s->object_size, gfp); + + if (likely(!ret)) + ret = sheaf->objects[--sheaf->size]; init = slab_want_init_on_alloc(gfp, s); @@ -5556,7 +5576,7 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) */ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) { - struct folio *folio; + struct page *page; void *ptr = NULL; unsigned int order = get_order(size); @@ -5566,15 +5586,15 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) flags |= __GFP_COMP; if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_frozen_pages_noprof(flags, order); + page = alloc_frozen_pages_noprof(flags, order); else - folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL); + page = __alloc_frozen_pages_noprof(flags, order, node, NULL); - if (folio) { - ptr = folio_address(folio); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, + if (page) { + ptr = page_address(page); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, PAGE_SIZE << order); - __folio_set_large_kmalloc(folio); + __SetPageLargeKmalloc(page); } ptr = kasan_kmalloc_large(ptr, size, flags); @@ -5701,9 +5721,7 @@ retry: * it did local_lock_irqsave(&s->cpu_slab->lock, flags). * In this case fast path with __update_cpu_freelist_fast() is not safe. */ -#ifndef CONFIG_SLUB_TINY if (!in_nmi() || !local_lock_is_locked(&s->cpu_slab->lock)) -#endif ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size); if (PTR_ERR(ret) == -EBUSY) { @@ -6192,8 +6210,12 @@ static void rcu_free_sheaf(struct rcu_head *head) * handles it fine. The only downside is that sheaf will serve fewer * allocations when reused. It only happens due to debugging, which is a * performance hit anyway. + * + * If it returns true, there was at least one object from pfmemalloc + * slab so simply flush everything. */ - __rcu_free_sheaf_prepare(s, sheaf); + if (__rcu_free_sheaf_prepare(s, sheaf)) + goto flush; n = get_node(s, sheaf->node); if (!n) @@ -6348,7 +6370,8 @@ next_remote_batch: continue; } - if (unlikely(IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)) { + if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node) + || slab_test_pfmemalloc(slab))) { remote_objects[remote_nr] = p[i]; p[i] = p[--size]; if (++remote_nr >= PCS_BATCH_MAX) @@ -6490,14 +6513,10 @@ static void free_deferred_objects(struct irq_work *work) llist_for_each_safe(pos, t, llnode) { struct slab *slab = container_of(pos, struct slab, llnode); -#ifdef CONFIG_SLUB_TINY - free_slab(slab->slab_cache, slab); -#else if (slab->frozen) deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); else free_slab(slab->slab_cache, slab); -#endif } } @@ -6533,7 +6552,6 @@ void defer_free_barrier(void) irq_work_sync(&per_cpu_ptr(&defer_free_objects, cpu)->work); } -#ifndef CONFIG_SLUB_TINY /* * Fastpath with forced inlining to produce a kfree and kmem_cache_free that * can perform fastpath freeing without additional function calls. 
@@ -6626,14 +6644,6 @@ redo: } stat_add(s, FREE_FASTPATH, cnt); } -#else /* CONFIG_SLUB_TINY */ -static void do_slab_free(struct kmem_cache *s, - struct slab *slab, void *head, void *tail, - int cnt, unsigned long addr) -{ - __slab_free(s, slab, head, tail, cnt, addr); -} -#endif /* CONFIG_SLUB_TINY */ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, void *object, @@ -6646,7 +6656,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, return; if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) || - slab_nid(slab) == numa_mem_id())) { + slab_nid(slab) == numa_mem_id()) + && likely(!slab_test_pfmemalloc(slab))) { if (likely(free_to_pcs(s, object))) return; } @@ -6756,12 +6767,12 @@ void kmem_cache_free(struct kmem_cache *s, void *x) } EXPORT_SYMBOL(kmem_cache_free); -static void free_large_kmalloc(struct folio *folio, void *object) +static void free_large_kmalloc(struct page *page, void *object) { - unsigned int order = folio_order(folio); + unsigned int order = compound_order(page); - if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) { - dump_page(&folio->page, "Not a kmalloc allocation"); + if (WARN_ON_ONCE(!PageLargeKmalloc(page))) { + dump_page(page, "Not a kmalloc allocation"); return; } @@ -6772,10 +6783,10 @@ static void free_large_kmalloc(struct folio *folio, void *object) kasan_kfree_large(object); kmsan_kfree_large(object); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); - __folio_clear_large_kmalloc(folio); - free_frozen_pages(&folio->page, order); + __ClearPageLargeKmalloc(page); + free_frozen_pages(page, order); } /* @@ -6785,7 +6796,7 @@ static void free_large_kmalloc(struct folio *folio, void *object) void kvfree_rcu_cb(struct rcu_head *head) { void *obj = head; - struct folio *folio; + struct page *page; struct slab *slab; struct kmem_cache *s; void *slab_addr; @@ -6796,20 +6807,20 @@ void kvfree_rcu_cb(struct rcu_head *head) return; } - folio = virt_to_folio(obj); - if (!folio_test_slab(folio)) { + page = virt_to_page(obj); + slab = page_slab(page); + if (!slab) { /* * rcu_head offset can be only less than page size so no need to - * consider folio order + * consider allocation order */ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj); - free_large_kmalloc(folio, obj); + free_large_kmalloc(page, obj); return; } - slab = folio_slab(folio); s = slab->slab_cache; - slab_addr = folio_address(folio); + slab_addr = slab_address(slab); if (is_kfence_address(obj)) { obj = kfence_object_start(obj); @@ -6831,7 +6842,7 @@ void kvfree_rcu_cb(struct rcu_head *head) */ void kfree(const void *object) { - struct folio *folio; + struct page *page; struct slab *slab; struct kmem_cache *s; void *x = (void *)object; @@ -6841,13 +6852,13 @@ void kfree(const void *object) if (unlikely(ZERO_OR_NULL_PTR(object))) return; - folio = virt_to_folio(object); - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, (void *)object); + page = virt_to_page(object); + slab = page_slab(page); + if (!slab) { + free_large_kmalloc(page, (void *)object); return; } - slab = folio_slab(folio); s = slab->slab_cache; slab_free(s, slab, x, _RET_IP_); } @@ -6864,7 +6875,6 @@ EXPORT_SYMBOL(kfree); */ void kfree_nolock(const void *object) { - struct folio *folio; struct slab *slab; struct kmem_cache *s; void *x = (void *)object; @@ -6872,13 +6882,12 @@ void kfree_nolock(const void *object) if (unlikely(ZERO_OR_NULL_PTR(object))) return; - folio = 
virt_to_folio(object); - if (unlikely(!folio_test_slab(folio))) { + slab = virt_to_slab(object); + if (unlikely(!slab)) { WARN_ONCE(1, "large_kmalloc is not supported by kfree_nolock()"); return; } - slab = folio_slab(folio); s = slab->slab_cache; memcg_slab_free_hook(s, slab, &x, 1); @@ -6910,11 +6919,7 @@ void kfree_nolock(const void *object) * since kasan quarantine takes locks and not supported from NMI. */ kasan_slab_free(s, x, false, false, /* skip quarantine */true); -#ifndef CONFIG_SLUB_TINY do_slab_free(s, slab, x, x, 0, _RET_IP_); -#else - defer_free(s, x); -#endif } EXPORT_SYMBOL_GPL(kfree_nolock); @@ -6946,16 +6951,16 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, if (is_kfence_address(p)) { ks = orig_size = kfence_ksize(p); } else { - struct folio *folio; + struct page *page = virt_to_page(p); + struct slab *slab = page_slab(page); - folio = virt_to_folio(p); - if (unlikely(!folio_test_slab(folio))) { + if (!slab) { /* Big kmalloc object */ - WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE); - WARN_ON(p != folio_address(folio)); - ks = folio_size(folio); + ks = page_size(page); + WARN_ON(ks <= KMALLOC_MAX_CACHE_SIZE); + WARN_ON(p != page_address(page)); } else { - s = folio_slab(folio)->slab_cache; + s = slab->slab_cache; orig_size = get_orig_size(s, (void *)p); ks = s->object_size; } @@ -7259,23 +7264,25 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, { int lookahead = 3; void *object; - struct folio *folio; + struct page *page; + struct slab *slab; size_t same; object = p[--size]; - folio = virt_to_folio(object); + page = virt_to_page(object); + slab = page_slab(page); if (!s) { /* Handle kalloc'ed objects */ - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, object); + if (!slab) { + free_large_kmalloc(page, object); df->slab = NULL; return size; } /* Derive kmem_cache from object */ - df->slab = folio_slab(folio); - df->s = df->slab->slab_cache; + df->slab = slab; + df->s = slab->slab_cache; } else { - df->slab = folio_slab(folio); + df->slab = slab; df->s = cache_from_obj(s, object); /* Support for memcg */ } @@ -7364,7 +7371,6 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -#ifndef CONFIG_SLUB_TINY static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) @@ -7382,14 +7388,8 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_lock_irqsave(&s->cpu_slab->lock, irqflags); for (i = 0; i < size; i++) { - void *object = kfence_alloc(s, s->object_size, flags); - - if (unlikely(object)) { - p[i] = object; - continue; - } + void *object = c->freelist; - object = c->freelist; if (unlikely(!object)) { /* * We may have removed an object from c->freelist using @@ -7435,41 +7435,13 @@ error: return 0; } -#else /* CONFIG_SLUB_TINY */ -static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, - size_t size, void **p) -{ - int i; - - for (i = 0; i < size; i++) { - void *object = kfence_alloc(s, s->object_size, flags); - - if (unlikely(object)) { - p[i] = object; - continue; - } - - p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE, - _RET_IP_, s->object_size); - if (unlikely(!p[i])) - goto error; - - maybe_wipe_obj_freeptr(s, p[i]); - } - - return i; - -error: - __kmem_cache_free_bulk(s, i, p); - return 0; -} -#endif /* CONFIG_SLUB_TINY */ /* Note that interrupts must be enabled when calling this function. 
*/ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { unsigned int i = 0; + void *kfence_obj; if (!size) return 0; @@ -7478,6 +7450,20 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, if (unlikely(!s)) return 0; + /* + * to make things simpler, only assume at most once kfence allocated + * object per bulk allocation and choose its index randomly + */ + kfence_obj = kfence_alloc(s, s->object_size, flags); + + if (unlikely(kfence_obj)) { + if (unlikely(size == 1)) { + p[0] = kfence_obj; + goto out; + } + size--; + } + if (s->cpu_sheaves) i = alloc_from_pcs_bulk(s, size, p); @@ -7489,10 +7475,23 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) { if (i > 0) __kmem_cache_free_bulk(s, i, p); + if (kfence_obj) + __kfence_free(kfence_obj); return 0; } } + if (unlikely(kfence_obj)) { + int idx = get_random_u32_below(size + 1); + + if (idx != size) + p[size] = p[idx]; + p[idx] = kfence_obj; + + size++; + } + +out: /* * memcg and kmem_cache debug support and memory initialization. * Done outside of the IRQ disabled fastpath loop. @@ -7654,7 +7653,6 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn) barn_init(barn); } -#ifndef CONFIG_SLUB_TINY static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < @@ -7675,12 +7673,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) return 1; } -#else -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) -{ - return 1; -} -#endif /* CONFIG_SLUB_TINY */ static int init_percpu_sheaves(struct kmem_cache *s) { @@ -7770,13 +7762,11 @@ void __kmem_cache_release(struct kmem_cache *s) cache_random_seq_destroy(s); if (s->cpu_sheaves) pcs_destroy(s); -#ifndef CONFIG_SLUB_TINY #ifdef CONFIG_PREEMPT_RT if (s->cpu_slab) lockdep_unregister_key(&s->lock_key); #endif free_percpu(s->cpu_slab); -#endif free_kmem_cache_nodes(s); } @@ -8142,46 +8132,53 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) * Kmalloc subsystem *******************************************************************/ -static int __init setup_slub_min_order(char *str) +static int __init setup_slub_min_order(const char *str, const struct kernel_param *kp) { - get_option(&str, (int *)&slub_min_order); + int ret; + + ret = kstrtouint(str, 0, &slub_min_order); + if (ret) + return ret; if (slub_min_order > slub_max_order) slub_max_order = slub_min_order; - return 1; + return 0; } -__setup("slab_min_order=", setup_slub_min_order); -__setup_param("slub_min_order=", slub_min_order, setup_slub_min_order, 0); - +static const struct kernel_param_ops param_ops_slab_min_order __initconst = { + .set = setup_slub_min_order, +}; +__core_param_cb(slab_min_order, ¶m_ops_slab_min_order, &slub_min_order, 0); +__core_param_cb(slub_min_order, ¶m_ops_slab_min_order, &slub_min_order, 0); -static int __init setup_slub_max_order(char *str) +static int __init setup_slub_max_order(const char *str, const struct kernel_param *kp) { - get_option(&str, (int *)&slub_max_order); + int ret; + + ret = kstrtouint(str, 0, &slub_max_order); + if (ret) + return ret; + slub_max_order = min_t(unsigned int, slub_max_order, MAX_PAGE_ORDER); if (slub_min_order > slub_max_order) slub_min_order = slub_max_order; - return 1; + return 0; } -__setup("slab_max_order=", setup_slub_max_order); -__setup_param("slub_max_order=", slub_max_order, 
setup_slub_max_order, 0); - -static int __init setup_slub_min_objects(char *str) -{ - get_option(&str, (int *)&slub_min_objects); - - return 1; -} +static const struct kernel_param_ops param_ops_slab_max_order __initconst = { + .set = setup_slub_max_order, +}; +__core_param_cb(slab_max_order, ¶m_ops_slab_max_order, &slub_max_order, 0); +__core_param_cb(slub_max_order, ¶m_ops_slab_max_order, &slub_max_order, 0); -__setup("slab_min_objects=", setup_slub_min_objects); -__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0); +core_param(slab_min_objects, slub_min_objects, uint, 0); +core_param(slub_min_objects, slub_min_objects, uint, 0); #ifdef CONFIG_NUMA -static int __init setup_slab_strict_numa(char *str) +static int __init setup_slab_strict_numa(const char *str, const struct kernel_param *kp) { if (nr_node_ids > 1) { static_branch_enable(&strict_numa); @@ -8190,10 +8187,14 @@ static int __init setup_slab_strict_numa(char *str) pr_warn("slab_strict_numa parameter set on non NUMA system.\n"); } - return 1; + return 0; } -__setup("slab_strict_numa", setup_slab_strict_numa); +static const struct kernel_param_ops param_ops_slab_strict_numa __initconst = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = setup_slab_strict_numa, +}; +__core_param_cb(slab_strict_numa, ¶m_ops_slab_strict_numa, NULL, 0); #endif @@ -8519,10 +8520,8 @@ void __init kmem_cache_init(void) void __init kmem_cache_init_late(void) { -#ifndef CONFIG_SLUB_TINY flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0); WARN_ON(!flushwq); -#endif } struct kmem_cache * diff --git a/mm/usercopy.c b/mm/usercopy.c index dbdcc43964fb..5de7a518b1b1 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -164,7 +164,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n, { unsigned long addr = (unsigned long)ptr; unsigned long offset; - struct folio *folio; + struct page *page; + struct slab *slab; if (is_kmap_addr(ptr)) { offset = offset_in_page(ptr); @@ -189,16 +190,23 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (!virt_addr_valid(ptr)) return; - folio = virt_to_folio(ptr); - - if (folio_test_slab(folio)) { + page = virt_to_page(ptr); + slab = page_slab(page); + if (slab) { /* Check slab allocator for flags and size. */ - __check_heap_object(ptr, n, folio_slab(folio), to_user); - } else if (folio_test_large(folio)) { - offset = ptr - folio_address(folio); - if (n > folio_size(folio) - offset) + __check_heap_object(ptr, n, slab, to_user); + } else if (PageCompound(page)) { + page = compound_head(page); + offset = ptr - page_address(page); + if (n > page_size(page) - offset) usercopy_abort("page alloc", NULL, to_user, offset, n); } + + /* + * We cannot check non-compound pages. They might be part of + * a large allocation, in which case crossing a page boundary + * is fine. + */ } DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_HARDENED_USERCOPY_DEFAULT_ON, |
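For reference, a minimal sketch (hypothetical, not part of the patch) of the lookup pattern this series converts callers to: resolve the object's page, let the new page_slab() helper decide whether it belongs to a slab, and handle PageLargeKmalloc() pages separately. It assumes the mm/slab.h helpers behave as defined above; the function name example_obj_size() is made up and only mirrors the structure of the reworked __ksize() and kfree().

/* Illustrative sketch only -- mirrors the converted __ksize()/kfree() flow. */
static size_t example_obj_size(const void *object)
{
	const struct page *page = virt_to_page(object);
	const struct slab *slab = page_slab(page);	/* NULL if not a slab page */

	if (!slab) {
		/* Non-slab page: a large kmalloc allocation is now page-type tagged. */
		if (PageLargeKmalloc(page))
			return large_kmalloc_size(page);
		return 0;
	}

	/* Slab object: size comes from the owning cache. */
	return slab_ksize(slab->slab_cache);
}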