diff options
| author | Zi Yan <ziy@nvidia.com> | 2025-03-07 12:40:00 -0500 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-03-17 22:07:00 -0700 |
| commit | 7460b470a131f985a70302a322617121efdd7caa (patch) | |
| tree | 5281816fa47ce1292a395d3e490239b4f24f9960 | |
| parent | 4b94c18d15199658f1a86231663e97d3cc12d8de (diff) | |
mm/truncate: use folio_split() in truncate operation
Instead of splitting the large folio uniformly during truncation, try to
use buddy allocator like folio_split() at the start and the end of a
truncation range to minimize the number of resulting folios if it is
supported. try_folio_split() is introduced to use folio_split() if
supported and it falls back to uniform split otherwise.
For example, to truncate a order-4 folio
[0, 1, 2, 3, 4, 5, ..., 15]
between [3, 10] (inclusive), folio_split() splits the folio at 3 to
[0,1], [2], [3], [4..7], [8..15] and [3], [4..7] can be dropped and
[8..15] is kept with zeros in [8..10], then another folio_split() is
done at 10, so [8..10] can be dropped.
One possible optimization is to make folio_split() to split a folio based
on a given range, like [3..10] above. But that complicates folio_split(),
so it will be investigated when necessary.
Link: https://lkml.kernel.org/r/20250226210032.2044041-8-ziy@nvidia.com
Link: https://lkml.kernel.org/r/20250307174001.242794-8-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Kirill A. Shuemov <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
| -rw-r--r-- | include/linux/huge_mm.h | 36 | ||||
| -rw-r--r-- | mm/huge_memory.c | 6 | ||||
| -rw-r--r-- | mm/truncate.c | 37 |
3 files changed, 75 insertions, 4 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e57e811cfd3c..e893d546a49f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -345,6 +345,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); int min_order_for_split(struct folio *folio); int split_folio_to_list(struct folio *folio, struct list_head *list); +bool uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +int folio_split(struct folio *folio, unsigned int new_order, struct page *page, + struct list_head *list); +/* + * try_folio_split - try to split a @folio at @page using non uniform split. + * @folio: folio to be split + * @page: split to order-0 at the given page + * @list: store the after-split folios + * + * Try to split a @folio at @page using non uniform split to order-0, if + * non uniform split is not supported, fall back to uniform split. + * + * Return: 0: split is successful, otherwise split failed. + */ +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + if (!non_uniform_split_supported(folio, 0, false)) + return split_huge_page_to_list_to_order(&folio->page, list, + ret); + return folio_split(folio, ret, page, list); +} static inline int split_huge_page(struct page *page) { struct folio *folio = page_folio(page); @@ -537,6 +567,12 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis return 0; } +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + return 0; +} + static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c6ad2e4053d8..e3ed8e9523f5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3654,7 +3654,7 @@ after_split: return ret; } -static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, +bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, bool warns) { if (folio_test_anon(folio)) { @@ -3686,7 +3686,7 @@ static bool non_uniform_split_supported(struct folio *folio, unsigned int new_or } /* See comments in non_uniform_split_supported() */ -static bool uniform_split_supported(struct folio *folio, unsigned int new_order, +bool uniform_split_supported(struct folio *folio, unsigned int new_order, bool warns) { if (folio_test_anon(folio)) { @@ -4005,7 +4005,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, * * After split, folio is left locked for caller. */ -static int folio_split(struct folio *folio, unsigned int new_order, +int folio_split(struct folio *folio, unsigned int new_order, struct page *split_at, struct list_head *list) { return __folio_split(folio, new_order, split_at, &folio->page, list, diff --git a/mm/truncate.c b/mm/truncate.c index 79570045071c..5d98054094d1 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -192,6 +192,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); unsigned int offset, length; + struct page *split_at, *split_at2; if (pos < start) offset = start - pos; @@ -221,8 +222,42 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; - if (split_folio(folio) == 0) + + split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); + split_at2 = folio_page(folio, + PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); + + if (!try_folio_split(folio, split_at, NULL)) { + /* + * try to split at offset + length to make sure folios within + * the range can be dropped, especially to avoid memory waste + * for shmem truncate + */ + struct folio *folio2 = page_folio(split_at2); + + if (!folio_try_get(folio2)) + goto no_split; + + if (!folio_test_large(folio2)) + goto out; + + if (!folio_trylock(folio2)) + goto out; + + /* + * make sure folio2 is large and does not change its mapping. + * Its split result does not matter here. + */ + if (folio_test_large(folio2) && + folio2->mapping == folio->mapping) + try_folio_split(folio2, split_at2, NULL); + + folio_unlock(folio2); +out: + folio_put(folio2); +no_split: return true; + } if (folio_test_dirty(folio)) return false; truncate_inode_folio(folio->mapping, folio); |