summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2025-11-06 08:15:03 +1030
committerDavid Sterba <dsterba@suse.com>2025-11-24 22:42:22 +0100
commitcfc7fe2b0f18c54b571b4137156f944ff76057c8 (patch)
tree4cb0f183e23ba4f2d84b64dbcddaf982e14ccafa
parent1dac8db80cee66b7ba51d323025e47989278ee03 (diff)
btrfs: use kvcalloc for btrfs_bio::csum allocation
[BUG] There is a report that memory allocation failed for btrfs_bio::csum during a large read: b2sum: page allocation failure: order:4, mode:0x40c40(GFP_NOFS|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0 CPU: 0 UID: 0 PID: 416120 Comm: b2sum Tainted: G W 6.17.0 #1 NONE Tainted: [W]=WARN Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) Call trace: show_stack+0x18/0x30 (C) dump_stack_lvl+0x5c/0x7c dump_stack+0x18/0x24 warn_alloc+0xec/0x184 __alloc_pages_slowpath.constprop.0+0x21c/0x730 __alloc_frozen_pages_noprof+0x230/0x260 ___kmalloc_large_node+0xd4/0xf0 __kmalloc_noprof+0x1c8/0x260 btrfs_lookup_bio_sums+0x214/0x278 btrfs_submit_chunk+0xf0/0x3c0 btrfs_submit_bbio+0x2c/0x4c submit_one_bio+0x50/0xac submit_extent_folio+0x13c/0x340 btrfs_do_readpage+0x4b0/0x7a0 btrfs_readahead+0x184/0x254 read_pages+0x58/0x260 page_cache_ra_unbounded+0x170/0x24c page_cache_ra_order+0x360/0x3bc page_cache_async_ra+0x1a4/0x1d4 filemap_readahead.isra.0+0x44/0x74 filemap_get_pages+0x2b4/0x3b4 filemap_read+0xc4/0x3bc btrfs_file_read_iter+0x70/0x7c vfs_read+0x1ec/0x2c0 ksys_read+0x4c/0xe0 __arm64_sys_read+0x18/0x24 el0_svc_common.constprop.0+0x5c/0x130 do_el0_svc+0x1c/0x30 el0_svc+0x30/0xa0 el0t_64_sync_handler+0xa0/0xe4 el0t_64_sync+0x198/0x19c [CAUSE] Btrfs needs to allocate memory for btrfs_bio::csum for large reads, so that we can later verify the contents of the read. However nowadays a read bio can easily go beyond BIO_MAX_VECS * PAGE_SIZE (which is 1M for 4K page sizes), due to the multi-page bvec that one bvec can have more than one pages, as long as the pages are physically adjacent. This will become more common when the large folio support is moved out of experimental features. In the above case, a read larger than 4MiB with SHA256 checksum (32 bytes for each 4K block) will be able to trigger a order 4 allocation. The order 4 is larger than PAGE_ALLOC_COSTLY_ORDER (3), thus without extra flags such allocation will not retry. And if the system has very small amount of memory (e.g. RPI4 with low memory spec) or VMs with small vRAM, or the memory is heavily fragmented, such allocation will fail and cause the above warning. [FIX] Although btrfs is handling the memory allocation failure correctly, we do not really need the physically contiguous memory just to restore our checksum. In fact btrfs_csum_one_bio() is already using kvzalloc() to reduce the memory pressure. So follow the step to use kvcalloc() for btrfs_bio::csum. Reported-by: Calvin Owens <calvin@wbinvd.org> Link: https://lore.kernel.org/linux-btrfs/20251105180054.511528-1-calvin@wbinvd.org/ Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/bio.c2
-rw-r--r--fs/btrfs/file-item.c4
2 files changed, 3 insertions, 3 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 1286c1ac1940..a73652b8724a 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -293,7 +293,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
offset += sectorsize;
}
if (bbio->csum != bbio->csum_inline)
- kfree(bbio->csum);
+ kvfree(bbio->csum);
if (fbio)
btrfs_repair_done(fbio);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 4b7c40f05e8f..72be3ede0edf 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -373,7 +373,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
return -ENOMEM;
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
- bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
+ bbio->csum = kvcalloc(nblocks, csum_size, GFP_NOFS);
if (!bbio->csum)
return -ENOMEM;
} else {
@@ -439,7 +439,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
if (count < 0) {
ret = count;
if (bbio->csum != bbio->csum_inline)
- kfree(bbio->csum);
+ kvfree(bbio->csum);
bbio->csum = NULL;
break;
}