summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Qu Wenruo <wqu@suse.com> 2025-09-02 14:51:49 +0930
committer David Sterba <dsterba@suse.com> 2025-09-23 08:49:17 +0200
commit 9afc617265383f591614e94b702d558dfb1519c0 (patch)
tree d8b78de226193cf8d2b1975f1899d5ef2a3b6427
parent 35aff706dccbc51d30df6fedba76a746a1322839 (diff)
btrfs: introduce btrfs_bio_for_each_block() helper
Currently if we want to iterate a bio in block units, we do something like this: while (iter->bi_size) { struct bio_vec bv = bio_iter_iovec(); /* Do something using the bv */ bio_advance_iter_single(&bbio->bio, iter, sectorsize); } That's fine for now, but it will not handle future bs > ps, as bio_iter_iovec() returns a single-page bvec, meaning the bv_len will not exceed page size. This means the code using that bv can only handle a block if bs <= ps. To address this problem and handle future bs > ps cases better: - Introduce a helper btrfs_bio_for_each_block() Instead of bio_vec, which has single-page and multi-page versions, and whose multi-page version has quite a few limitations, use my favorite way to represent a block, phys_addr_t. For bs <= ps cases, nothing is changed, except that we incur a very small overhead to convert phys_addr_t to a folio, then use the proper folio helpers to handle the possible highmem cases. For bs > ps cases, all blocks will be backed by large folios, meaning every folio will cover at least one block. And we still use the proper folio helpers to handle highmem cases. With phys_addr_t, we will handle both large folios and highmem properly. So there is no better single variable to represent a btrfs block than phys_addr_t. - Extract the data block csum calculation into a helper The new helper, btrfs_calculate_block_csum(), will be utilized by btrfs_csum_one_bio(). - Use btrfs_bio_for_each_block() to replace existing call sites Including: * index_one_bio() from raid56.c Very straightforward. * btrfs_check_read_bio() Also update repair_one_sector() to grab the folio using phys_addr_t, and do extra checks to make sure the folio covers at least one block. We do not need to bother with bv_len at all now. * btrfs_csum_one_bio() Now we can move the highmem handling into a dedicated helper, btrfs_calculate_block_csum(), and use the btrfs_bio_for_each_block() helper. 
There is one exception, btrfs_decompress_buf2page(), which copies decompressed data into the original bio and does not iterate by block size, thus we don't need to bother with it. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/bio.c 20
-rw-r--r-- fs/btrfs/btrfs_inode.h 2
-rw-r--r-- fs/btrfs/file-item.c 26
-rw-r--r-- fs/btrfs/inode.c 26
-rw-r--r-- fs/btrfs/misc.h 25
-rw-r--r-- fs/btrfs/raid56.c 7
6 files changed, 60 insertions, 46 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 493135bfa518..909b208f9ef3 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -204,18 +204,21 @@ done:
*/
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
u32 bio_offset,
- struct bio_vec *bv,
+ phys_addr_t paddr,
struct btrfs_failed_bio *fbio)
{
struct btrfs_inode *inode = failed_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct folio *folio = page_folio(phys_to_page(paddr));
const u32 sectorsize = fs_info->sectorsize;
+ const u32 foff = offset_in_folio(folio, paddr);
const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
struct btrfs_bio *repair_bbio;
struct bio *repair_bio;
int num_copies;
int mirror;
+ ASSERT(foff + sectorsize <= folio_size(folio));
btrfs_debug(fs_info, "repair read error: read error at %llu",
failed_bbio->file_offset + bio_offset);
@@ -238,7 +241,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
&btrfs_repair_bioset);
repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
- __bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
+ bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
repair_bbio = btrfs_bio(repair_bio);
btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
@@ -259,6 +262,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
struct bvec_iter *iter = &bbio->saved_iter;
blk_status_t status = bbio->bio.bi_status;
struct btrfs_failed_bio *fbio = NULL;
+ phys_addr_t paddr;
u32 offset = 0;
/* Read-repair requires the inode field to be set by the submitter. */
@@ -276,17 +280,11 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
/* Clear the I/O error. A failed repair will reset it. */
bbio->bio.bi_status = BLK_STS_OK;
- while (iter->bi_size) {
- struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter);
-
- bv.bv_len = min(bv.bv_len, sectorsize);
- if (status || !btrfs_data_csum_ok(bbio, dev, offset, bvec_phys(&bv)))
- fbio = repair_one_sector(bbio, offset, &bv, fbio);
-
- bio_advance_iter_single(&bbio->bio, iter, sectorsize);
+ btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
+ if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
+ fbio = repair_one_sector(bbio, offset, paddr, fbio);
offset += sectorsize;
}
-
if (bbio->csum != bbio->csum_inline)
kfree(bbio->csum);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 077b2f178816..c40e99ec13bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -542,6 +542,8 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
+void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
+ u8 *dest);
int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
const u8 * const csum_expected);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 4dd3d8a02519..7906aea75ee4 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -775,12 +775,10 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums;
- char *data;
- struct bvec_iter iter;
- struct bio_vec bvec;
+ struct bvec_iter iter = bio->bi_iter;
+ phys_addr_t paddr;
+ const u32 blocksize = fs_info->sectorsize;
int index;
- unsigned int blockcount;
- int i;
unsigned nofs_flag;
nofs_flag = memalloc_nofs_save();
@@ -799,21 +797,9 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
shash->tfm = fs_info->csum_shash;
- bio_for_each_segment(bvec, bio, iter) {
- blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
- bvec.bv_len + fs_info->sectorsize
- - 1);
-
- for (i = 0; i < blockcount; i++) {
- data = bvec_kmap_local(&bvec);
- crypto_shash_digest(shash,
- data + (i * fs_info->sectorsize),
- fs_info->sectorsize,
- sums->sums + index);
- kunmap_local(data);
- index += fs_info->csum_size;
- }
-
+ btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
+ btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
+ index += fs_info->csum_size;
}
bbio->sums = sums;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 98877535f213..5fad6af57944 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3328,14 +3328,8 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
return btrfs_finish_one_ordered(ordered);
}
-/*
- * Verify the checksum for a single sector without any extra action that depend
- * on the type of I/O.
- *
- * @kaddr must be a properly kmapped address.
- */
-int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
- const u8 * const csum_expected)
+void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
+ u8 *dest)
{
struct folio *folio = page_folio(phys_to_page(paddr));
const u32 blocksize = fs_info->sectorsize;
@@ -3359,11 +3353,21 @@ int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8
kunmap_local(kaddr);
cur += len;
}
- crypto_shash_final(shash, csum);
+ crypto_shash_final(shash, dest);
} else {
- crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, csum);
+ crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest);
}
-
+}
+/*
+ * Verify the checksum for a single sector without any extra action that depends
+ * on the type of I/O.
+ *
+ * @paddr is the physical address of the block to verify.
+ */
+int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
+ const u8 * const csum_expected)
+{
+ btrfs_calculate_block_csum(fs_info, paddr, csum);
if (memcmp(csum, csum_expected, fs_info->csum_size))
return -EIO;
return 0;
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index ff5eac84d819..f8f055fdc551 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -11,6 +11,7 @@
#include <linux/pagemap.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
+#include <linux/bio.h>
/*
* Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
@@ -20,6 +21,30 @@
name = (1U << __ ## name ## _BIT), \
__ ## name ## _SEQ = __ ## name ## _BIT
+static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter)
+{
+ struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+ return bvec_phys(&bv);
+}
+
+/*
+ * Iterate bio using btrfs block size.
+ *
+ * This will handle large folio and highmem.
+ *
+ * @paddr: Physical memory address of each iteration
+ * @bio: The bio to iterate
+ * @iter: The bvec_iter (pointer) to use.
+ * @blocksize: The blocksize to iterate.
+ *
+ * This requires all folios in the bio to cover at least one block.
+ */
+#define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \
+ for (; (iter)->bi_size && \
+ (paddr = bio_iter_phys((bio), (iter)), 1); \
+ bio_advance_iter_single((bio), (iter), (blocksize)))
+
static inline void cond_wake_up(struct wait_queue_head *wq)
{
/*
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index e88699460dda..389f1b617fe7 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1208,17 +1208,16 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits;
struct bvec_iter iter = bio->bi_iter;
+ phys_addr_t paddr;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->full_stripe_logical;
- while (iter.bi_size) {
+ btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) {
unsigned int index = (offset >> sectorsize_bits);
struct sector_ptr *sector = &rbio->bio_sectors[index];
- struct bio_vec bv = bio_iter_iovec(bio, iter);
sector->has_paddr = true;
- sector->paddr = bvec_phys(&bv);
- bio_advance_iter_single(bio, &iter, sectorsize);
+ sector->paddr = paddr;
offset += sectorsize;
}
}