summaryrefslogtreecommitdiff
path: root/fs/ext4/extents_status.c
diff options
context:
space:
mode:
authorZhang Yi <yi.zhang@huawei.com>2025-10-13 09:51:18 +0800
committerTheodore Ts'o <tytso@mit.edu>2025-11-06 10:44:39 -0500
commitdd064d5101ea473d39c39ffaa8beeb8f47bbeb09 (patch)
treed9e11df350626242ecbd25fe2a2c97b47944ac3f /fs/ext4/extents_status.c
parenta2e5a3cea4b18f6e2575acc444a5e8cce1fc8260 (diff)
ext4: introduce seq counter for the extent status entry
In the iomap_write_iter(), the iomap buffered write frame does not hold any locks between querying the inode extent mapping info and performing page cache writes. As a result, the extent mapping can be changed due to concurrent I/O in flight. Similarly, in the iomap_writepage_map(), the write-back process faces a similar problem: concurrent changes can invalidate the extent mapping before the I/O is submitted. Therefore, both of these processes must recheck the mapping info after acquiring the folio lock. To address this, similar to XFS, we propose introducing an extent sequence number to serve as a validity cookie for the extent. After commit 24b7a2331fcd ("ext4: clairfy the rules for modifying extents"), we can ensure the extent information should always be processed through the extent status tree, and the extent status tree is always uptodate under i_rwsem or invalidate_lock or folio lock, so it's safe to introduce this sequence number. The sequence number will be increased whenever the extent status tree changes, preparing for the buffered write iomap conversion. Besides, this mechanism is also applicable for the moving extents case. In move_extent_per_page(), it also needs to reacquire data_sem and check the mapping info again under the folio lock. Signed-off-by: Zhang Yi <yi.zhang@huawei.com> Reviewed-by: Jan Kara <jack@suse.cz> Message-ID: <20251013015128.499308-3-yi.zhang@huaweicloud.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r--fs/ext4/extents_status.c25
1 files changed, 21 insertions, 4 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 31dc0496f8d0..c3daa57ecd35 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -235,6 +235,13 @@ static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
return es->es_lblk + es->es_len - 1;
}
+static inline void ext4_es_inc_seq(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ WRITE_ONCE(ei->i_es_seq, ei->i_es_seq + 1);
+}
+
/*
* search through the tree for an delayed extent with a given offset. If
* it can't be found, try to find next extent.
@@ -906,7 +913,6 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
newes.es_lblk = lblk;
newes.es_len = len;
ext4_es_store_pblock_status(&newes, pblk, status);
- trace_ext4_es_insert_extent(inode, &newes);
ext4_es_insert_extent_check(inode, &newes);
@@ -955,6 +961,11 @@ retry:
}
pending = err3;
}
+ /*
+ * TODO: For cache on-disk extents, there is no need to increment
+ * the sequence counter, this requires future optimization.
+ */
+ ext4_es_inc_seq(inode);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
/*
@@ -981,6 +992,7 @@ error:
if (err1 || err2 || err3 < 0)
goto retry;
+ trace_ext4_es_insert_extent(inode, &newes);
ext4_es_print_tree(inode);
return;
}
@@ -1550,7 +1562,6 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return;
- trace_ext4_es_remove_extent(inode, lblk, len);
es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
lblk, len, inode->i_ino);
@@ -1570,16 +1581,21 @@ retry:
*/
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end, &reserved, es);
+ if (err)
+ goto error;
/* Free preallocated extent if it didn't get used. */
if (es) {
if (!es->es_len)
__es_free_extent(es);
es = NULL;
}
+ ext4_es_inc_seq(inode);
+error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err)
goto retry;
+ trace_ext4_es_remove_extent(inode, lblk, len);
ext4_es_print_tree(inode);
ext4_da_release_space(inode, reserved);
}
@@ -2140,8 +2156,6 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
newes.es_lblk = lblk;
newes.es_len = len;
ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
- trace_ext4_es_insert_delayed_extent(inode, &newes, lclu_allocated,
- end_allocated);
ext4_es_insert_extent_check(inode, &newes);
@@ -2196,11 +2210,14 @@ retry:
pr2 = NULL;
}
}
+ ext4_es_inc_seq(inode);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2 || err3 < 0)
goto retry;
+ trace_ext4_es_insert_delayed_extent(inode, &newes, lclu_allocated,
+ end_allocated);
ext4_es_print_tree(inode);
ext4_print_pending_tree(inode);
return;