summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 08:14:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 08:14:00 -0800
commit1885cdbfbb51ede3637166c895d0b8040c9899cc (patch)
tree257c3f32cff62bff6a3acfe8c76d39981cc52faa /mm
parent7d0a66e4bb9081d75c82ec4957c50034cb0ea449 (diff)
parent7fd8720dff2d9c70cf5a1a13b7513af01952ec02 (diff)
Merge tag 'vfs-6.19-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull iomap updates from Christian Brauner: "FUSE iomap Support for Buffered Reads: This adds iomap support for FUSE buffered reads and readahead. This enables granular uptodate tracking with large folios so only non-uptodate portions need to be read. Also fixes a race condition with large folios + writeback cache that could cause data corruption on partial writes followed by reads. - Refactored iomap read/readahead bio logic into helpers - Added caller-provided callbacks for read operations - Moved buffered IO bio logic into new file - FUSE now uses iomap for read_folio and readahead Zero Range Folio Batch Support: Add folio batch support for iomap_zero_range() to handle dirty folios over unwritten mappings. Fix raciness issues where dirty data could be lost during zero range operations. - filemap_get_folios_tag_range() helper for dirty folio lookup - Optional zero range dirty folio processing - XFS fills dirty folios on zero range of unwritten mappings - Removed old partial EOF zeroing optimization DIO Write Completions from Interrupt Context: Restore pre-iomap behavior where pure overwrite completions run inline rather than being deferred to workqueue. Reduces context switches for high-performance workloads like ScyllaDB. - Removed unused IOCB_DIO_CALLER_COMP code - Error completions always run in user context (fixes zonefs) - Reworked REQ_FUA selection logic - Inverted IOMAP_DIO_INLINE_COMP to IOMAP_DIO_OFFLOAD_COMP Buffered IO Cleanups: Some performance and code clarity improvements: - Replace manual bitmap scanning with find_next_bit() - Simplify read skip logic for writes - Optimize pending async writeback accounting - Better variable naming - Documentation for iomap_finish_folio_write() requirements Misaligned Vectors for Zoned XFS: Enables sub-block aligned vectors in XFS always-COW mode for zoned devices via new IOMAP_DIO_FSBLOCK_ALIGNED flag. Bug Fixes: - Allocate s_dio_done_wq for async reads (fixes syzbot report after error completion changes) - Fix iomap_read_end() for already uptodate folios (regression fix)" * tag 'vfs-6.19-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (40 commits) iomap: allocate s_dio_done_wq for async reads as well iomap: fix iomap_read_end() for already uptodate folios iomap: invert the polarity of IOMAP_DIO_INLINE_COMP iomap: support write completions from interrupt context iomap: rework REQ_FUA selection iomap: always run error completions in user context fs, iomap: remove IOCB_DIO_CALLER_COMP iomap: use find_next_bit() for uptodate bitmap scanning iomap: use find_next_bit() for dirty bitmap scanning iomap: simplify when reads can be skipped for writes iomap: simplify ->read_folio_range() error handling for reads iomap: optimize pending async writeback accounting docs: document iomap writeback's iomap_finish_folio_write() requirement iomap: account for unaligned end offsets when truncating read range iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted xfs: support sub-block aligned vectors in always COW mode iomap: add IOMAP_DIO_FSBLOCK_ALIGNED flag xfs: error tag to force zeroing on debug kernels iomap: remove old partial eof zeroing optimization xfs: fill dirty folios on zero range of unwritten mappings ...
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c58
1 files changed, 58 insertions, 0 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 024b71da5224..bf586eeb9be6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2366,6 +2366,64 @@ out:
}
EXPORT_SYMBOL(filemap_get_folios_tag);
+/**
+ * filemap_get_folios_dirty - Get a batch of dirty folios
+ * @mapping: The address_space to search
+ * @start: The starting folio index
+ * @end: The final folio index (inclusive)
+ * @fbatch: The batch to fill
+ *
+ * filemap_get_folios_dirty() works exactly like filemap_get_folios(), except
+ * the returned folios are presumed to be dirty or undergoing writeback. Dirty
+ * state is presumed because we don't block on folio lock nor want to miss
+ * folios. Callers that need to can recheck state upon locking the folio.
+ *
+ * This may not return all dirty folios if the batch gets filled up.
+ *
+ * Return: The number of folios found.
+ * Also update @start to be positioned for traversal of the next folio.
+ */
+unsigned filemap_get_folios_dirty(struct address_space *mapping, pgoff_t *start,
+ pgoff_t end, struct folio_batch *fbatch)
+{
+ XA_STATE(xas, &mapping->i_pages, *start);
+ struct folio *folio;
+
+ rcu_read_lock();
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
+ if (xa_is_value(folio))
+ continue;
+ if (folio_trylock(folio)) {
+ bool clean = !folio_test_dirty(folio) &&
+ !folio_test_writeback(folio);
+ folio_unlock(folio);
+ if (clean) {
+ folio_put(folio);
+ continue;
+ }
+ }
+ if (!folio_batch_add(fbatch, folio)) {
+ unsigned long nr = folio_nr_pages(folio);
+ *start = folio->index + nr;
+ goto out;
+ }
+ }
+ /*
+ * We come here when there is no folio beyond @end. We take care to not
+ * overflow the index @start as it confuses some of the callers. This
+ * breaks the iteration when there is a folio at index -1 but that is
+ * already broke anyway.
+ */
+ if (end == (pgoff_t)-1)
+ *start = (pgoff_t)-1;
+ else
+ *start = end + 1;
+out:
+ rcu_read_unlock();
+
+ return folio_batch_count(fbatch);
+}
+
/*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail
* a _large_ part of the i/o request. Imagine the worst scenario: