summaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c87
1 files changed, 58 insertions, 29 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 194eacbefc95..7be23ff20b27 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
}
EXPORT_SYMBOL(end_buffer_write_sync);
-/*
- * Various filesystems appear to want __find_get_block to be non-blocking.
- * But it's the page lock which protects the buffers. To get around this,
- * we get exclusion from try_to_free_buffers with the blockdev mapping's
- * i_private_lock.
- *
- * Hack idea: for the blockdev mapping, i_private_lock contention
- * may be quite high. This code could TryLock the page, and if that
- * succeeds, there is no need to take i_private_lock.
- */
static struct buffer_head *
-__find_get_block_slow(struct block_device *bdev, sector_t block)
+__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
{
struct address_space *bd_mapping = bdev->bd_mapping;
const int blkbits = bd_mapping->host->i_blkbits;
@@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
if (IS_ERR(folio))
goto out;
- spin_lock(&bd_mapping->i_private_lock);
+ /*
+ * Folio lock protects the buffers. Callers that cannot block
+ * will fallback to serializing vs try_to_free_buffers() via
+ * the i_private_lock.
+ */
+ if (atomic)
+ spin_lock(&bd_mapping->i_private_lock);
+ else
+ folio_lock(folio);
+
head = folio_buffers(folio);
if (!head)
goto out_unlock;
+ /*
+ * Upon a noref migration, the folio lock serializes here;
+ * otherwise bail.
+ */
+ if (test_bit_acquire(BH_Migrate, &head->b_state)) {
+ WARN_ON(!atomic);
+ goto out_unlock;
+ }
+
bh = head;
do {
if (!buffer_mapped(bh))
@@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
1 << blkbits);
}
out_unlock:
- spin_unlock(&bd_mapping->i_private_lock);
+ if (atomic)
+ spin_unlock(&bd_mapping->i_private_lock);
+ else
+ folio_unlock(folio);
folio_put(folio);
out:
return ret;
@@ -656,7 +667,9 @@ EXPORT_SYMBOL(generic_buffers_fsync);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize)
{
- struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
+ struct buffer_head *bh;
+
+ bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
write_dirty_buffer(bh, 0);
@@ -1386,16 +1399,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
/*
* Perform a pagecache lookup for the matching buffer. If it's there, refresh
* it in the LRU and mark it as accessed. If it is not present then return
- * NULL
+ * NULL. Atomic context callers may also return NULL if the buffer is being
+ * migrated; similarly the page is not marked accessed either.
*/
-struct buffer_head *
-__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
+static struct buffer_head *
+find_get_block_common(struct block_device *bdev, sector_t block,
+ unsigned size, bool atomic)
{
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) {
/* __find_get_block_slow will mark the page accessed */
- bh = __find_get_block_slow(bdev, block);
+ bh = __find_get_block_slow(bdev, block, atomic);
if (bh)
bh_lru_install(bh);
} else
@@ -1403,8 +1418,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
return bh;
}
+
+struct buffer_head *
+__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
+{
+ return find_get_block_common(bdev, block, size, true);
+}
EXPORT_SYMBOL(__find_get_block);
+/* same as __find_get_block() but allows sleeping contexts */
+struct buffer_head *
+__find_get_block_nonatomic(struct block_device *bdev, sector_t block,
+ unsigned size)
+{
+ return find_get_block_common(bdev, block, size, false);
+}
+EXPORT_SYMBOL(__find_get_block_nonatomic);
+
/**
* bdev_getblk - Get a buffer_head in a block device's buffer cache.
* @bdev: The block device.
@@ -1422,7 +1452,12 @@ EXPORT_SYMBOL(__find_get_block);
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
- struct buffer_head *bh = __find_get_block(bdev, block, size);
+ struct buffer_head *bh;
+
+ if (gfpflags_allow_blocking(gfp))
+ bh = __find_get_block_nonatomic(bdev, block, size);
+ else
+ bh = __find_get_block(bdev, block, size);
might_alloc(gfp);
if (bh)
@@ -2166,7 +2201,7 @@ int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(__block_write_begin);
-static void __block_commit_write(struct folio *folio, size_t from, size_t to)
+void block_commit_write(struct folio *folio, size_t from, size_t to)
{
size_t block_start, block_end;
bool partial = false;
@@ -2204,6 +2239,7 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
if (!partial)
folio_mark_uptodate(folio);
}
+EXPORT_SYMBOL(block_commit_write);
/*
* block_write_begin takes care of the basic task of block allocation and
@@ -2262,7 +2298,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
flush_dcache_folio(folio);
/* This could be a short (even 0-length) commit */
- __block_commit_write(folio, start, start + copied);
+ block_commit_write(folio, start, start + copied);
return copied;
}
@@ -2566,13 +2602,6 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
}
EXPORT_SYMBOL(cont_write_begin);
-void block_commit_write(struct page *page, unsigned from, unsigned to)
-{
- struct folio *folio = page_folio(page);
- __block_commit_write(folio, from, to);
-}
-EXPORT_SYMBOL(block_commit_write);
-
/*
* block_page_mkwrite() is not allowed to change the file size as it gets
* called from a page fault handler when a page is first dirtied. Hence we must
@@ -2618,7 +2647,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
if (unlikely(ret))
goto out_unlock;
- __block_commit_write(folio, 0, end);
+ block_commit_write(folio, 0, end);
folio_mark_dirty(folio);
folio_wait_stable(folio);