diff options
Diffstat (limited to 'drivers/block/zram')
| -rw-r--r-- | drivers/block/zram/zram_drv.c | 483 | ||||
| -rw-r--r-- | drivers/block/zram/zram_drv.h | 2 |
2 files changed, 376 insertions, 109 deletions
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a43074657531..5759823d6314 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -500,8 +500,31 @@ out: } #ifdef CONFIG_ZRAM_WRITEBACK +#define INVALID_BDEV_BLOCK (~0UL) + +struct zram_wb_ctl { + /* idle list is accessed only by the writeback task, no concurrency */ + struct list_head idle_reqs; + /* done list is accessed concurrently, protected by done_lock */ + struct list_head done_reqs; + wait_queue_head_t done_wait; + spinlock_t done_lock; + atomic_t num_inflight; +}; + +struct zram_wb_req { + unsigned long blk_idx; + struct page *page; + struct zram_pp_slot *pps; + struct bio_vec bio_vec; + struct bio bio; + + struct list_head entry; +}; + static ssize_t writeback_limit_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, + const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); u64 val; @@ -510,33 +533,31 @@ static ssize_t writeback_limit_enable_store(struct device *dev, if (kstrtoull(buf, 10, &val)) return ret; - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); + down_write(&zram->init_lock); zram->wb_limit_enable = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); + up_write(&zram->init_lock); ret = len; return ret; } static ssize_t writeback_limit_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { bool val; struct zram *zram = dev_to_zram(dev); down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); val = zram->wb_limit_enable; - spin_unlock(&zram->wb_limit_lock); up_read(&zram->init_lock); return sysfs_emit(buf, "%d\n", val); } static ssize_t writeback_limit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, + const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); 
u64 val; @@ -545,31 +566,71 @@ static ssize_t writeback_limit_store(struct device *dev, if (kstrtoull(buf, 10, &val)) return ret; - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); + /* + * When the page size is greater than 4KB, if bd_wb_limit is set to + * a value that is not page-size aligned, it will cause value + * wrapping. For example, when the page size is set to 16KB and + * bd_wb_limit is set to 3, a single write-back operation will + * cause bd_wb_limit to become -1. Even more terrifying is that + * bd_wb_limit is an unsigned number. + */ + val = rounddown(val, PAGE_SIZE / 4096); + + down_write(&zram->init_lock); zram->bd_wb_limit = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); + up_write(&zram->init_lock); ret = len; return ret; } static ssize_t writeback_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { u64 val; struct zram *zram = dev_to_zram(dev); down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); val = zram->bd_wb_limit; - spin_unlock(&zram->wb_limit_lock); up_read(&zram->init_lock); return sysfs_emit(buf, "%llu\n", val); } +static ssize_t writeback_batch_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u32 val; + + if (kstrtouint(buf, 10, &val)) + return -EINVAL; + + if (!val) + return -EINVAL; + + down_write(&zram->init_lock); + zram->wb_batch_size = val; + up_write(&zram->init_lock); + + return len; +} + +static ssize_t writeback_batch_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u32 val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->wb_batch_size; + up_read(&zram->init_lock); + + return sysfs_emit(buf, "%u\n", val); +} + static void reset_bdev(struct zram *zram) { if (!zram->backing_dev) @@ -697,23 +758,20 @@ out: return err; } -static unsigned long 
alloc_block_bdev(struct zram *zram) +static unsigned long zram_reserve_bdev_block(struct zram *zram) { - unsigned long blk_idx = 1; -retry: - /* skip 0 bit to confuse zram.handle = 0 */ - blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); - if (blk_idx == zram->nr_pages) - return 0; + unsigned long blk_idx; - if (test_and_set_bit(blk_idx, zram->bitmap)) - goto retry; + blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0); + if (blk_idx == zram->nr_pages) + return INVALID_BDEV_BLOCK; + set_bit(blk_idx, zram->bitmap); atomic64_inc(&zram->stats.bd_count); return blk_idx; } -static void free_block_bdev(struct zram *zram, unsigned long blk_idx) +static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) { int was_set; @@ -734,32 +792,249 @@ static void read_from_bdev_async(struct zram *zram, struct page *page, submit_bio(bio); } -static int zram_writeback_slots(struct zram *zram, struct zram_pp_ctl *ctl) +static void release_wb_req(struct zram_wb_req *req) { - unsigned long blk_idx = 0; - struct page *page = NULL; - struct zram_pp_slot *pps; - struct bio_vec bio_vec; - struct bio bio; + __free_page(req->page); + kfree(req); +} + +static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) +{ + if (!wb_ctl) + return; + + /* We should never have inflight requests at this point */ + WARN_ON(atomic_read(&wb_ctl->num_inflight)); + WARN_ON(!list_empty(&wb_ctl->done_reqs)); + + while (!list_empty(&wb_ctl->idle_reqs)) { + struct zram_wb_req *req; + + req = list_first_entry(&wb_ctl->idle_reqs, + struct zram_wb_req, entry); + list_del(&req->entry); + release_wb_req(req); + } + + kfree(wb_ctl); +} + +static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) +{ + struct zram_wb_ctl *wb_ctl; + int i; + + wb_ctl = kmalloc(sizeof(*wb_ctl), GFP_KERNEL); + if (!wb_ctl) + return NULL; + + INIT_LIST_HEAD(&wb_ctl->idle_reqs); + INIT_LIST_HEAD(&wb_ctl->done_reqs); + atomic_set(&wb_ctl->num_inflight, 0); + init_waitqueue_head(&wb_ctl->done_wait); + 
spin_lock_init(&wb_ctl->done_lock); + + for (i = 0; i < zram->wb_batch_size; i++) { + struct zram_wb_req *req; + + /* + * This is a fatal condition only if we couldn't allocate + * any requests at all. Otherwise we just work with the + * requests that we have successfully allocated, so that + * writeback can still proceed, even if there is only one + * request on the idle list. + */ + req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_NOWARN); + if (!req) + break; + + req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN); + if (!req->page) { + kfree(req); + break; + } + + list_add(&req->entry, &wb_ctl->idle_reqs); + } + + /* We couldn't allocate any requests, so writeback is not possible */ + if (list_empty(&wb_ctl->idle_reqs)) + goto release_wb_ctl; + + return wb_ctl; + +release_wb_ctl: + release_wb_ctl(wb_ctl); + return NULL; +} + +static void zram_account_writeback_rollback(struct zram *zram) +{ + lockdep_assert_held_read(&zram->init_lock); + + if (zram->wb_limit_enable) + zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); +} + +static void zram_account_writeback_submit(struct zram *zram) +{ + lockdep_assert_held_read(&zram->init_lock); + + if (zram->wb_limit_enable && zram->bd_wb_limit > 0) + zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); +} + +static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) +{ + u32 index = req->pps->index; + int err; + + err = blk_status_to_errno(req->bio.bi_status); + if (err) { + /* + * Failed wb requests should not be accounted in wb_limit + * (if enabled). + */ + zram_account_writeback_rollback(zram); + zram_release_bdev_block(zram, req->blk_idx); + return err; + } + + atomic64_inc(&zram->stats.bd_writes); + zram_slot_lock(zram, index); + /* + * We release slot lock during writeback so slot can change under us: + * slot_free() or slot_free() and zram_write_page(). In both cases + * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can + * set ZRAM_PP_SLOT on such slots until current post-processing + * finishes. 
+ */ + if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) { + zram_release_bdev_block(zram, req->blk_idx); + goto out; + } + + zram_free_page(zram, index); + zram_set_flag(zram, index, ZRAM_WB); + zram_set_handle(zram, index, req->blk_idx); + atomic64_inc(&zram->stats.pages_stored); + +out: + zram_slot_unlock(zram, index); + return 0; +} + +static void zram_writeback_endio(struct bio *bio) +{ + struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio); + struct zram_wb_ctl *wb_ctl = bio->bi_private; + unsigned long flags; + + spin_lock_irqsave(&wb_ctl->done_lock, flags); + list_add(&req->entry, &wb_ctl->done_reqs); + spin_unlock_irqrestore(&wb_ctl->done_lock, flags); + + wake_up(&wb_ctl->done_wait); +} + +static void zram_submit_wb_request(struct zram *zram, + struct zram_wb_ctl *wb_ctl, + struct zram_wb_req *req) +{ + /* + * wb_limit (if enabled) should be adjusted before submission, + * so that we don't over-submit. + */ + zram_account_writeback_submit(zram); + atomic_inc(&wb_ctl->num_inflight); + req->bio.bi_private = wb_ctl; + submit_bio(&req->bio); +} + +static int zram_complete_done_reqs(struct zram *zram, + struct zram_wb_ctl *wb_ctl) +{ + struct zram_wb_req *req; + unsigned long flags; int ret = 0, err; - u32 index; - page = alloc_page(GFP_KERNEL); - if (!page) - return -ENOMEM; + while (atomic_read(&wb_ctl->num_inflight) > 0) { + spin_lock_irqsave(&wb_ctl->done_lock, flags); + req = list_first_entry_or_null(&wb_ctl->done_reqs, + struct zram_wb_req, entry); + if (req) + list_del(&req->entry); + spin_unlock_irqrestore(&wb_ctl->done_lock, flags); + + /* ->num_inflight > 0 doesn't mean we have done requests */ + if (!req) + break; + + err = zram_writeback_complete(zram, req); + if (err) + ret = err; + + atomic_dec(&wb_ctl->num_inflight); + release_pp_slot(zram, req->pps); + req->pps = NULL; + + list_add(&req->entry, &wb_ctl->idle_reqs); + } + + return ret; +} + +static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) +{ + struct 
zram_wb_req *req; + + req = list_first_entry_or_null(&wb_ctl->idle_reqs, + struct zram_wb_req, entry); + if (req) + list_del(&req->entry); + return req; +} + +static int zram_writeback_slots(struct zram *zram, + struct zram_pp_ctl *ctl, + struct zram_wb_ctl *wb_ctl) +{ + unsigned long blk_idx = INVALID_BDEV_BLOCK; + struct zram_wb_req *req = NULL; + struct zram_pp_slot *pps; + int ret = 0, err = 0; + u32 index = 0; while ((pps = select_pp_slot(ctl))) { - spin_lock(&zram->wb_limit_lock); if (zram->wb_limit_enable && !zram->bd_wb_limit) { - spin_unlock(&zram->wb_limit_lock); ret = -EIO; break; } - spin_unlock(&zram->wb_limit_lock); - if (!blk_idx) { - blk_idx = alloc_block_bdev(zram); - if (!blk_idx) { + while (!req) { + req = zram_select_idle_req(wb_ctl); + if (req) + break; + + wait_event(wb_ctl->done_wait, + !list_empty(&wb_ctl->done_reqs)); + + err = zram_complete_done_reqs(zram, wb_ctl); + /* + * BIO errors are not fatal, we continue and simply + * attempt to writeback the remaining objects (pages). + * At the same time we need to signal user-space that + * some writes (at least one, but also could be all of + * them) were not successful and we do so by returning + * the most recent BIO error. + */ + if (err) + ret = err; + } + + if (blk_idx == INVALID_BDEV_BLOCK) { + blk_idx = zram_reserve_bdev_block(zram); + if (blk_idx == INVALID_BDEV_BLOCK) { ret = -ENOSPC; break; } @@ -768,74 +1043,54 @@ static int zram_writeback_slots(struct zram *zram, struct zram_pp_ctl *ctl) index = pps->index; zram_slot_lock(zram, index); /* - * scan_slots() sets ZRAM_PP_SLOT and relases slot lock, so + * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so * slots can change in the meantime. If slots are accessed or * freed they lose ZRAM_PP_SLOT flag and hence we don't * post-process them. 
*/ if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) goto next; - if (zram_read_from_zspool(zram, page, index)) + if (zram_read_from_zspool(zram, req->page, index)) goto next; zram_slot_unlock(zram, index); - bio_init(&bio, zram->bdev, &bio_vec, 1, - REQ_OP_WRITE | REQ_SYNC); - bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); - __bio_add_page(&bio, page, PAGE_SIZE, 0); - - /* - * XXX: A single page IO would be inefficient for write - * but it would be not bad as starter. - */ - err = submit_bio_wait(&bio); - if (err) { - release_pp_slot(zram, pps); - /* - * BIO errors are not fatal, we continue and simply - * attempt to writeback the remaining objects (pages). - * At the same time we need to signal user-space that - * some writes (at least one, but also could be all of - * them) were not successful and we do so by returning - * the most recent BIO error. - */ - ret = err; - continue; - } - - atomic64_inc(&zram->stats.bd_writes); - zram_slot_lock(zram, index); /* - * Same as above, we release slot lock during writeback so - * slot can change under us: slot_free() or slot_free() and - * reallocation (zram_write_page()). In both cases slot loses - * ZRAM_PP_SLOT flag. No concurrent post-processing can set - * ZRAM_PP_SLOT on such slots until current post-processing - * finishes. + * From now on pp-slot is owned by the req, remove it from + * its pp bucket. 
*/ - if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) - goto next; + list_del_init(&pps->entry); + + req->blk_idx = blk_idx; + req->pps = pps; + bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); + req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); + req->bio.bi_end_io = zram_writeback_endio; + __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); + + zram_submit_wb_request(zram, wb_ctl, req); + blk_idx = INVALID_BDEV_BLOCK; + req = NULL; + cond_resched(); + continue; - zram_free_page(zram, index); - zram_set_flag(zram, index, ZRAM_WB); - zram_set_handle(zram, index, blk_idx); - blk_idx = 0; - atomic64_inc(&zram->stats.pages_stored); - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && zram->bd_wb_limit > 0) - zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); - spin_unlock(&zram->wb_limit_lock); next: zram_slot_unlock(zram, index); release_pp_slot(zram, pps); - - cond_resched(); } - if (blk_idx) - free_block_bdev(zram, blk_idx); - if (page) - __free_page(page); + /* + * Selected idle req, but never submitted it due to some error or + * wb limit. 
+ */ + if (req) + release_wb_req(req); + + while (atomic_read(&wb_ctl->num_inflight) > 0) { + wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); + err = zram_complete_done_reqs(zram, wb_ctl); + if (err) + ret = err; + } return ret; } @@ -948,7 +1203,8 @@ static ssize_t writeback_store(struct device *dev, struct zram *zram = dev_to_zram(dev); u64 nr_pages = zram->disksize >> PAGE_SHIFT; unsigned long lo = 0, hi = nr_pages; - struct zram_pp_ctl *ctl = NULL; + struct zram_pp_ctl *pp_ctl = NULL; + struct zram_wb_ctl *wb_ctl = NULL; char *args, *param, *val; ssize_t ret = len; int err, mode = 0; @@ -970,8 +1226,14 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - ctl = init_pp_ctl(); - if (!ctl) { + pp_ctl = init_pp_ctl(); + if (!pp_ctl) { + ret = -ENOMEM; + goto release_init_lock; + } + + wb_ctl = init_wb_ctl(zram); + if (!wb_ctl) { ret = -ENOMEM; goto release_init_lock; } @@ -1000,7 +1262,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - scan_slots_for_writeback(zram, mode, lo, hi, ctl); + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); break; } @@ -1011,7 +1273,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - scan_slots_for_writeback(zram, mode, lo, hi, ctl); + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); break; } @@ -1022,7 +1284,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - scan_slots_for_writeback(zram, mode, lo, hi, ctl); + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); continue; } @@ -1033,17 +1295,18 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - scan_slots_for_writeback(zram, mode, lo, hi, ctl); + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); continue; } } - err = zram_writeback_slots(zram, ctl); + err = zram_writeback_slots(zram, pp_ctl, wb_ctl); if (err) ret = err; release_init_lock: - release_pp_ctl(zram, ctl); + release_pp_ctl(zram, pp_ctl); + 
release_wb_ctl(wb_ctl); atomic_set(&zram->pp_in_progress, 0); up_read(&zram->init_lock); @@ -1112,7 +1375,9 @@ static int read_from_bdev(struct zram *zram, struct page *page, return -EIO; } -static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; +static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) +{ +} #endif #ifdef CONFIG_ZRAM_MEMORY_TRACKING @@ -1634,7 +1899,7 @@ static void zram_free_page(struct zram *zram, size_t index) if (zram_test_flag(zram, index, ZRAM_WB)) { zram_clear_flag(zram, index, ZRAM_WB); - free_block_bdev(zram, zram_get_handle(zram, index)); + zram_release_bdev_block(zram, zram_get_handle(zram, index)); goto out; } @@ -1740,14 +2005,14 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index, ret = zram_read_from_zspool(zram, page, index); zram_slot_unlock(zram, index); } else { + unsigned long blk_idx = zram_get_handle(zram, index); + /* * The slot should be unlocked before reading from the backing * device. */ zram_slot_unlock(zram, index); - - ret = read_from_bdev(zram, page, zram_get_handle(zram, index), - parent); + ret = read_from_bdev(zram, page, blk_idx, parent); } /* Should NEVER happen. Return bio error if it does. 
*/ @@ -2610,6 +2875,7 @@ static DEVICE_ATTR_RW(backing_dev); static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); +static DEVICE_ATTR_RW(writeback_batch_size); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); @@ -2631,6 +2897,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_writeback.attr, &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, + &dev_attr_writeback_batch_size.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, @@ -2692,7 +2959,7 @@ static int zram_add(void) init_rwsem(&zram->init_lock); #ifdef CONFIG_ZRAM_WRITEBACK - spin_lock_init(&zram->wb_limit_lock); + zram->wb_batch_size = 32; #endif /* gendisk structure */ diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 6cee93f9c0d0..c6d94501376c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -127,8 +127,8 @@ struct zram { bool claim; /* Protected by disk->open_mutex */ #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; - spinlock_t wb_limit_lock; bool wb_limit_enable; + u32 wb_batch_size; u64 bd_wb_limit; struct block_device *bdev; unsigned long *bitmap; |