summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNilay Shroff <nilay@linux.ibm.com>2025-11-13 14:28:18 +0530
committerJens Axboe <axboe@kernel.dk>2025-11-13 09:27:49 -0700
commit232143b605387b372dee0ec7830f93b93df5f67d (patch)
tree0a68492c431dc740e995e01ab33a9678b99c77ee
parentc3f42a6de708db6891879547b29cd0d84ad83c92 (diff)
block: unify elevator tags and type xarrays into struct elv_change_ctx
Currently, the nr_hw_queues update path manages two disjoint xarrays — one for elevator tags and another for elevator type — both used during elevator switching. Maintaining these two parallel structures for the same purpose adds unnecessary complexity and potential for mismatched state. This patch unifies both xarrays into a single structure, struct elv_change_ctx, which holds all per-queue elevator change context. A single xarray, named elv_tbl, now maps each queue (q->id) in a tagset to its corresponding elv_change_ctx entry, encapsulating the elevator tags, type and name references. This unification simplifies the code, improves maintainability, and clarifies ownership of per-queue elevator state. Reviewed-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Yu Kuai <yukuai@fnnas.com> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-mq-sched.c76
-rw-r--r--block/blk-mq-sched.h3
-rw-r--r--block/blk-mq.c50
-rw-r--r--block/blk.h7
-rw-r--r--block/elevator.c31
-rw-r--r--block/elevator.h15
6 files changed, 115 insertions, 67 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 97b69fbe26f6..7ba285a7759b 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -427,11 +427,11 @@ void blk_mq_free_sched_tags(struct elevator_tags *et,
kfree(et);
}
-void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+void blk_mq_free_sched_tags_batch(struct xarray *elv_tbl,
struct blk_mq_tag_set *set)
{
struct request_queue *q;
- struct elevator_tags *et;
+ struct elv_change_ctx *ctx;
lockdep_assert_held_write(&set->update_nr_hwq_lock);
@@ -444,13 +444,47 @@ void blk_mq_free_sched_tags_batch(struct xarray *et_table,
* concurrently.
*/
if (q->elevator) {
- et = xa_load(et_table, q->id);
- if (unlikely(!et))
+ ctx = xa_load(elv_tbl, q->id);
+ if (!ctx || !ctx->et) {
WARN_ON_ONCE(1);
- else
- blk_mq_free_sched_tags(et, set);
+ continue;
+ }
+ blk_mq_free_sched_tags(ctx->et, set);
+ ctx->et = NULL;
+ }
+ }
+}
+
+void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl)
+{
+ unsigned long i;
+ struct elv_change_ctx *ctx;
+
+ xa_for_each(elv_tbl, i, ctx) {
+ xa_erase(elv_tbl, i);
+ kfree(ctx);
+ }
+}
+
+int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
+ struct blk_mq_tag_set *set)
+{
+ struct request_queue *q;
+ struct elv_change_ctx *ctx;
+
+ lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ ctx = kzalloc(sizeof(struct elv_change_ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (xa_insert(elv_tbl, q->id, ctx, GFP_KERNEL)) {
+ kfree(ctx);
+ return -ENOMEM;
}
}
+ return 0;
}
struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
@@ -497,12 +531,13 @@ out:
return NULL;
}
-int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
+int blk_mq_alloc_sched_tags_batch(struct xarray *elv_tbl,
struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
{
+ struct elv_change_ctx *ctx;
struct request_queue *q;
struct elevator_tags *et;
- gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+ int ret = -ENOMEM;
lockdep_assert_held_write(&set->update_nr_hwq_lock);
@@ -515,26 +550,31 @@ int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
* concurrently.
*/
if (q->elevator) {
- et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
+ ctx = xa_load(elv_tbl, q->id);
+ if (WARN_ON_ONCE(!ctx)) {
+ ret = -ENOENT;
+ goto out_unwind;
+ }
+
+ ctx->et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
blk_mq_default_nr_requests(set));
- if (!et)
+ if (!ctx->et)
goto out_unwind;
- if (xa_insert(et_table, q->id, et, gfp))
- goto out_free_tags;
+
}
}
return 0;
-out_free_tags:
- blk_mq_free_sched_tags(et, set);
out_unwind:
list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
if (q->elevator) {
- et = xa_load(et_table, q->id);
- if (et)
- blk_mq_free_sched_tags(et, set);
+ ctx = xa_load(elv_tbl, q->id);
+ if (ctx && ctx->et) {
+ blk_mq_free_sched_tags(ctx->et, set);
+ ctx->et = NULL;
+ }
}
}
- return -ENOMEM;
+ return ret;
}
/* caller must have a reference to @e, will grab another one if successful */
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 8e21a6b1415d..2fddbc91a235 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -27,6 +27,9 @@ struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
unsigned int nr_hw_queues, unsigned int nr_requests);
int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
struct blk_mq_tag_set *set, unsigned int nr_hw_queues);
+int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
+ struct blk_mq_tag_set *set);
+void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl);
void blk_mq_free_sched_tags(struct elevator_tags *et,
struct blk_mq_tag_set *set);
void blk_mq_free_sched_tags_batch(struct xarray *et_table,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2fdeaac0efb..04c6356f0eb2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4989,27 +4989,28 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q,
* Switch back to the elevator type stored in the xarray.
*/
static void blk_mq_elv_switch_back(struct request_queue *q,
- struct xarray *elv_tbl, struct xarray *et_tbl)
+ struct xarray *elv_tbl)
{
- struct elevator_type *e = xa_load(elv_tbl, q->id);
- struct elevator_tags *t = xa_load(et_tbl, q->id);
+ struct elv_change_ctx *ctx = xa_load(elv_tbl, q->id);
+
+ if (WARN_ON_ONCE(!ctx))
+ return;
/* The elv_update_nr_hw_queues unfreezes the queue. */
- elv_update_nr_hw_queues(q, e, t);
+ elv_update_nr_hw_queues(q, ctx);
/* Drop the reference acquired in blk_mq_elv_switch_none. */
- if (e)
- elevator_put(e);
+ if (ctx->type)
+ elevator_put(ctx->type);
}
/*
- * Stores elevator type in xarray and set current elevator to none. It uses
- * q->id as an index to store the elevator type into the xarray.
+ * Stores elevator name and type in ctx and set current elevator to none.
*/
static int blk_mq_elv_switch_none(struct request_queue *q,
struct xarray *elv_tbl)
{
- int ret = 0;
+ struct elv_change_ctx *ctx;
lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock);
@@ -5021,10 +5022,11 @@ static int blk_mq_elv_switch_none(struct request_queue *q,
* can't run concurrently.
*/
if (q->elevator) {
+ ctx = xa_load(elv_tbl, q->id);
+ if (WARN_ON_ONCE(!ctx))
+ return -ENOENT;
- ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL);
- if (WARN_ON_ONCE(ret))
- return ret;
+ ctx->name = q->elevator->type->elevator_name;
/*
* Before we switch elevator to 'none', take a reference to
@@ -5035,9 +5037,14 @@ static int blk_mq_elv_switch_none(struct request_queue *q,
*/
__elevator_get(q->elevator->type);
+ /*
+ * Store elevator type so that we can release the reference
+ * taken above later.
+ */
+ ctx->type = q->elevator->type;
elevator_set_none(q);
}
- return ret;
+ return 0;
}
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
@@ -5047,7 +5054,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int prev_nr_hw_queues = set->nr_hw_queues;
unsigned int memflags;
int i;
- struct xarray elv_tbl, et_tbl;
+ struct xarray elv_tbl;
bool queues_frozen = false;
lockdep_assert_held(&set->tag_list_lock);
@@ -5061,11 +5068,12 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
memflags = memalloc_noio_save();
- xa_init(&et_tbl);
- if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0)
- goto out_memalloc_restore;
-
xa_init(&elv_tbl);
+ if (blk_mq_alloc_sched_ctx_batch(&elv_tbl, set) < 0)
+ goto out_free_ctx;
+
+ if (blk_mq_alloc_sched_tags_batch(&elv_tbl, set, nr_hw_queues) < 0)
+ goto out_free_ctx;
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_debugfs_unregister_hctxs(q);
@@ -5111,7 +5119,7 @@ switch_back:
/* switch_back expects queue to be frozen */
if (!queues_frozen)
blk_mq_freeze_queue_nomemsave(q);
- blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+ blk_mq_elv_switch_back(q, &elv_tbl);
}
list_for_each_entry(q, &set->tag_list, tag_set_list) {
@@ -5122,9 +5130,9 @@ switch_back:
blk_mq_add_hw_queues_cpuhp(q);
}
+out_free_ctx:
+ blk_mq_free_sched_ctx_batch(&elv_tbl);
xa_destroy(&elv_tbl);
- xa_destroy(&et_tbl);
-out_memalloc_restore:
memalloc_noio_restore(memflags);
/* Free the excess tags when nr_hw_queues shrink. */
diff --git a/block/blk.h b/block/blk.h
index 4d809588b771..e4c433f62dfc 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -11,8 +11,7 @@
#include <xen/xen.h>
#include "blk-crypto-internal.h"
-struct elevator_type;
-struct elevator_tags;
+struct elv_change_ctx;
/*
* Default upper limit for the software max_sectors limit used for regular I/Os.
@@ -333,8 +332,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
bool blk_insert_flush(struct request *rq);
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
- struct elevator_tags *t);
+void elv_update_nr_hw_queues(struct request_queue *q,
+ struct elv_change_ctx *ctx);
void elevator_set_default(struct request_queue *q);
void elevator_set_none(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index e2ebfbf107b3..cd7bdff205c8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -45,19 +45,6 @@
#include "blk-wbt.h"
#include "blk-cgroup.h"
-/* Holding context data for changing elevator */
-struct elv_change_ctx {
- const char *name;
- bool no_uevent;
-
- /* for unregistering old elevator */
- struct elevator_queue *old;
- /* for registering new elevator */
- struct elevator_queue *new;
- /* holds sched tags data */
- struct elevator_tags *et;
-};
-
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
@@ -706,32 +693,28 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
* The I/O scheduler depends on the number of hardware queues, this forces a
* reattachment when nr_hw_queues changes.
*/
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
- struct elevator_tags *t)
+void elv_update_nr_hw_queues(struct request_queue *q,
+ struct elv_change_ctx *ctx)
{
struct blk_mq_tag_set *set = q->tag_set;
- struct elv_change_ctx ctx = {};
int ret = -ENODEV;
WARN_ON_ONCE(q->mq_freeze_depth == 0);
- if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
- ctx.name = e->elevator_name;
- ctx.et = t;
-
+ if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) {
mutex_lock(&q->elevator_lock);
/* force to reattach elevator after nr_hw_queue is updated */
- ret = elevator_switch(q, &ctx);
+ ret = elevator_switch(q, ctx);
mutex_unlock(&q->elevator_lock);
}
blk_mq_unfreeze_queue_nomemrestore(q);
if (!ret)
- WARN_ON_ONCE(elevator_change_done(q, &ctx));
+ WARN_ON_ONCE(elevator_change_done(q, ctx));
/*
* Free sched tags if it's allocated but we couldn't switch elevator.
*/
- if (t && !ctx.new)
- blk_mq_free_sched_tags(t, set);
+ if (ctx->et && !ctx->new)
+ blk_mq_free_sched_tags(ctx->et, set);
}
/*
diff --git a/block/elevator.h b/block/elevator.h
index c4d20155065e..bad43182361e 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -32,6 +32,21 @@ struct elevator_tags {
struct blk_mq_tags *tags[];
};
+/* Holding context data for changing elevator */
+struct elv_change_ctx {
+ const char *name;
+ bool no_uevent;
+
+ /* for unregistering old elevator */
+ struct elevator_queue *old;
+ /* for registering new elevator */
+ struct elevator_queue *new;
+ /* store elevator type */
+ struct elevator_type *type;
+ /* holds sched tags data */
+ struct elevator_tags *et;
+};
+
struct elevator_mq_ops {
int (*init_sched)(struct request_queue *, struct elevator_queue *);
void (*exit_sched)(struct elevator_queue *);