summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorZqiang <qiang.zhang@linux.dev>2025-12-01 19:25:40 +0800
committerTejun Heo <tj@kernel.org>2025-12-01 10:58:49 -1000
commit1dd6c84f1c544e552848a8968599220bd464e338 (patch)
tree6834a6a5cb34aee2a364b28b0213bb8d532d5d9e /kernel
parent06a7415cf24774baf1945fc28ea152e888bd72bb (diff)
sched_ext: Fix incorrect sched_class settings for per-cpu migration tasks
When loading the ebpf scheduler, the tasks in the scx_tasks list will be traversed and invoke __setscheduler_class() to get new sched_class. however, this would also incorrectly set the per-cpu migration task's->sched_class to rt_sched_class, even after unload, the per-cpu migration task's->sched_class remains sched_rt_class. The log for this issue is as follows: ./scx_rustland --stats 1 [ 199.245639][ T630] sched_ext: "rustland" does not implement cgroup cpu.weight [ 199.269213][ T630] sched_ext: BPF scheduler "rustland" enabled 04:25:09 [INFO] RustLand scheduler attached bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/ { printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }' Attaching 1 probe... migration/0:24->rt_sched_class+0x0/0xe0 migration/1:27->rt_sched_class+0x0/0xe0 migration/2:33->rt_sched_class+0x0/0xe0 migration/3:39->rt_sched_class+0x0/0xe0 migration/4:45->rt_sched_class+0x0/0xe0 migration/5:52->rt_sched_class+0x0/0xe0 migration/6:58->rt_sched_class+0x0/0xe0 migration/7:64->rt_sched_class+0x0/0xe0 sched_ext: BPF scheduler "rustland" disabled (unregistered from user space) EXIT: unregistered from user space 04:25:21 [INFO] Unregister RustLand scheduler bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/ { printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }' Attaching 1 probe... migration/0:24->rt_sched_class+0x0/0xe0 migration/1:27->rt_sched_class+0x0/0xe0 migration/2:33->rt_sched_class+0x0/0xe0 migration/3:39->rt_sched_class+0x0/0xe0 migration/4:45->rt_sched_class+0x0/0xe0 migration/5:52->rt_sched_class+0x0/0xe0 migration/6:58->rt_sched_class+0x0/0xe0 migration/7:64->rt_sched_class+0x0/0xe0 This commit therefore generate a new scx_setscheduler_class() and add check for stop_sched_class to replace __setscheduler_class(). Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Zqiang <qiang.zhang@linux.dev> Reviewed-by: Andrea Righi <arighi@nvidia.com> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/ext.c14
1 files changed, 10 insertions, 4 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index dca9ca0c1854..b563b8c3fd24 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -248,6 +248,14 @@ static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
return rhashtable_lookup(&sch->dsq_hash, &dsq_id, dsq_hash_params);
}
+static const struct sched_class *scx_setscheduler_class(struct task_struct *p)
+{
+ if (p->sched_class == &stop_sched_class)
+ return &stop_sched_class;
+
+ return __setscheduler_class(p->policy, p->prio);
+}
+
/*
* scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
* ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
@@ -4241,8 +4249,7 @@ static void scx_disable_workfn(struct kthread_work *work)
while ((p = scx_task_iter_next_locked(&sti))) {
unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *old_class = p->sched_class;
- const struct sched_class *new_class =
- __setscheduler_class(p->policy, p->prio);
+ const struct sched_class *new_class = scx_setscheduler_class(p);
update_rq_clock(task_rq(p));
@@ -5042,8 +5049,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
while ((p = scx_task_iter_next_locked(&sti))) {
unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
const struct sched_class *old_class = p->sched_class;
- const struct sched_class *new_class =
- __setscheduler_class(p->policy, p->prio);
+ const struct sched_class *new_class = scx_setscheduler_class(p);
if (scx_get_task_state(p) != SCX_TASK_READY)
continue;