summaryrefslogtreecommitdiff
path: root/kernel/sched/sched.h
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2025-09-16 23:02:41 +0200
committerPeter Zijlstra <peterz@infradead.org>2025-09-25 09:51:50 +0200
commit4ae8d9aa9f9dc7137ea5e564d79c5aa5af1bc45c (patch)
treeee3a49e04d70d79beb5ac3bb93e39adde655af30 /kernel/sched/sched.h
parentf83ec76bf285bea5727f478a68b894f5543ca76e (diff)
sched/deadline: Fix dl_server getting stuck
John found it was easy to hit lockup warnings when running locktorture on a 2 CPU VM, which he bisected down to: commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling"). While debugging it seems there is a chance where we end up with the dl_server dequeued, with dl_se->dl_server_active. This causes dl_server_start() to return without enqueueing the dl_server, thus it fails to run when RT tasks starve the cpu. When this happens, dl_server_timer() catches the '!dl_se->server_has_tasks(dl_se)' case, which then calls replenish_dl_entity() and dl_server_stopped() and finally return HRTIMER_NO_RESTART. This ends in no new timer and also no enqueue, leaving the dl_server 'dead', allowing starvation. What should have happened is for the bandwidth timer to start the zero-laxity timer, which in turn would enqueue the dl_server and cause dl_se->server_pick_task() to be called -- which will stop the dl_server if no fair tasks are observed for a whole period. IOW, it is totally irrelevant if there are fair tasks at the moment of bandwidth refresh. This removes all dl_se->server_has_tasks() users, so remove the whole thing. Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: John Stultz <jstultz@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: John Stultz <jstultz@google.com>
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--kernel/sched/sched.h4
1 files changed, 0 insertions, 4 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index be9745d104f7..f10d6277dca1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -365,9 +365,6 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
*
* dl_se::rq -- runqueue we belong to.
*
- * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
- * server when it runs out of tasks to run.
- *
* dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
* returns NULL.
*
@@ -383,7 +380,6 @@ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
- dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);