-rw-r--r--   include/linux/sched/ext.h    2
-rw-r--r--   kernel/sched/ext.c          18
-rw-r--r--   kernel/watchdog.c            9
3 files changed, 29 insertions, 0 deletions
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 3d3216ff9188..d6c152475f5b 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -223,6 +223,7 @@ struct sched_ext_entity {
 void sched_ext_dead(struct task_struct *p);
 void print_scx_info(const char *log_lvl, struct task_struct *p);
 void scx_softlockup(u32 dur_s);
+bool scx_hardlockup(void);
 bool scx_rcu_cpu_stall(void);
 
 #else	/* !CONFIG_SCHED_CLASS_EXT */
@@ -230,6 +231,7 @@ bool scx_rcu_cpu_stall(void);
 static inline void sched_ext_dead(struct task_struct *p) {}
 static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
 static inline void scx_softlockup(u32 dur_s) {}
+static inline bool scx_hardlockup(void) { return false; }
 static inline bool scx_rcu_cpu_stall(void) { return false; }
 
 #endif	/* CONFIG_SCHED_CLASS_EXT */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 85bb052459ec..b5c87a03f112 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3712,6 +3712,24 @@ void scx_softlockup(u32 dur_s)
 }
 
 /**
+ * scx_hardlockup - sched_ext hardlockup handler
+ *
+ * A poorly behaving BPF scheduler can trigger hard lockup by e.g. putting
+ * numerous affinitized tasks in a single queue and directing all CPUs at it.
+ * Try kicking out the current scheduler in an attempt to recover the system to
+ * a good state before taking more drastic actions.
+ */
+bool scx_hardlockup(void)
+{
+	if (!handle_lockup("hard lockup - CPU %d", smp_processor_id()))
+		return false;
+
+	printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n",
+			smp_processor_id());
+	return true;
+}
+
+/**
  * scx_bypass - [Un]bypass scx_ops and guarantee forward progress
  * @bypass: true for bypass, false for unbypass
  *
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5b62d1002783..8dfac4a8f587 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -196,6 +196,15 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 #ifdef CONFIG_SYSFS
 		++hardlockup_count;
 #endif
+		/*
+		 * A poorly behaving BPF scheduler can trigger hard lockup by
+		 * e.g. putting numerous affinitized tasks in a single queue and
+		 * directing all CPUs at it. The following call can return true
+		 * only once when sched_ext is enabled and will immediately
+		 * abort the BPF scheduler and print out a warning message.
+		 */
+		if (scx_hardlockup())
+			return;
 
 		/* Only print hardlockups once. */
 		if (per_cpu(watchdog_hardlockup_warned, cpu))
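
Note: scx_hardlockup() above delegates the actual decision and scheduler abort to handle_lockup(), a helper in kernel/sched/ext.c that is shared with the soft lockup and RCU CPU stall paths but is not part of this diff. The following is a rough sketch only, not the in-tree implementation: scx_is_enabled(), scx_in_lockup and scx_abort_scheduler() are hypothetical stand-ins for sched_ext's enabled check, its internal one-shot latch and its exit/abort path.

/*
 * Hypothetical sketch of handle_lockup() -- not the actual kernel/sched/ext.c
 * helper. scx_is_enabled(), scx_in_lockup and scx_abort_scheduler() are
 * made-up stand-ins for sched_ext internals.
 */
#include <linux/bitops.h>
#include <linux/stdarg.h>
#include <linux/types.h>

static unsigned long scx_in_lockup;			/* hypothetical one-shot latch */
bool scx_is_enabled(void);				/* stands in for the real "scheduler loaded" check */
void scx_abort_scheduler(const char *fmt, va_list args);	/* hypothetical abort path */

static bool handle_lockup(const char *fmt, ...)
{
	va_list args;

	/* Nothing to kick out if no BPF scheduler is loaded. */
	if (!scx_is_enabled())
		return false;

	/* Only the first lockup report gets to disable the scheduler. */
	if (test_and_set_bit(0, &scx_in_lockup))
		return false;

	/* Abort the BPF scheduler, recording the formatted reason. */
	va_start(args, fmt);
	scx_abort_scheduler(fmt, args);
	va_end(args);

	return true;
}

On the watchdog side, only the return value matters: true means sched_ext took responsibility and aborted the BPF scheduler, so watchdog_hardlockup_check() returns early and skips the usual hardlockup reporting; false means there was no BPF scheduler to blame and the normal handling proceeds.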