summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched/ext.h2
-rw-r--r--kernel/sched/ext.c18
-rw-r--r--kernel/watchdog.c9
3 files changed, 29 insertions, 0 deletions
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 3d3216ff9188..d6c152475f5b 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -223,6 +223,7 @@ struct sched_ext_entity {
void sched_ext_dead(struct task_struct *p);
void print_scx_info(const char *log_lvl, struct task_struct *p);
void scx_softlockup(u32 dur_s);
+bool scx_hardlockup(void);
bool scx_rcu_cpu_stall(void);
#else /* !CONFIG_SCHED_CLASS_EXT */
@@ -230,6 +231,7 @@ bool scx_rcu_cpu_stall(void);
static inline void sched_ext_dead(struct task_struct *p) {}
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
static inline void scx_softlockup(u32 dur_s) {}
+static inline bool scx_hardlockup(void) { return false; }
static inline bool scx_rcu_cpu_stall(void) { return false; }
#endif /* CONFIG_SCHED_CLASS_EXT */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 85bb052459ec..b5c87a03f112 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3712,6 +3712,24 @@ void scx_softlockup(u32 dur_s)
}
/**
+ * scx_hardlockup - sched_ext hardlockup handler
+ *
+ * A poorly behaving BPF scheduler can trigger hard lockup by e.g. putting
+ * numerous affinitized tasks in a single queue and directing all CPUs at it.
+ * Try kicking out the current scheduler in an attempt to recover the system to
+ * a good state before taking more drastic actions.
+ */
+bool scx_hardlockup(void)
+{
+ if (!handle_lockup("hard lockup - CPU %d", smp_processor_id()))
+ return false;
+
+ printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n",
+ smp_processor_id());
+ return true;
+}
+
+/**
* scx_bypass - [Un]bypass scx_ops and guarantee forward progress
* @bypass: true for bypass, false for unbypass
*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5b62d1002783..8dfac4a8f587 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -196,6 +196,15 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
#ifdef CONFIG_SYSFS
++hardlockup_count;
#endif
+ /*
+ * A poorly behaving BPF scheduler can trigger hard lockup by
+ * e.g. putting numerous affinitized tasks in a single queue and
+ * directing all CPUs at it. The following call can return true
+ * only once when sched_ext is enabled and will immediately
+ * abort the BPF scheduler and print out a warning message.
+ */
+ if (scx_hardlockup())
+ return;
/* Only print hardlockups once. */
if (per_cpu(watchdog_hardlockup_warned, cpu))