summaryrefslogtreecommitdiff
path: root/kernel/locking/lockdep.c
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw@amazon.co.uk>2024-09-26 16:17:37 +0100
committerBoqun Feng <boqun.feng@gmail.com>2024-10-17 20:07:22 -0700
commit0784181b44af831a3fa52e1e5ff77c388d699dba (patch)
treed677e86aacf6b04fc9082e6d33f6686d89a81c94 /kernel/locking/lockdep.c
parent87347f148061b48c3495fb61dcbad384760da9cf (diff)
lockdep: Add lockdep_cleanup_dead_cpu()
Add a function to check that an offline CPU has left the tracing infrastructure in a sane state. Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead() function called safe_halt() instead of raw_safe_halt(), which had the side-effect of setting the hardirqs_enabled flag for the offline CPU. On x86 this triggered warnings from lockdep_assert_irqs_disabled() when the CPU was brought back online again later. These warnings were too early for the exception to be handled correctly, leading to a triple-fault. Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode, print the events leading up to it, and correct it so that the CPU can come online again correctly. Re-introducing the original bug now merely results in this warning instead: [ 61.556652] smpboot: CPU 1 is now offline [ 61.556769] CPU 1 left hardirqs enabled! [ 61.556915] irq event stamp: 128149 [ 61.556965] hardirqs last enabled at (128149): [<ffffffff81720a36>] acpi_idle_play_dead+0x46/0x70 [ 61.557055] hardirqs last disabled at (128148): [<ffffffff81124d50>] do_idle+0x90/0xe0 [ 61.557117] softirqs last enabled at (128078): [<ffffffff81cec74c>] __do_softirq+0x31c/0x423 [ 61.557199] softirqs last disabled at (128065): [<ffffffff810baae1>] __irq_exit_rcu+0x91/0x100 [boqun: Capitalize the title and reword the message a bit] Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Boqun Feng <boqun.feng@gmail.com> Link: https://lore.kernel.org/r/f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org
Diffstat (limited to 'kernel/locking/lockdep.c')
-rw-r--r--kernel/locking/lockdep.c24
1 files changed, 24 insertions, 0 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 536bd471557f..6fd4af217e71 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4586,6 +4586,30 @@ void lockdep_softirqs_off(unsigned long ip)
debug_atomic_inc(redundant_softirqs_off);
}
+/**
+ * lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped
+ *
+ * @cpu: index of offlined CPU
+ * @idle: task pointer for offlined CPU's idle thread
+ *
+ * Invoked after the CPU is dead. Ensures that the tracing infrastructure
+ * is left in a suitable state for the CPU to be subsequently brought
+ * online again.
+ */
+void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle)
+{
+ if (unlikely(!debug_locks))
+ return;
+
+ if (unlikely(per_cpu(hardirqs_enabled, cpu))) {
+ pr_warn("CPU %u left hardirqs enabled!", cpu);
+ if (idle)
+ print_irqtrace_events(idle);
+ /* Clean it up for when the CPU comes online again. */
+ per_cpu(hardirqs_enabled, cpu) = 0;
+ }
+}
+
static int
mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
{