summary | refs | log | tree | commit | diff
path: root/arch/um/kernel
diff options
context:
space:
mode:
author: Benjamin Berg <benjamin@sipsolutions.net> 2025-06-02 15:00:49 +0200
committer: Johannes Berg <johannes.berg@intel.com> 2025-06-02 15:17:19 +0200
commit: 8420e08fe3a594b6ffa07705ac270faa2ed452c5 (patch)
tree: 22b578adabd9ef875fdaf560bd8d6a37eb8309b6 /arch/um/kernel
parent: b1e1bd2e69430445021394536740352be1b41cd0 (diff)
um: Track userspace children dying in SECCOMP mode
When in seccomp mode, we would hang forever on the futex if a child has died unexpectedly. In contrast, ptrace mode will notice it and kill the corresponding thread when it fails to run it.

Fix this issue using a new IRQ that is fired after a SIGCHLD and keeping an (internal) list of all MMs. In the IRQ handler, find the affected MM and set its PID to -1 as well as the futex variable to FUTEX_IN_KERN.

This, together with futex returning -EINTR after the signal is sufficient to implement a race-free detection of a child dying.

Note that this also enables IRQ handling while starting a userspace process. This should be safe and SECCOMP requires the IRQ in case the process does not come up properly.

Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20250602130052.545733-5-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- arch/um/kernel/irq.c | 6
-rw-r--r-- arch/um/kernel/skas/mmu.c | 82
2 files changed, 83 insertions, 5 deletions
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index abe8f30a521c..f1787be3983c 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -690,3 +690,9 @@ void __init init_IRQ(void)
/* Initialize EPOLL Loop */
os_setup_epoll();
}
+
+extern void sigchld_handler(int sig, struct siginfo *unused_si,
+ struct uml_pt_regs *regs, void *mc)
+{
+ do_IRQ(SIGCHLD_IRQ, regs);
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0eb5a1d3ba70..1e146a0f9549 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -8,6 +8,7 @@
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <shared/irq_kern.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu_context.h>
@@ -19,6 +20,9 @@
/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
+spinlock_t mm_list_lock;
+struct list_head mm_list;
+
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_id *new_id = &mm->context.id;
@@ -31,10 +35,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
new_id->stack = stack;
- block_signals_trace();
- new_id->pid = start_userspace(stack);
- unblock_signals_trace();
+ scoped_guard(spinlock_irqsave, &mm_list_lock) {
+ /* Insert into list, used for lookups when the child dies */
+ list_add(&mm->context.list, &mm_list);
+ }
+ new_id->pid = start_userspace(stack);
if (new_id->pid < 0) {
ret = new_id->pid;
goto out_free;
@@ -60,13 +66,79 @@ void destroy_context(struct mm_struct *mm)
* zero, resulting in a kill(0), which will result in the
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
+ *
+ * Negative cases happen if the child died unexpectedly.
*/
- if (mmu->id.pid < 2) {
+ if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
mmu->id.pid);
return;
}
- os_kill_ptraced_process(mmu->id.pid, 1);
+
+ if (mmu->id.pid > 0) {
+ os_kill_ptraced_process(mmu->id.pid, 1);
+ mmu->id.pid = -1;
+ }
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
+
+ guard(spinlock_irqsave)(&mm_list_lock);
+
+ list_del(&mm->context.list);
+}
+
+static irqreturn_t mm_sigchld_irq(int irq, void* dev)
+{
+ struct mm_context *mm_context;
+ pid_t pid;
+
+ guard(spinlock)(&mm_list_lock);
+
+ while ((pid = os_reap_child()) > 0) {
+ /*
+ * A child died, check if we have an MM with the PID. This is
+ * only relevant in SECCOMP mode (as ptrace will fail anyway).
+ *
+ * See wait_stub_done_seccomp for more details.
+ */
+ list_for_each_entry(mm_context, &mm_list, list) {
+ if (mm_context->id.pid == pid) {
+ struct stub_data *stub_data;
+ printk("Unexpectedly lost MM child! Affected tasks will segfault.");
+
+ /* Marks the MM as dead */
+ mm_context->id.pid = -1;
+
+ /*
+ * NOTE: If SMP is implemented, a futex_wake
+ * needs to be added here.
+ */
+ stub_data = (void *)mm_context->id.stack;
+ stub_data->futex = FUTEX_IN_KERN;
+
+ /*
+ * NOTE: Currently executing syscalls by
+ * affected tasks may finish normally.
+ */
+ break;
+ }
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int __init init_child_tracking(void)
+{
+ int err;
+
+ spin_lock_init(&mm_list_lock);
+ INIT_LIST_HEAD(&mm_list);
+
+ err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
+ if (err < 0)
+ panic("Failed to register SIGCHLD IRQ: %d", err);
+
+ return 0;
}
+early_initcall(init_child_tracking)