diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-01 10:17:39 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-01 10:17:39 -0800 |
| commit | 212c4053a1502e5117d8cbbbd1c15579ce1839bb (patch) | |
| tree | 3cde8f505314577d9261ef45585e949298f46b0e /fs | |
| parent | 415d34b92c1f921a9ff3c38f56319cbc5536f642 (diff) | |
| parent | 390d967653e17205f0e519f691b7d6be0a05ff45 (diff) | |
Merge tag 'vfs-6.19-rc1.coredump' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull pidfd and coredump updates from Christian Brauner:
"Features:
- Expose coredump signal via pidfd
Expose the signal that caused the coredump through the pidfd
interface. The recent changes to rework coredump handling to rely
on unix sockets are in the process of being used in systemd. The
previous systemd coredump container interface requires the coredump
file descriptor and basic information including the signal number
to be sent to the container. This means the signal number needs to
be available before sending the coredump to the container.
- Add supported_mask field to pidfd
Add a new supported_mask field to struct pidfd_info that indicates
which information fields are supported by the running kernel. This
allows userspace to detect feature availability without relying on
error codes or kernel version checks.
Cleanups:
- Drop struct pidfs_exit_info and prepare to drop exit_info pointer,
simplifying the internal publication mechanism for exit and
coredump information retrievable via the pidfd ioctl
- Use guard() for task_lock in pidfs
- Reduce wait_pidfd lock scope
- Add missing PIDFD_INFO_SIZE_VER1 constant
- Add missing BUILD_BUG_ON() assert on struct pidfd_info
Fixes:
- Fix PIDFD_INFO_COREDUMP handling
Selftests:
- Split out coredump socket tests and common helpers into separate
files for better organization
- Fix userspace coredump client detection issues
- Handle edge-triggered epoll correctly
- Ignore ENOSPC errors in tests
- Add debug logging to coredump socket tests, socket protocol tests,
and test helpers
- Add tests for PIDFD_INFO_COREDUMP_SIGNAL
- Add tests for supported_mask field
- Update pidfd header for selftests"
* tag 'vfs-6.19-rc1.coredump' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (23 commits)
pidfs: reduce wait_pidfd lock scope
selftests/coredump: add second PIDFD_INFO_COREDUMP_SIGNAL test
selftests/coredump: add first PIDFD_INFO_COREDUMP_SIGNAL test
selftests/coredump: ignore ENOSPC errors
selftests/coredump: add debug logging to coredump socket protocol tests
selftests/coredump: add debug logging to coredump socket tests
selftests/coredump: add debug logging to test helpers
selftests/coredump: handle edge-triggered epoll correctly
selftests/coredump: fix userspace coredump client detection
selftests/coredump: fix userspace client detection
selftests/coredump: split out coredump socket tests
selftests/coredump: split out common helpers
selftests/pidfd: add second supported_mask test
selftests/pidfd: add first supported_mask test
selftests/pidfd: update pidfd header
pidfs: expose coredump signal
pidfs: drop struct pidfs_exit_info
pidfs: prepare to drop exit_info pointer
pidfd: add a new supported_mask field
pidfs: add missing BUILD_BUG_ON() assert on struct pidfd_info
...
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/pidfs.c | 113 |
1 files changed, 67 insertions, 46 deletions
diff --git a/fs/pidfs.c b/fs/pidfs.c index 78dee3c201af..dba703d4ce4a 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -39,20 +39,20 @@ void pidfs_get_root(struct path *path) path_get(path); } -/* - * Stashes information that userspace needs to access even after the - * process has been reaped. - */ -struct pidfs_exit_info { - __u64 cgroupid; - __s32 exit_code; - __u32 coredump_mask; +enum pidfs_attr_mask_bits { + PIDFS_ATTR_BIT_EXIT = 0, + PIDFS_ATTR_BIT_COREDUMP = 1, }; struct pidfs_attr { + unsigned long attr_mask; struct simple_xattrs *xattrs; - struct pidfs_exit_info __pei; - struct pidfs_exit_info *exit_info; + struct /* exit info */ { + __u64 cgroupid; + __s32 exit_code; + }; + __u32 coredump_mask; + __u32 coredump_signal; }; static struct rb_root pidfs_ino_tree = RB_ROOT; @@ -293,6 +293,15 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags) return 0; } +/* This must be updated whenever a new flag is added */ +#define PIDFD_INFO_SUPPORTED (PIDFD_INFO_PID | \ + PIDFD_INFO_CREDS | \ + PIDFD_INFO_CGROUPID | \ + PIDFD_INFO_EXIT | \ + PIDFD_INFO_COREDUMP | \ + PIDFD_INFO_SUPPORTED_MASK | \ + PIDFD_INFO_COREDUMP_SIGNAL) + static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; @@ -300,12 +309,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) struct pid *pid = pidfd_pid(file); size_t usize = _IOC_SIZE(cmd); struct pidfd_info kinfo = {}; - struct pidfs_exit_info *exit_info; struct user_namespace *user_ns; struct pidfs_attr *attr; const struct cred *c; __u64 mask; + BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER2); + if (!uinfo) return -EINVAL; if (usize < PIDFD_INFO_SIZE_VER0) @@ -323,20 +333,24 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) attr = READ_ONCE(pid->attr); if (mask & PIDFD_INFO_EXIT) { - exit_info = READ_ONCE(attr->exit_info); - if (exit_info) { + if (test_bit(PIDFS_ATTR_BIT_EXIT, &attr->attr_mask)) { + smp_rmb(); kinfo.mask |= PIDFD_INFO_EXIT; #ifdef CONFIG_CGROUPS - kinfo.cgroupid = exit_info->cgroupid; + kinfo.cgroupid = attr->cgroupid; kinfo.mask |= PIDFD_INFO_CGROUPID; #endif - kinfo.exit_code = exit_info->exit_code; + kinfo.exit_code = attr->exit_code; } } if (mask & PIDFD_INFO_COREDUMP) { - kinfo.mask |= PIDFD_INFO_COREDUMP; - kinfo.coredump_mask = READ_ONCE(attr->__pei.coredump_mask); + if (test_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask)) { + smp_rmb(); + kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL; + kinfo.coredump_mask = attr->coredump_mask; + kinfo.coredump_signal = attr->coredump_signal; + } } task = get_pid_task(pid, PIDTYPE_PID); @@ -355,14 +369,15 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) if (!c) return -ESRCH; - if ((kinfo.mask & PIDFD_INFO_COREDUMP) && !(kinfo.coredump_mask)) { - task_lock(task); + if ((mask & PIDFD_INFO_COREDUMP) && !kinfo.coredump_mask) { + guard(task_lock)(task); if (task->mm) { unsigned long flags = __mm_flags_get_dumpable(task->mm); kinfo.coredump_mask = pidfs_coredump_mask(flags); + kinfo.mask |= PIDFD_INFO_COREDUMP; + /* No coredump actually took place, so no coredump signal. */ } - task_unlock(task); } /* Unconditionally return identifiers and credentials, the rest only on request */ @@ -409,6 +424,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) return -ESRCH; copy_out: + if (mask & PIDFD_INFO_SUPPORTED_MASK) { + kinfo.mask |= PIDFD_INFO_SUPPORTED_MASK; + kinfo.supported_mask = PIDFD_INFO_SUPPORTED; + } + + /* Are there bits in the return mask not present in PIDFD_INFO_SUPPORTED? */ + WARN_ON_ONCE(~PIDFD_INFO_SUPPORTED & kinfo.mask); /* * If userspace and the kernel have the same struct size it can just * be copied. If userspace provides an older struct, only the bits that @@ -603,24 +625,25 @@ void pidfs_exit(struct task_struct *tsk) { struct pid *pid = task_pid(tsk); struct pidfs_attr *attr; - struct pidfs_exit_info *exit_info; #ifdef CONFIG_CGROUPS struct cgroup *cgrp; #endif might_sleep(); - guard(spinlock_irq)(&pid->wait_pidfd.lock); - attr = pid->attr; - if (!attr) { - /* - * No one ever held a pidfd for this struct pid. - * Mark it as dead so no one can add a pidfs - * entry anymore. We're about to be reaped and - * so no exit information would be available. - */ - pid->attr = PIDFS_PID_DEAD; - return; + /* Synchronize with pidfs_register_pid(). */ + scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) { + attr = pid->attr; + if (!attr) { + /* + * No one ever held a pidfd for this struct pid. + * Mark it as dead so no one can add a pidfs + * entry anymore. We're about to be reaped and + * so no exit information would be available. + */ + pid->attr = PIDFS_PID_DEAD; + return; + } } /* @@ -631,41 +654,39 @@ void pidfs_exit(struct task_struct *tsk) * is put */ - exit_info = &attr->__pei; - #ifdef CONFIG_CGROUPS rcu_read_lock(); cgrp = task_dfl_cgroup(tsk); - exit_info->cgroupid = cgroup_id(cgrp); + attr->cgroupid = cgroup_id(cgrp); rcu_read_unlock(); #endif - exit_info->exit_code = tsk->exit_code; + attr->exit_code = tsk->exit_code; /* Ensure that PIDFD_GET_INFO sees either all or nothing. */ - smp_store_release(&attr->exit_info, &attr->__pei); + smp_wmb(); + set_bit(PIDFS_ATTR_BIT_EXIT, &attr->attr_mask); } #ifdef CONFIG_COREDUMP void pidfs_coredump(const struct coredump_params *cprm) { struct pid *pid = cprm->pid; - struct pidfs_exit_info *exit_info; struct pidfs_attr *attr; - __u32 coredump_mask = 0; attr = READ_ONCE(pid->attr); VFS_WARN_ON_ONCE(!attr); VFS_WARN_ON_ONCE(attr == PIDFS_PID_DEAD); - exit_info = &attr->__pei; - /* Note how we were coredumped. */ - coredump_mask = pidfs_coredump_mask(cprm->mm_flags); - /* Note that we actually did coredump. */ - coredump_mask |= PIDFD_COREDUMPED; + /* Note how we were coredumped and that we coredumped. */ + attr->coredump_mask = pidfs_coredump_mask(cprm->mm_flags) | + PIDFD_COREDUMPED; /* If coredumping is set to skip we should never end up here. */ - VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP); - smp_store_release(&exit_info->coredump_mask, coredump_mask); + VFS_WARN_ON_ONCE(attr->coredump_mask & PIDFD_COREDUMP_SKIP); + /* Expose the signal number that caused the coredump. */ + attr->coredump_signal = cprm->siginfo->si_signo; + smp_wmb(); + set_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask); } #endif |