summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 10:17:39 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 10:17:39 -0800
commit212c4053a1502e5117d8cbbbd1c15579ce1839bb (patch)
tree3cde8f505314577d9261ef45585e949298f46b0e /fs
parent415d34b92c1f921a9ff3c38f56319cbc5536f642 (diff)
parent390d967653e17205f0e519f691b7d6be0a05ff45 (diff)
Merge tag 'vfs-6.19-rc1.coredump' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull pidfd and coredump updates from Christian Brauner: "Features: - Expose coredump signal via pidfd Expose the signal that caused the coredump through the pidfd interface. The recent changes to rework coredump handling to rely on unix sockets are in the process of being used in systemd. The previous systemd coredump container interface requires the coredump file descriptor and basic information including the signal number to be sent to the container. This means the signal number needs to be available before sending the coredump to the container. - Add supported_mask field to pidfd Add a new supported_mask field to struct pidfd_info that indicates which information fields are supported by the running kernel. This allows userspace to detect feature availability without relying on error codes or kernel version checks. Cleanups: - Drop struct pidfs_exit_info and prepare to drop exit_info pointer, simplifying the internal publication mechanism for exit and coredump information retrievable via the pidfd ioctl - Use guard() for task_lock in pidfs - Reduce wait_pidfd lock scope - Add missing PIDFD_INFO_SIZE_VER1 constant - Add missing BUILD_BUG_ON() assert on struct pidfd_info Fixes: - Fix PIDFD_INFO_COREDUMP handling Selftests: - Split out coredump socket tests and common helpers into separate files for better organization - Fix userspace coredump client detection issues - Handle edge-triggered epoll correctly - Ignore ENOSPC errors in tests - Add debug logging to coredump socket tests, socket protocol tests, and test helpers - Add tests for PIDFD_INFO_COREDUMP_SIGNAL - Add tests for supported_mask field - Update pidfd header for selftests" * tag 'vfs-6.19-rc1.coredump' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (23 commits) pidfs: reduce wait_pidfd lock scope selftests/coredump: add second PIDFD_INFO_COREDUMP_SIGNAL test selftests/coredump: add first PIDFD_INFO_COREDUMP_SIGNAL test selftests/coredump: ignore ENOSPC errors selftests/coredump: add debug logging to coredump socket protocol tests selftests/coredump: add debug logging to coredump socket tests selftests/coredump: add debug logging to test helpers selftests/coredump: handle edge-triggered epoll correctly selftests/coredump: fix userspace coredump client detection selftests/coredump: fix userspace client detection selftests/coredump: split out coredump socket tests selftests/coredump: split out common helpers selftests/pidfd: add second supported_mask test selftests/pidfd: add first supported_mask test selftests/pidfd: update pidfd header pidfs: expose coredump signal pidfs: drop struct pidfs_exit_info pidfs: prepare to drop exit_info pointer pidfd: add a new supported_mask field pidfs: add missing BUILD_BUG_ON() assert on struct pidfd_info ...
Diffstat (limited to 'fs')
-rw-r--r--fs/pidfs.c113
1 files changed, 67 insertions, 46 deletions
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 78dee3c201af..dba703d4ce4a 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -39,20 +39,20 @@ void pidfs_get_root(struct path *path)
path_get(path);
}
-/*
- * Stashes information that userspace needs to access even after the
- * process has been reaped.
- */
-struct pidfs_exit_info {
- __u64 cgroupid;
- __s32 exit_code;
- __u32 coredump_mask;
+enum pidfs_attr_mask_bits {
+ PIDFS_ATTR_BIT_EXIT = 0,
+ PIDFS_ATTR_BIT_COREDUMP = 1,
};
struct pidfs_attr {
+ unsigned long attr_mask;
struct simple_xattrs *xattrs;
- struct pidfs_exit_info __pei;
- struct pidfs_exit_info *exit_info;
+ struct /* exit info */ {
+ __u64 cgroupid;
+ __s32 exit_code;
+ };
+ __u32 coredump_mask;
+ __u32 coredump_signal;
};
static struct rb_root pidfs_ino_tree = RB_ROOT;
@@ -293,6 +293,15 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
return 0;
}
+/* This must be updated whenever a new flag is added */
+#define PIDFD_INFO_SUPPORTED (PIDFD_INFO_PID | \
+ PIDFD_INFO_CREDS | \
+ PIDFD_INFO_CGROUPID | \
+ PIDFD_INFO_EXIT | \
+ PIDFD_INFO_COREDUMP | \
+ PIDFD_INFO_SUPPORTED_MASK | \
+ PIDFD_INFO_COREDUMP_SIGNAL)
+
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
@@ -300,12 +309,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
struct pid *pid = pidfd_pid(file);
size_t usize = _IOC_SIZE(cmd);
struct pidfd_info kinfo = {};
- struct pidfs_exit_info *exit_info;
struct user_namespace *user_ns;
struct pidfs_attr *attr;
const struct cred *c;
__u64 mask;
+ BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER2);
+
if (!uinfo)
return -EINVAL;
if (usize < PIDFD_INFO_SIZE_VER0)
@@ -323,20 +333,24 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
attr = READ_ONCE(pid->attr);
if (mask & PIDFD_INFO_EXIT) {
- exit_info = READ_ONCE(attr->exit_info);
- if (exit_info) {
+ if (test_bit(PIDFS_ATTR_BIT_EXIT, &attr->attr_mask)) {
+ smp_rmb();
kinfo.mask |= PIDFD_INFO_EXIT;
#ifdef CONFIG_CGROUPS
- kinfo.cgroupid = exit_info->cgroupid;
+ kinfo.cgroupid = attr->cgroupid;
kinfo.mask |= PIDFD_INFO_CGROUPID;
#endif
- kinfo.exit_code = exit_info->exit_code;
+ kinfo.exit_code = attr->exit_code;
}
}
if (mask & PIDFD_INFO_COREDUMP) {
- kinfo.mask |= PIDFD_INFO_COREDUMP;
- kinfo.coredump_mask = READ_ONCE(attr->__pei.coredump_mask);
+ if (test_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask)) {
+ smp_rmb();
+ kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
+ kinfo.coredump_mask = attr->coredump_mask;
+ kinfo.coredump_signal = attr->coredump_signal;
+ }
}
task = get_pid_task(pid, PIDTYPE_PID);
@@ -355,14 +369,15 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if (!c)
return -ESRCH;
- if ((kinfo.mask & PIDFD_INFO_COREDUMP) && !(kinfo.coredump_mask)) {
- task_lock(task);
+ if ((mask & PIDFD_INFO_COREDUMP) && !kinfo.coredump_mask) {
+ guard(task_lock)(task);
if (task->mm) {
unsigned long flags = __mm_flags_get_dumpable(task->mm);
kinfo.coredump_mask = pidfs_coredump_mask(flags);
+ kinfo.mask |= PIDFD_INFO_COREDUMP;
+ /* No coredump actually took place, so no coredump signal. */
}
- task_unlock(task);
}
/* Unconditionally return identifiers and credentials, the rest only on request */
@@ -409,6 +424,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
return -ESRCH;
copy_out:
+ if (mask & PIDFD_INFO_SUPPORTED_MASK) {
+ kinfo.mask |= PIDFD_INFO_SUPPORTED_MASK;
+ kinfo.supported_mask = PIDFD_INFO_SUPPORTED;
+ }
+
+ /* Are there bits in the return mask not present in PIDFD_INFO_SUPPORTED? */
+ WARN_ON_ONCE(~PIDFD_INFO_SUPPORTED & kinfo.mask);
/*
* If userspace and the kernel have the same struct size it can just
* be copied. If userspace provides an older struct, only the bits that
@@ -603,24 +625,25 @@ void pidfs_exit(struct task_struct *tsk)
{
struct pid *pid = task_pid(tsk);
struct pidfs_attr *attr;
- struct pidfs_exit_info *exit_info;
#ifdef CONFIG_CGROUPS
struct cgroup *cgrp;
#endif
might_sleep();
- guard(spinlock_irq)(&pid->wait_pidfd.lock);
- attr = pid->attr;
- if (!attr) {
- /*
- * No one ever held a pidfd for this struct pid.
- * Mark it as dead so no one can add a pidfs
- * entry anymore. We're about to be reaped and
- * so no exit information would be available.
- */
- pid->attr = PIDFS_PID_DEAD;
- return;
+ /* Synchronize with pidfs_register_pid(). */
+ scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
+ attr = pid->attr;
+ if (!attr) {
+ /*
+ * No one ever held a pidfd for this struct pid.
+ * Mark it as dead so no one can add a pidfs
+ * entry anymore. We're about to be reaped and
+ * so no exit information would be available.
+ */
+ pid->attr = PIDFS_PID_DEAD;
+ return;
+ }
}
/*
@@ -631,41 +654,39 @@ void pidfs_exit(struct task_struct *tsk)
* is put
*/
- exit_info = &attr->__pei;
-
#ifdef CONFIG_CGROUPS
rcu_read_lock();
cgrp = task_dfl_cgroup(tsk);
- exit_info->cgroupid = cgroup_id(cgrp);
+ attr->cgroupid = cgroup_id(cgrp);
rcu_read_unlock();
#endif
- exit_info->exit_code = tsk->exit_code;
+ attr->exit_code = tsk->exit_code;
/* Ensure that PIDFD_GET_INFO sees either all or nothing. */
- smp_store_release(&attr->exit_info, &attr->__pei);
+ smp_wmb();
+ set_bit(PIDFS_ATTR_BIT_EXIT, &attr->attr_mask);
}
#ifdef CONFIG_COREDUMP
void pidfs_coredump(const struct coredump_params *cprm)
{
struct pid *pid = cprm->pid;
- struct pidfs_exit_info *exit_info;
struct pidfs_attr *attr;
- __u32 coredump_mask = 0;
attr = READ_ONCE(pid->attr);
VFS_WARN_ON_ONCE(!attr);
VFS_WARN_ON_ONCE(attr == PIDFS_PID_DEAD);
- exit_info = &attr->__pei;
- /* Note how we were coredumped. */
- coredump_mask = pidfs_coredump_mask(cprm->mm_flags);
- /* Note that we actually did coredump. */
- coredump_mask |= PIDFD_COREDUMPED;
+ /* Note how we were coredumped and that we coredumped. */
+ attr->coredump_mask = pidfs_coredump_mask(cprm->mm_flags) |
+ PIDFD_COREDUMPED;
/* If coredumping is set to skip we should never end up here. */
- VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP);
- smp_store_release(&exit_info->coredump_mask, coredump_mask);
+ VFS_WARN_ON_ONCE(attr->coredump_mask & PIDFD_COREDUMP_SKIP);
+ /* Expose the signal number that caused the coredump. */
+ attr->coredump_signal = cprm->siginfo->si_signo;
+ smp_wmb();
+ set_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask);
}
#endif