diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/ns/ns_common_types.h | 196 | ||||
| -rw-r--r-- | include/linux/ns/nstree_types.h | 55 | ||||
| -rw-r--r-- | include/linux/ns_common.h | 233 | ||||
| -rw-r--r-- | include/linux/nsfs.h | 3 | ||||
| -rw-r--r-- | include/linux/nsproxy.h | 9 | ||||
| -rw-r--r-- | include/linux/nstree.h | 52 | ||||
| -rw-r--r-- | include/linux/pid_namespace.h | 3 | ||||
| -rw-r--r-- | include/linux/pseudo_fs.h | 1 | ||||
| -rw-r--r-- | include/linux/syscalls.h | 4 | ||||
| -rw-r--r-- | include/linux/user_namespace.h | 4 | ||||
| -rw-r--r-- | include/uapi/asm-generic/unistd.h | 4 | ||||
| -rw-r--r-- | include/uapi/linux/nsfs.h | 58 |
12 files changed, 479 insertions, 143 deletions
diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h new file mode 100644 index 000000000000..b332b019b29c --- /dev/null +++ b/include/linux/ns/ns_common_types.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NS_COMMON_TYPES_H +#define _LINUX_NS_COMMON_TYPES_H + +#include <linux/atomic.h> +#include <linux/ns/nstree_types.h> +#include <linux/rbtree.h> +#include <linux/refcount.h> +#include <linux/types.h> + +struct cgroup_namespace; +struct dentry; +struct ipc_namespace; +struct mnt_namespace; +struct net; +struct pid_namespace; +struct proc_ns_operations; +struct time_namespace; +struct user_namespace; +struct uts_namespace; + +extern struct cgroup_namespace init_cgroup_ns; +extern struct ipc_namespace init_ipc_ns; +extern struct mnt_namespace init_mnt_ns; +extern struct net init_net; +extern struct pid_namespace init_pid_ns; +extern struct time_namespace init_time_ns; +extern struct user_namespace init_user_ns; +extern struct uts_namespace init_uts_ns; + +extern const struct proc_ns_operations cgroupns_operations; +extern const struct proc_ns_operations ipcns_operations; +extern const struct proc_ns_operations mntns_operations; +extern const struct proc_ns_operations netns_operations; +extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations pidns_for_children_operations; +extern const struct proc_ns_operations timens_operations; +extern const struct proc_ns_operations timens_for_children_operations; +extern const struct proc_ns_operations userns_operations; +extern const struct proc_ns_operations utsns_operations; + +/* + * Namespace lifetimes are managed via a two-tier reference counting model: + * + * (1) __ns_ref (refcount_t): Main reference count tracking memory + * lifetime. Controls when the namespace structure itself is freed. + * It also pins the namespace on the namespace trees whereas (2) + * only regulates their visibility to userspace. + * + * (2) __ns_ref_active (atomic_t): Reference count tracking active users. + * Controls visibility of the namespace in the namespace trees. + * Any live task that uses the namespace (via nsproxy or cred) holds + * an active reference. Any open file descriptor or bind-mount of + * the namespace holds an active reference. Once all tasks have + * called exited their namespaces and all file descriptors and + * bind-mounts have been released the active reference count drops + * to zero and the namespace becomes inactive. IOW, the namespace + * cannot be listed or opened via file handles anymore. + * + * Note that it is valid to transition from active to inactive and + * back from inactive to active e.g., when resurrecting an inactive + * namespace tree via the SIOCGSKNS ioctl(). + * + * Relationship and lifecycle states: + * + * - Active (__ns_ref_active > 0): + * Namespace is actively used and visible to userspace. The namespace + * can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file + * handles, or discovered via listns(). + * + * - Inactive (__ns_ref_active == 0, __ns_ref > 0): + * No tasks are actively using the namespace and it isn't pinned by + * any bind-mounts or open file descriptors anymore. But the namespace + * is still kept alive by internal references. For example, the user + * namespace could be pinned by an open file through file->f_cred + * references when one of the now defunct tasks had opened a file and + * handed the file descriptor off to another process via a UNIX + * sockets. Such references keep the namespace structure alive through + * __ns_ref but will not hold an active reference. + * + * - Destroyed (__ns_ref == 0): + * No references remain. The namespace is removed from the tree and freed. + * + * State transitions: + * + * Active -> Inactive: + * When the last task using the namespace exits it drops its active + * references to all namespaces. However, user and pid namespaces + * remain accessible until the task has been reaped. + * + * Inactive -> Active: + * An inactive namespace tree might be resurrected due to e.g., the + * SIOCGSKNS ioctl() on a socket. + * + * Inactive -> Destroyed: + * When __ns_ref drops to zero the namespace is removed from the + * namespaces trees and the memory is freed (after RCU grace period). + * + * Initial namespaces: + * Boot-time namespaces (init_net, init_pid_ns, etc.) start with + * __ns_ref_active = 1 and remain active forever. + * + * @ns_type: type of namespace (e.g., CLONE_NEWNET) + * @stashed: cached dentry to be used by the vfs + * @ops: namespace operations + * @inum: namespace inode number (quickly recycled for non-initial namespaces) + * @__ns_ref: main reference count (do not use directly) + * @ns_tree: namespace tree nodes and active reference count + */ +struct ns_common { + u32 ns_type; + struct dentry *stashed; + const struct proc_ns_operations *ops; + unsigned int inum; + refcount_t __ns_ref; /* do not use directly */ + union { + struct ns_tree; + struct rcu_head ns_rcu; + }; +}; + +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns)->ns, \ + const struct cgroup_namespace *: &(__ns)->ns, \ + struct ipc_namespace *: &(__ns)->ns, \ + const struct ipc_namespace *: &(__ns)->ns, \ + struct mnt_namespace *: &(__ns)->ns, \ + const struct mnt_namespace *: &(__ns)->ns, \ + struct net *: &(__ns)->ns, \ + const struct net *: &(__ns)->ns, \ + struct pid_namespace *: &(__ns)->ns, \ + const struct pid_namespace *: &(__ns)->ns, \ + struct time_namespace *: &(__ns)->ns, \ + const struct time_namespace *: &(__ns)->ns, \ + struct user_namespace *: &(__ns)->ns, \ + const struct user_namespace *: &(__ns)->ns, \ + struct uts_namespace *: &(__ns)->ns, \ + const struct uts_namespace *: &(__ns)->ns) + +#define ns_init_inum(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CGROUP_NS_INIT_INO, \ + struct ipc_namespace *: IPC_NS_INIT_INO, \ + struct mnt_namespace *: MNT_NS_INIT_INO, \ + struct net *: NET_NS_INIT_INO, \ + struct pid_namespace *: PID_NS_INIT_INO, \ + struct time_namespace *: TIME_NS_INIT_INO, \ + struct user_namespace *: USER_NS_INIT_INO, \ + struct uts_namespace *: UTS_NS_INIT_INO) + +#define ns_init_ns(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &init_cgroup_ns, \ + struct ipc_namespace *: &init_ipc_ns, \ + struct mnt_namespace *: &init_mnt_ns, \ + struct net *: &init_net, \ + struct pid_namespace *: &init_pid_ns, \ + struct time_namespace *: &init_time_ns, \ + struct user_namespace *: &init_user_ns, \ + struct uts_namespace *: &init_uts_ns) + +#define ns_init_id(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CGROUP_NS_INIT_ID, \ + struct ipc_namespace *: IPC_NS_INIT_ID, \ + struct mnt_namespace *: MNT_NS_INIT_ID, \ + struct net *: NET_NS_INIT_ID, \ + struct pid_namespace *: PID_NS_INIT_ID, \ + struct time_namespace *: TIME_NS_INIT_ID, \ + struct user_namespace *: USER_NS_INIT_ID, \ + struct uts_namespace *: UTS_NS_INIT_ID) + +#define to_ns_operations(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \ + struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \ + struct mnt_namespace *: &mntns_operations, \ + struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \ + struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \ + struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \ + struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ + struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) + +#define ns_common_type(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CLONE_NEWCGROUP, \ + struct ipc_namespace *: CLONE_NEWIPC, \ + struct mnt_namespace *: CLONE_NEWNS, \ + struct net *: CLONE_NEWNET, \ + struct pid_namespace *: CLONE_NEWPID, \ + struct time_namespace *: CLONE_NEWTIME, \ + struct user_namespace *: CLONE_NEWUSER, \ + struct uts_namespace *: CLONE_NEWUTS) + +#endif /* _LINUX_NS_COMMON_TYPES_H */ diff --git a/include/linux/ns/nstree_types.h b/include/linux/ns/nstree_types.h new file mode 100644 index 000000000000..2fb28ee31efb --- /dev/null +++ b/include/linux/ns/nstree_types.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */ +#ifndef _LINUX_NSTREE_TYPES_H +#define _LINUX_NSTREE_TYPES_H + +#include <linux/rbtree.h> +#include <linux/list.h> + +/** + * struct ns_tree_root - Root of a namespace tree + * @ns_rb: Red-black tree root for efficient lookups + * @ns_list_head: List head for sequential iteration + * + * Each namespace tree maintains both an rbtree (for O(log n) lookups) + * and a list (for efficient sequential iteration). The list is kept in + * the same sorted order as the rbtree. + */ +struct ns_tree_root { + struct rb_root ns_rb; + struct list_head ns_list_head; +}; + +/** + * struct ns_tree_node - Node in a namespace tree + * @ns_node: Red-black tree node + * @ns_list_entry: List entry for sequential iteration + * + * Represents a namespace's position in a tree. Each namespace has + * multiple tree nodes for different trees (unified, per-type, owner). + */ +struct ns_tree_node { + struct rb_node ns_node; + struct list_head ns_list_entry; +}; + +/** + * struct ns_tree - Namespace tree nodes and active reference count + * @ns_id: Unique namespace identifier + * @__ns_ref_active: Active reference count (do not use directly) + * @ns_unified_node: Node in the global namespace tree + * @ns_tree_node: Node in the per-type namespace tree + * @ns_owner_node: Node in the owner namespace's tree of owned namespaces + * @ns_owner_root: Root of the tree of namespaces owned by this namespace + * (only used when this namespace is an owner) + */ +struct ns_tree { + u64 ns_id; + atomic_t __ns_ref_active; + struct ns_tree_node ns_unified_node; + struct ns_tree_node ns_tree_node; + struct ns_tree_node ns_owner_node; + struct ns_tree_root ns_owner_root; +}; + +#endif /* _LINUX_NSTREE_TYPES_H */ diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index f5b68b8abb54..825f5865bfc5 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -2,122 +2,44 @@ #ifndef _LINUX_NS_COMMON_H #define _LINUX_NS_COMMON_H +#include <linux/ns/ns_common_types.h> #include <linux/refcount.h> -#include <linux/rbtree.h> +#include <linux/vfsdebug.h> #include <uapi/linux/sched.h> +#include <uapi/linux/nsfs.h> -struct proc_ns_operations; - -struct cgroup_namespace; -struct ipc_namespace; -struct mnt_namespace; -struct net; -struct pid_namespace; -struct time_namespace; -struct user_namespace; -struct uts_namespace; - -extern struct cgroup_namespace init_cgroup_ns; -extern struct ipc_namespace init_ipc_ns; -extern struct mnt_namespace init_mnt_ns; -extern struct net init_net; -extern struct pid_namespace init_pid_ns; -extern struct time_namespace init_time_ns; -extern struct user_namespace init_user_ns; -extern struct uts_namespace init_uts_ns; - -extern const struct proc_ns_operations netns_operations; -extern const struct proc_ns_operations utsns_operations; -extern const struct proc_ns_operations ipcns_operations; -extern const struct proc_ns_operations pidns_operations; -extern const struct proc_ns_operations pidns_for_children_operations; -extern const struct proc_ns_operations userns_operations; -extern const struct proc_ns_operations mntns_operations; -extern const struct proc_ns_operations cgroupns_operations; -extern const struct proc_ns_operations timens_operations; -extern const struct proc_ns_operations timens_for_children_operations; - -struct ns_common { - u32 ns_type; - struct dentry *stashed; - const struct proc_ns_operations *ops; - unsigned int inum; - refcount_t __ns_ref; /* do not use directly */ - union { - struct { - u64 ns_id; - struct rb_node ns_tree_node; - struct list_head ns_list_node; - }; - struct rcu_head ns_rcu; - }; -}; - +bool is_current_namespace(struct ns_common *ns); int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); +struct ns_common *__must_check ns_owner(struct ns_common *ns); + +static __always_inline bool is_ns_init_inum(const struct ns_common *ns) +{ + VFS_WARN_ON_ONCE(ns->inum == 0); + return unlikely(in_range(ns->inum, MNT_NS_INIT_INO, + IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1)); +} + +static __always_inline bool is_ns_init_id(const struct ns_common *ns) +{ + VFS_WARN_ON_ONCE(ns->ns_id == 0); + return ns->ns_id <= NS_LAST_INIT_ID; +} -#define to_ns_common(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &(__ns)->ns, \ - const struct cgroup_namespace *: &(__ns)->ns, \ - struct ipc_namespace *: &(__ns)->ns, \ - const struct ipc_namespace *: &(__ns)->ns, \ - struct mnt_namespace *: &(__ns)->ns, \ - const struct mnt_namespace *: &(__ns)->ns, \ - struct net *: &(__ns)->ns, \ - const struct net *: &(__ns)->ns, \ - struct pid_namespace *: &(__ns)->ns, \ - const struct pid_namespace *: &(__ns)->ns, \ - struct time_namespace *: &(__ns)->ns, \ - const struct time_namespace *: &(__ns)->ns, \ - struct user_namespace *: &(__ns)->ns, \ - const struct user_namespace *: &(__ns)->ns, \ - struct uts_namespace *: &(__ns)->ns, \ - const struct uts_namespace *: &(__ns)->ns) - -#define ns_init_inum(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: CGROUP_NS_INIT_INO, \ - struct ipc_namespace *: IPC_NS_INIT_INO, \ - struct mnt_namespace *: MNT_NS_INIT_INO, \ - struct net *: NET_NS_INIT_INO, \ - struct pid_namespace *: PID_NS_INIT_INO, \ - struct time_namespace *: TIME_NS_INIT_INO, \ - struct user_namespace *: USER_NS_INIT_INO, \ - struct uts_namespace *: UTS_NS_INIT_INO) - -#define ns_init_ns(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &init_cgroup_ns, \ - struct ipc_namespace *: &init_ipc_ns, \ - struct mnt_namespace *: &init_mnt_ns, \ - struct net *: &init_net, \ - struct pid_namespace *: &init_pid_ns, \ - struct time_namespace *: &init_time_ns, \ - struct user_namespace *: &init_user_ns, \ - struct uts_namespace *: &init_uts_ns) - -#define to_ns_operations(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \ - struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \ - struct mnt_namespace *: &mntns_operations, \ - struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \ - struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \ - struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \ - struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ - struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) - -#define ns_common_type(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: CLONE_NEWCGROUP, \ - struct ipc_namespace *: CLONE_NEWIPC, \ - struct mnt_namespace *: CLONE_NEWNS, \ - struct net *: CLONE_NEWNET, \ - struct pid_namespace *: CLONE_NEWPID, \ - struct time_namespace *: CLONE_NEWTIME, \ - struct user_namespace *: CLONE_NEWUSER, \ - struct uts_namespace *: CLONE_NEWUTS) +#define NS_COMMON_INIT(nsname) \ +{ \ + .ns_type = ns_common_type(&nsname), \ + .ns_id = ns_init_id(&nsname), \ + .inum = ns_init_inum(&nsname), \ + .ops = to_ns_operations(&nsname), \ + .stashed = NULL, \ + .__ns_ref = REFCOUNT_INIT(1), \ + .__ns_ref_active = ATOMIC_INIT(1), \ + .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \ + .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \ + .ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \ + .ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \ +} #define ns_common_init(__ns) \ __ns_common_init(to_ns_common(__ns), \ @@ -133,21 +55,96 @@ void __ns_common_free(struct ns_common *ns); #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) +{ + return atomic_read(&ns->__ns_ref_active); +} + +static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns) +{ + return refcount_read(&ns->__ns_ref); +} + static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) { - return refcount_dec_and_test(&ns->__ns_ref); + if (is_ns_init_id(ns)) { + VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); + return false; + } + if (refcount_dec_and_test(&ns->__ns_ref)) { + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); + return true; + } + return false; } static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) { - return refcount_inc_not_zero(&ns->__ns_ref); + if (is_ns_init_id(ns)) { + VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); + return true; + } + if (refcount_inc_not_zero(&ns->__ns_ref)) + return true; + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); + return false; } -#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref) -#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref) -#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns))) -#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns))) -#define ns_ref_put_and_lock(__ns, __lock) \ - refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock)) +static __always_inline void __ns_ref_inc(struct ns_common *ns) +{ + if (is_ns_init_id(ns)) { + VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); + return; + } + refcount_inc(&ns->__ns_ref); +} + +static __always_inline __must_check bool __ns_ref_dec_and_lock(struct ns_common *ns, + spinlock_t *ns_lock) +{ + if (is_ns_init_id(ns)) { + VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); + return false; + } + return refcount_dec_and_lock(&ns->__ns_ref, ns_lock); +} + +#define ns_ref_read(__ns) __ns_ref_read(to_ns_common((__ns))) +#define ns_ref_inc(__ns) \ + do { if (__ns) __ns_ref_inc(to_ns_common((__ns))); } while (0) +#define ns_ref_get(__ns) \ + ((__ns) ? __ns_ref_get(to_ns_common((__ns))) : false) +#define ns_ref_put(__ns) \ + ((__ns) ? __ns_ref_put(to_ns_common((__ns))) : false) +#define ns_ref_put_and_lock(__ns, __ns_lock) \ + ((__ns) ? __ns_ref_dec_and_lock(to_ns_common((__ns)), __ns_lock) : false) + +#define ns_ref_active_read(__ns) \ + ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) + +void __ns_ref_active_put(struct ns_common *ns); + +#define ns_ref_active_put(__ns) \ + do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) + +static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns) +{ + if (!__ns_ref_active_read(ns)) { + VFS_WARN_ON_ONCE(is_ns_init_id(ns)); + return NULL; + } + if (!__ns_ref_get(ns)) + return NULL; + return ns; +} + +void __ns_ref_active_get(struct ns_common *ns); + +#define ns_ref_active_get(__ns) \ + do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) #endif diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h index e5a5fa83d36b..731b67fc2fec 100644 --- a/include/linux/nsfs.h +++ b/include/linux/nsfs.h @@ -37,4 +37,7 @@ void nsfs_init(void); #define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns) +void nsproxy_ns_active_get(struct nsproxy *ns); +void nsproxy_ns_active_put(struct nsproxy *ns); + #endif /* _LINUX_NSFS_H */ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index bd118a187dec..5a67648721c7 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -93,10 +93,13 @@ static inline struct cred *nsset_cred(struct nsset *set) */ int copy_namespaces(u64 flags, struct task_struct *tsk); -void exit_task_namespaces(struct task_struct *tsk); +void switch_cred_namespaces(const struct cred *old, const struct cred *new); +void exit_nsproxy_namespaces(struct task_struct *tsk); +void get_cred_namespaces(struct task_struct *tsk); +void exit_cred_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); int exec_task_namespaces(void); -void free_nsproxy(struct nsproxy *ns); +void deactivate_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); int __init nsproxy_cache_init(void); @@ -104,7 +107,7 @@ int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { if (refcount_dec_and_test(&ns->count)) - free_nsproxy(ns); + deactivate_nsproxy(ns); } static inline void get_nsproxy(struct nsproxy *ns) diff --git a/include/linux/nstree.h b/include/linux/nstree.h index 8b8636690473..175e4625bfa6 100644 --- a/include/linux/nstree.h +++ b/include/linux/nstree.h @@ -1,22 +1,34 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */ #ifndef _LINUX_NSTREE_H #define _LINUX_NSTREE_H -#include <linux/ns_common.h> +#include <linux/ns/nstree_types.h> #include <linux/nsproxy.h> #include <linux/rbtree.h> #include <linux/seqlock.h> #include <linux/rculist.h> #include <linux/cookie.h> +#include <uapi/linux/nsfs.h> -extern struct ns_tree cgroup_ns_tree; -extern struct ns_tree ipc_ns_tree; -extern struct ns_tree mnt_ns_tree; -extern struct ns_tree net_ns_tree; -extern struct ns_tree pid_ns_tree; -extern struct ns_tree time_ns_tree; -extern struct ns_tree user_ns_tree; -extern struct ns_tree uts_ns_tree; +struct ns_common; + +extern struct ns_tree_root cgroup_ns_tree; +extern struct ns_tree_root ipc_ns_tree; +extern struct ns_tree_root mnt_ns_tree; +extern struct ns_tree_root net_ns_tree; +extern struct ns_tree_root pid_ns_tree; +extern struct ns_tree_root time_ns_tree; +extern struct ns_tree_root user_ns_tree; +extern struct ns_tree_root uts_ns_tree; + +void ns_tree_node_init(struct ns_tree_node *node); +void ns_tree_root_init(struct ns_tree_root *root); +bool ns_tree_node_empty(const struct ns_tree_node *node); +struct rb_node *ns_tree_node_add(struct ns_tree_node *node, + struct ns_tree_root *root, + int (*cmp)(struct rb_node *, const struct rb_node *)); +void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root); #define to_ns_tree(__ns) \ _Generic((__ns), \ @@ -29,17 +41,21 @@ extern struct ns_tree uts_ns_tree; struct user_namespace *: &(user_ns_tree), \ struct uts_namespace *: &(uts_ns_tree)) -u64 ns_tree_gen_id(struct ns_common *ns); -void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree); -void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree); +#define ns_tree_gen_id(__ns) \ + __ns_tree_gen_id(to_ns_common(__ns), \ + (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0)) + +u64 __ns_tree_gen_id(struct ns_common *ns, u64 id); +void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree); +void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree); struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type); struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, - struct ns_tree *ns_tree, + struct ns_tree_root *ns_tree, bool previous); -static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) +static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_tree, u64 id) { - ns_tree_gen_id(ns); + __ns_tree_gen_id(ns, id); __ns_tree_add_raw(ns, ns_tree); } @@ -59,7 +75,9 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) * This function assigns a new id to the namespace and adds it to the * appropriate namespace tree and list. */ -#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns)) +#define ns_tree_add(__ns) \ + __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns), \ + (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0)) /** * ns_tree_remove - Remove a namespace from a namespace tree @@ -73,6 +91,6 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) #define ns_tree_adjoined_rcu(__ns, __previous) \ __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous) -#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node)) +#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node.ns_node)) #endif /* _LINUX_NSTREE_H */ diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 445517a72ad0..0e7ae12c96d2 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -61,8 +61,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns) static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { - if (ns != &init_pid_ns) - ns_ref_inc(ns); + ns_ref_inc(ns); return ns; } diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index 2503f7625d65..a651e60d9410 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -9,6 +9,7 @@ struct pseudo_fs_context { const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; + unsigned int s_d_flags; }; struct pseudo_fs_context *init_pseudo(struct fs_context *fc, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 66c06fcdfe19..cf84d98964b2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -77,6 +77,7 @@ struct cachestat_range; struct cachestat; struct statmount; struct mnt_id_req; +struct ns_id_req; struct xattr_args; struct file_attr; @@ -437,6 +438,9 @@ asmlinkage long sys_statmount(const struct mnt_id_req __user *req, asmlinkage long sys_listmount(const struct mnt_id_req __user *req, u64 __user *mnt_ids, size_t nr_mnt_ids, unsigned int flags); +asmlinkage long sys_listns(const struct ns_id_req __user *req, + u64 __user *ns_ids, size_t nr_ns_ids, + unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); asmlinkage long sys_ftruncate(unsigned int fd, off_t length); #if BITS_PER_LONG == 32 diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9a9aebbf96b9..9c3be157397e 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -166,13 +166,13 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX; } -#ifdef CONFIG_USER_NS - static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); } +#ifdef CONFIG_USER_NS + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 04e0077fb4c9..942370b3f5d2 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) __SYSCALL(__NR_file_getattr, sys_file_getattr) #define __NR_file_setattr 469 __SYSCALL(__NR_file_setattr, sys_file_setattr) +#define __NR_listns 470 +__SYSCALL(__NR_listns, sys_listns) #undef __NR_syscalls -#define __NR_syscalls 470 +#define __NR_syscalls 471 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index e098759ec917..a25e38d1c874 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -67,4 +67,62 @@ struct nsfs_file_handle { #define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */ #define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */ +enum init_ns_id { + IPC_NS_INIT_ID = 1ULL, + UTS_NS_INIT_ID = 2ULL, + USER_NS_INIT_ID = 3ULL, + PID_NS_INIT_ID = 4ULL, + CGROUP_NS_INIT_ID = 5ULL, + TIME_NS_INIT_ID = 6ULL, + NET_NS_INIT_ID = 7ULL, + MNT_NS_INIT_ID = 8ULL, +#ifdef __KERNEL__ + NS_LAST_INIT_ID = MNT_NS_INIT_ID, +#endif +}; + +enum ns_type { + TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */ + MNT_NS = (1ULL << 17), /* CLONE_NEWNS */ + CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */ + UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */ + IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */ + USER_NS = (1ULL << 28), /* CLONE_NEWUSER */ + PID_NS = (1ULL << 29), /* CLONE_NEWPID */ + NET_NS = (1ULL << 30), /* CLONE_NEWNET */ +}; + +/** + * struct ns_id_req - namespace ID request structure + * @size: size of this structure + * @spare: reserved for future use + * @filter: filter mask + * @ns_id: last namespace id + * @user_ns_id: owning user namespace ID + * + * Structure for passing namespace ID and miscellaneous parameters to + * statns(2) and listns(2). + * + * For statns(2) @param represents the request mask. + * For listns(2) @param represents the last listed mount id (or zero). + */ +struct ns_id_req { + __u32 size; + __u32 spare; + __u64 ns_id; + struct /* listns */ { + __u32 ns_type; + __u32 spare2; + __u64 user_ns_id; + }; +}; + +/* + * Special @user_ns_id value that can be passed to listns() + */ +#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */ + +/* List of all ns_id_req versions. */ +#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */ + #endif /* __LINUX_NSFS_H */ |