diff options
Diffstat (limited to 'kernel/nscommon.c')
| -rw-r--r-- | kernel/nscommon.c | 214 |
1 files changed, 213 insertions, 1 deletions
diff --git a/kernel/nscommon.c b/kernel/nscommon.c index 238402b189f7..abd1ac1a2d02 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -3,6 +3,7 @@ #include <linux/ns_common.h> #include <linux/proc_ns.h> +#include <linux/user_namespace.h> #include <linux/vfsdebug.h> #ifdef CONFIG_DEBUG_VFS @@ -53,6 +54,8 @@ static void ns_debug(struct ns_common *ns, const struct proc_ns_operations *ops) int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum) { + int ret; + refcount_set(&ns->__ns_ref, 1); ns->stashed = NULL; ns->ops = ops; @@ -69,10 +72,219 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope ns->inum = inum; return 0; } - return proc_alloc_inum(&ns->inum); + ret = proc_alloc_inum(&ns->inum); + if (ret) + return ret; + /* + * Tree ref starts at 0. It's incremented when namespace enters + * active use (installed in nsproxy) and decremented when all + * active uses are gone. Initial namespaces are always active. + */ + if (is_initial_namespace(ns)) + atomic_set(&ns->__ns_ref_active, 1); + else + atomic_set(&ns->__ns_ref_active, 0); + return 0; } void __ns_common_free(struct ns_common *ns) { proc_free_inum(ns->inum); } + +static struct ns_common *ns_owner(struct ns_common *ns) +{ + struct user_namespace *owner; + + if (unlikely(!ns->ops)) + return NULL; + VFS_WARN_ON_ONCE(!ns->ops->owner); + owner = ns->ops->owner(ns); + VFS_WARN_ON_ONCE(!owner && ns != to_ns_common(&init_user_ns)); + if (!owner) + return NULL; + /* Skip init_user_ns as it's always active */ + if (owner == &init_user_ns) + return NULL; + return to_ns_common(owner); +} + +void __ns_ref_active_get_owner(struct ns_common *ns) +{ + ns = ns_owner(ns); + if (ns) + WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); +} + +/* + * The active reference count works by having each namespace that gets + * created take a single active reference on its owning user namespace. + * That single reference is only released once the child namespace's + * active count itself goes down. + * + * A regular namespace tree might look as follow: + * Legend: + * + : adding active reference + * - : dropping active reference + * x : always active (initial namespace) + * + * + * net_ns pid_ns + * \ / + * + + + * user_ns1 (2) + * | + * ipc_ns | uts_ns + * \ | / + * + + + + * user_ns2 (3) + * | + * cgroup_ns | mnt_ns + * \ | / + * x x x + * init_user_ns (1) + * + * If both net_ns and pid_ns put their last active reference on + * themselves it will cascade to user_ns1 dropping its own active + * reference and dropping one active reference on user_ns2: + * + * net_ns pid_ns + * \ / + * - - + * user_ns1 (0) + * | + * ipc_ns | uts_ns + * \ | / + * + - + + * user_ns2 (2) + * | + * cgroup_ns | mnt_ns + * \ | / + * x x x + * init_user_ns (1) + * + * The iteration stops once we reach a namespace that still has active + * references. + */ +void __ns_ref_active_put_owner(struct ns_common *ns) +{ + for (;;) { + ns = ns_owner(ns); + if (!ns) + return; + if (!atomic_dec_and_test(&ns->__ns_ref_active)) + return; + } +} + +/* + * The active reference count works by having each namespace that gets + * created take a single active reference on its owning user namespace. + * That single reference is only released once the child namespace's + * active count itself goes down. This makes it possible to efficiently + * resurrect a namespace tree: + * + * A regular namespace tree might look as follow: + * Legend: + * + : adding active reference + * - : dropping active reference + * x : always active (initial namespace) + * + * + * net_ns pid_ns + * \ / + * + + + * user_ns1 (2) + * | + * ipc_ns | uts_ns + * \ | / + * + + + + * user_ns2 (3) + * | + * cgroup_ns | mnt_ns + * \ | / + * x x x + * init_user_ns (1) + * + * If both net_ns and pid_ns put their last active reference on + * themselves it will cascade to user_ns1 dropping its own active + * reference and dropping one active reference on user_ns2: + * + * net_ns pid_ns + * \ / + * - - + * user_ns1 (0) + * | + * ipc_ns | uts_ns + * \ | / + * + - + + * user_ns2 (2) + * | + * cgroup_ns | mnt_ns + * \ | / + * x x x + * init_user_ns (1) + * + * Assume the whole tree is dead but all namespaces are still active: + * + * net_ns pid_ns + * \ / + * - - + * user_ns1 (0) + * | + * ipc_ns | uts_ns + * \ | / + * - - - + * user_ns2 (0) + * | + * cgroup_ns | mnt_ns + * \ | / + * x x x + * init_user_ns (1) + * + * Now assume the net_ns gets resurrected (.e.g., via the SIOCGSKNS ioctl()): + * + * net_ns pid_ns + * \ / + * + - + * user_ns1 (0) + * | + * ipc_ns | uts_ns + * \ | / + * - + - + * user_ns2 (0) + * | + * cgroup_ns | mnt_ns + * \ | / + * x x x + * init_user_ns (1) + * + * If net_ns had a zero reference count and we bumped it we also need to + * take another reference on its owning user namespace. Similarly, if + * pid_ns had a zero reference count it also needs to take another + * reference on its owning user namespace. So both net_ns and pid_ns + * will each have their own reference on the owning user namespace. + * + * If the owning user namespace user_ns1 had a zero reference count then + * it also needs to take another reference on its owning user namespace + * and so on. + */ +void __ns_ref_active_resurrect(struct ns_common *ns) +{ + /* If we didn't resurrect the namespace we're done. */ + if (atomic_fetch_add(1, &ns->__ns_ref_active)) + return; + + /* + * We did resurrect it. Walk the ownership hierarchy upwards + * until we found an owning user namespace that is active. + */ + for (;;) { + ns = ns_owner(ns); + if (!ns) + return; + + if (atomic_fetch_add(1, &ns->__ns_ref_active)) + return; + } +} |