summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/nsfs.c101
-rw-r--r--fs/pidfs.c76
5 files changed, 142 insertions, 49 deletions
diff --git a/fs/libfs.c b/fs/libfs.c
index 96e3d7fc7fc6..1661dcb7d983 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -680,6 +680,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
s->s_export_op = ctx->eops;
s->s_xattr = ctx->xattr;
s->s_time_gran = 1;
+ s->s_d_flags |= ctx->s_d_flags;
root = new_inode(s);
if (!root)
return -ENOMEM;
diff --git a/fs/mount.h b/fs/mount.h
index f13a28752d0b..2d28ef2a3aed 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -27,6 +27,7 @@ struct mnt_namespace {
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
refcount_t passive; /* number references not pinning @mounts */
+ bool is_anon;
} __randomize_layout;
struct mnt_pcp {
@@ -175,7 +176,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
static inline bool is_anon_ns(struct mnt_namespace *ns)
{
- return ns->ns.ns_id == 0;
+ return ns->is_anon;
}
static inline bool anon_ns_root(const struct mount *m)
diff --git a/fs/namespace.c b/fs/namespace.c
index a7fd9682bcf9..d7afac807ac3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4090,8 +4090,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
}
- if (!anon)
- ns_tree_gen_id(&new_ns->ns);
+ ns_tree_gen_id(new_ns);
+
+ new_ns->is_anon = anon;
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
init_waitqueue_head(&new_ns->poll);
@@ -5982,11 +5983,8 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
}
struct mnt_namespace init_mnt_ns = {
- .ns.inum = ns_init_inum(&init_mnt_ns),
- .ns.ops = &mntns_operations,
+ .ns = NS_COMMON_INIT(init_mnt_ns),
.user_ns = &init_user_ns,
- .ns.__ns_ref = REFCOUNT_INIT(1),
- .ns.ns_type = ns_common_type(&init_mnt_ns),
.passive = REFCOUNT_INIT(1),
.mounts = RB_ROOT,
.poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 79b026a36fb6..a80f8d2a4122 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -58,6 +58,8 @@ const struct dentry_operations ns_dentry_operations = {
static void nsfs_evict(struct inode *inode)
{
struct ns_common *ns = inode->i_private;
+
+ __ns_ref_active_put(ns);
clear_inode(inode);
ns->ops->put(ns);
}
@@ -408,6 +410,7 @@ static const struct super_operations nsfs_ops = {
.statfs = simple_statfs,
.evict_inode = nsfs_evict,
.show_path = nsfs_show_path,
+ .drop_inode = inode_just_drop,
};
static int nsfs_init_inode(struct inode *inode, void *data)
@@ -418,6 +421,16 @@ static int nsfs_init_inode(struct inode *inode, void *data)
inode->i_mode |= S_IRUGO;
inode->i_fop = &ns_file_operations;
inode->i_ino = ns->inum;
+
+ /*
+ * Bring the namespace subtree back to life if we have to. This
+ * can happen when e.g., all processes using a network namespace
+ * and all namespace files or namespace file bind-mounts have
+ * died but there are still sockets pinning it. The SIOCGSKNS
+ * ioctl on such a socket will resurrect the relevant namespace
+ * subtree.
+ */
+ __ns_ref_active_get(ns);
return 0;
}
@@ -458,6 +471,45 @@ static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
return FILEID_NSFS;
}
+bool is_current_namespace(struct ns_common *ns)
+{
+ switch (ns->ns_type) {
+#ifdef CONFIG_CGROUPS
+ case CLONE_NEWCGROUP:
+ return current_in_namespace(to_cg_ns(ns));
+#endif
+#ifdef CONFIG_IPC_NS
+ case CLONE_NEWIPC:
+ return current_in_namespace(to_ipc_ns(ns));
+#endif
+ case CLONE_NEWNS:
+ return current_in_namespace(to_mnt_ns(ns));
+#ifdef CONFIG_NET_NS
+ case CLONE_NEWNET:
+ return current_in_namespace(to_net_ns(ns));
+#endif
+#ifdef CONFIG_PID_NS
+ case CLONE_NEWPID:
+ return current_in_namespace(to_pid_ns(ns));
+#endif
+#ifdef CONFIG_TIME_NS
+ case CLONE_NEWTIME:
+ return current_in_namespace(to_time_ns(ns));
+#endif
+#ifdef CONFIG_USER_NS
+ case CLONE_NEWUSER:
+ return current_in_namespace(to_user_ns(ns));
+#endif
+#ifdef CONFIG_UTS_NS
+ case CLONE_NEWUTS:
+ return current_in_namespace(to_uts_ns(ns));
+#endif
+ default:
+ VFS_WARN_ON_ONCE(true);
+ return false;
+ }
+}
+
static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
int fh_len, int fh_type)
{
@@ -483,18 +535,35 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return NULL;
}
+ if (!fid->ns_id)
+ return NULL;
+ /* Either both are set or both are unset. */
+ if (!fid->ns_inum != !fid->ns_type)
+ return NULL;
+
scoped_guard(rcu) {
ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type);
if (!ns)
return NULL;
VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
- VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type);
- if (ns->inum != fid->ns_inum)
+ if (fid->ns_inum && (fid->ns_inum != ns->inum))
+ return NULL;
+ if (fid->ns_type && (fid->ns_type != ns->ns_type))
return NULL;
- if (!__ns_ref_get(ns))
+ /*
+ * This is racy because we're not actually taking an
+ * active reference. IOW, it could happen that the
+ * namespace becomes inactive after this check.
+ * We don't care because nsfs_init_inode() will just
+ * resurrect the relevant namespace tree for us. If it
+ * has been active here we just allow it's resurrection.
+ * We could try to take an active reference here and
+ * then drop it again. But really, why bother.
+ */
+ if (!ns_get_unless_inactive(ns))
return NULL;
}
@@ -590,6 +659,8 @@ static int nsfs_init_fs_context(struct fs_context *fc)
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
if (!ctx)
return -ENOMEM;
+ fc->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
+ ctx->s_d_flags |= DCACHE_DONTCACHE;
ctx->ops = &nsfs_ops;
ctx->eops = &nsfs_export_operations;
ctx->dops = &ns_dentry_operations;
@@ -612,3 +683,27 @@ void __init nsfs_init(void)
nsfs_root_path.mnt = nsfs_mnt;
nsfs_root_path.dentry = nsfs_mnt->mnt_root;
}
+
+void nsproxy_ns_active_get(struct nsproxy *ns)
+{
+ ns_ref_active_get(ns->mnt_ns);
+ ns_ref_active_get(ns->uts_ns);
+ ns_ref_active_get(ns->ipc_ns);
+ ns_ref_active_get(ns->pid_ns_for_children);
+ ns_ref_active_get(ns->cgroup_ns);
+ ns_ref_active_get(ns->net_ns);
+ ns_ref_active_get(ns->time_ns);
+ ns_ref_active_get(ns->time_ns_for_children);
+}
+
+void nsproxy_ns_active_put(struct nsproxy *ns)
+{
+ ns_ref_active_put(ns->mnt_ns);
+ ns_ref_active_put(ns->uts_ns);
+ ns_ref_active_put(ns->ipc_ns);
+ ns_ref_active_put(ns->pid_ns_for_children);
+ ns_ref_active_put(ns->cgroup_ns);
+ ns_ref_active_put(ns->net_ns);
+ ns_ref_active_put(ns->time_ns);
+ ns_ref_active_put(ns->time_ns_for_children);
+}
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 0ef5b47d796a..78dee3c201af 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -454,7 +454,6 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct task_struct *task __free(put_task) = NULL;
struct nsproxy *nsp __free(put_nsproxy) = NULL;
struct ns_common *ns_common = NULL;
- struct pid_namespace *pid_ns;
if (!pidfs_ioctl_valid(cmd))
return -ENOIOCTLCMD;
@@ -496,66 +495,64 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
/* Namespaces that hang of nsproxy. */
case PIDFD_GET_CGROUP_NAMESPACE:
- if (IS_ENABLED(CONFIG_CGROUPS)) {
- get_cgroup_ns(nsp->cgroup_ns);
- ns_common = to_ns_common(nsp->cgroup_ns);
- }
+ if (!ns_ref_get(nsp->cgroup_ns))
+ break;
+ ns_common = to_ns_common(nsp->cgroup_ns);
break;
case PIDFD_GET_IPC_NAMESPACE:
- if (IS_ENABLED(CONFIG_IPC_NS)) {
- get_ipc_ns(nsp->ipc_ns);
- ns_common = to_ns_common(nsp->ipc_ns);
- }
+ if (!ns_ref_get(nsp->ipc_ns))
+ break;
+ ns_common = to_ns_common(nsp->ipc_ns);
break;
case PIDFD_GET_MNT_NAMESPACE:
- get_mnt_ns(nsp->mnt_ns);
+ if (!ns_ref_get(nsp->mnt_ns))
+ break;
ns_common = to_ns_common(nsp->mnt_ns);
break;
case PIDFD_GET_NET_NAMESPACE:
- if (IS_ENABLED(CONFIG_NET_NS)) {
- ns_common = to_ns_common(nsp->net_ns);
- get_net_ns(ns_common);
- }
+ if (!ns_ref_get(nsp->net_ns))
+ break;
+ ns_common = to_ns_common(nsp->net_ns);
break;
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
- if (IS_ENABLED(CONFIG_PID_NS)) {
- get_pid_ns(nsp->pid_ns_for_children);
- ns_common = to_ns_common(nsp->pid_ns_for_children);
- }
+ if (!ns_ref_get(nsp->pid_ns_for_children))
+ break;
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
break;
case PIDFD_GET_TIME_NAMESPACE:
- if (IS_ENABLED(CONFIG_TIME_NS)) {
- get_time_ns(nsp->time_ns);
- ns_common = to_ns_common(nsp->time_ns);
- }
+ if (!ns_ref_get(nsp->time_ns))
+ break;
+ ns_common = to_ns_common(nsp->time_ns);
break;
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
- if (IS_ENABLED(CONFIG_TIME_NS)) {
- get_time_ns(nsp->time_ns_for_children);
- ns_common = to_ns_common(nsp->time_ns_for_children);
- }
+ if (!ns_ref_get(nsp->time_ns_for_children))
+ break;
+ ns_common = to_ns_common(nsp->time_ns_for_children);
break;
case PIDFD_GET_UTS_NAMESPACE:
- if (IS_ENABLED(CONFIG_UTS_NS)) {
- get_uts_ns(nsp->uts_ns);
- ns_common = to_ns_common(nsp->uts_ns);
- }
+ if (!ns_ref_get(nsp->uts_ns))
+ break;
+ ns_common = to_ns_common(nsp->uts_ns);
break;
/* Namespaces that don't hang of nsproxy. */
case PIDFD_GET_USER_NAMESPACE:
- if (IS_ENABLED(CONFIG_USER_NS)) {
- rcu_read_lock();
- ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
- rcu_read_unlock();
+ scoped_guard(rcu) {
+ struct user_namespace *user_ns;
+
+ user_ns = task_cred_xxx(task, user_ns);
+ if (!ns_ref_get(user_ns))
+ break;
+ ns_common = to_ns_common(user_ns);
}
break;
case PIDFD_GET_PID_NAMESPACE:
- if (IS_ENABLED(CONFIG_PID_NS)) {
- rcu_read_lock();
+ scoped_guard(rcu) {
+ struct pid_namespace *pid_ns;
+
pid_ns = task_active_pid_ns(task);
- if (pid_ns)
- ns_common = to_ns_common(get_pid_ns(pid_ns));
- rcu_read_unlock();
+ if (!ns_ref_get(pid_ns))
+ break;
+ ns_common = to_ns_common(pid_ns);
}
break;
default:
@@ -1022,6 +1019,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
fc->s_iflags |= SB_I_NOEXEC;
fc->s_iflags |= SB_I_NODEV;
+ ctx->s_d_flags |= DCACHE_DONTCACHE;
ctx->ops = &pidfs_sops;
ctx->eops = &pidfs_export_operations;
ctx->dops = &pidfs_dentry_operations;