summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/fhandle.c6
-rw-r--r--fs/internal.h1
-rw-r--r--fs/mount.h12
-rw-r--r--fs/namespace.c196
-rw-r--r--fs/nsfs.c211
-rw-r--r--fs/pidfs.c2
-rw-r--r--fs/proc/root.c2
7 files changed, 263 insertions, 167 deletions
diff --git a/fs/fhandle.c b/fs/fhandle.c
index a907ddfac4d5..052f9c9368fb 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -11,6 +11,7 @@
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
+#include <linux/nsfs.h>
#include "internal.h"
#include "mount.h"
@@ -189,6 +190,11 @@ static int get_path_anchor(int fd, struct path *root)
return 0;
}
+ if (fd == FD_NSFS_ROOT) {
+ nsfs_get_root(root);
+ return 0;
+ }
+
return -EBADF;
}
diff --git a/fs/internal.h b/fs/internal.h
index 38e8aab27bbd..a33d18ee5b74 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -355,3 +355,4 @@ int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
void pidfs_get_root(struct path *path);
+void nsfs_get_root(struct path *path);
diff --git a/fs/mount.h b/fs/mount.h
index 97737051a8b9..79c85639a7ba 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -17,11 +17,7 @@ struct mnt_namespace {
};
struct user_namespace *user_ns;
struct ucounts *ucounts;
- u64 seq; /* Sequence number to prevent loops */
- union {
- wait_queue_head_t poll;
- struct rcu_head mnt_ns_rcu;
- };
+ wait_queue_head_t poll;
u64 seq_origin; /* Sequence number of origin mount namespace */
u64 event;
#ifdef CONFIG_FSNOTIFY
@@ -30,8 +26,6 @@ struct mnt_namespace {
#endif
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
- struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
- struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
@@ -149,7 +143,7 @@ static inline void detach_mounts(struct dentry *dentry)
static inline void get_mnt_ns(struct mnt_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
}
extern seqlock_t mount_lock;
@@ -173,7 +167,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
static inline bool is_anon_ns(struct mnt_namespace *ns)
{
- return ns->seq == 0;
+ return ns->ns.ns_id == 0;
}
static inline bool anon_ns_root(const struct mount *m)
diff --git a/fs/namespace.c b/fs/namespace.c
index 18bd27d8c9ab..dc01b14c58cd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -33,6 +33,7 @@
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/pidfs.h>
+#include <linux/nstree.h>
#include "pnode.h"
#include "internal.h"
@@ -89,13 +90,10 @@ static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
-static DEFINE_SEQLOCK(mnt_ns_tree_lock);
#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
-static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
-static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
enum mount_kattr_flags_t {
MOUNT_KATTR_RECURSE = (1 << 0),
@@ -128,53 +126,12 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
+ struct ns_common *ns;
+
if (!node)
return NULL;
- return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
-}
-
-static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
-{
- struct mnt_namespace *ns_a = node_to_mnt_ns(a);
- struct mnt_namespace *ns_b = node_to_mnt_ns(b);
- u64 seq_a = ns_a->seq;
- u64 seq_b = ns_b->seq;
-
- if (seq_a < seq_b)
- return -1;
- if (seq_a > seq_b)
- return 1;
- return 0;
-}
-
-static inline void mnt_ns_tree_write_lock(void)
-{
- write_seqlock(&mnt_ns_tree_lock);
-}
-
-static inline void mnt_ns_tree_write_unlock(void)
-{
- write_sequnlock(&mnt_ns_tree_lock);
-}
-
-static void mnt_ns_tree_add(struct mnt_namespace *ns)
-{
- struct rb_node *node, *prev;
-
- mnt_ns_tree_write_lock();
- node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
- /*
- * If there's no previous entry simply add it after the
- * head and if there is add it after the previous entry.
- */
- prev = rb_prev(&ns->mnt_ns_tree_node);
- if (!prev)
- list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
- else
- list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
- mnt_ns_tree_write_unlock();
-
- WARN_ON_ONCE(node);
+ ns = rb_entry(node, struct ns_common, ns_tree_node);
+ return container_of(ns, struct mnt_namespace, ns);
}
static void mnt_ns_release(struct mnt_namespace *ns)
@@ -190,32 +147,16 @@ DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
static void mnt_ns_release_rcu(struct rcu_head *rcu)
{
- mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu));
+ mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu));
}
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
- if (!is_anon_ns(ns)) {
- mnt_ns_tree_write_lock();
- rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
- list_bidir_del_rcu(&ns->mnt_ns_list);
- mnt_ns_tree_write_unlock();
- }
+ if (ns_tree_active(ns))
+ ns_tree_remove(ns);
- call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
-}
-
-static int mnt_ns_find(const void *key, const struct rb_node *node)
-{
- const u64 mnt_ns_id = *(u64 *)key;
- const struct mnt_namespace *ns = node_to_mnt_ns(node);
-
- if (mnt_ns_id < ns->seq)
- return -1;
- if (mnt_ns_id > ns->seq)
- return 1;
- return 0;
+ call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu);
}
/*
@@ -234,28 +175,21 @@ static int mnt_ns_find(const void *key, const struct rb_node *node)
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
- struct mnt_namespace *ns;
- struct rb_node *node;
- unsigned int seq;
+ struct mnt_namespace *mnt_ns;
+ struct ns_common *ns;
guard(rcu)();
- do {
- seq = read_seqbegin(&mnt_ns_tree_lock);
- node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
- if (node)
- break;
- } while (read_seqretry(&mnt_ns_tree_lock, seq));
-
- if (!node)
+ ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS);
+ if (!ns)
return NULL;
/*
* The last reference count is put with RCU delay so we can
* unconditonally acquire a reference here.
*/
- ns = node_to_mnt_ns(node);
- refcount_inc(&ns->passive);
- return ns;
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ refcount_inc(&mnt_ns->passive);
+ return mnt_ns;
}
static inline void lock_mount_hash(void)
@@ -1026,7 +960,7 @@ static inline bool check_anonymous_mnt(struct mount *mnt)
return false;
seq = mnt->mnt_ns->seq_origin;
- return !seq || (seq == current->nsproxy->mnt_ns->seq);
+ return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id);
}
/*
@@ -2161,19 +2095,16 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
{
+ struct ns_common *ns;
+
guard(rcu)();
for (;;) {
- struct list_head *list;
-
- if (previous)
- list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
- else
- list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
- if (list_is_head(list, &mnt_ns_list))
- return ERR_PTR(-ENOENT);
+ ns = ns_tree_adjoined_rcu(mntns, previous);
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
- mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
+ mntns = to_mnt_ns(ns);
/*
* The last passive reference count is put with RCU
@@ -2188,7 +2119,7 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
* the mount namespace and it might already be on its
* deathbed.
*/
- if (!refcount_inc_not_zero(&mntns->ns.count))
+ if (!ns_ref_get(mntns))
continue;
return mntns;
@@ -2213,7 +2144,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
if (!mnt_ns)
return false;
- return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+ return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id;
}
struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
@@ -3089,7 +3020,7 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
if (is_anon_ns(src_mnt_ns))
ns->seq_origin = src_mnt_ns->seq_origin;
else
- ns->seq_origin = src_mnt_ns->seq;
+ ns->seq_origin = src_mnt_ns->ns.ns_id;
}
mnt = __do_loopback(path, recursive);
@@ -4162,20 +4093,11 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
static void free_mnt_ns(struct mnt_namespace *ns)
{
if (!is_anon_ns(ns))
- ns_free_inum(&ns->ns);
+ ns_common_free(ns);
dec_mnt_namespaces(ns->ucounts);
mnt_ns_tree_remove(ns);
}
-/*
- * Assign a sequence number so we can detect when we attempt to bind
- * mount a reference to an older mount namespace into the current
- * mount namespace, preventing reference counting loops. A 64bit
- * number incrementing at 10Ghz will take 12,427 years to wrap which
- * is effectively never, so we can ignore the possibility.
- */
-static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
-
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
{
struct mnt_namespace *new_ns;
@@ -4191,22 +4113,20 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
dec_mnt_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
}
- if (!anon) {
- ret = ns_alloc_inum(&new_ns->ns);
- if (ret) {
- kfree(new_ns);
- dec_mnt_namespaces(ucounts);
- return ERR_PTR(ret);
- }
+
+ if (anon)
+ ret = ns_common_init_inum(new_ns, MNT_NS_ANON_INO);
+ else
+ ret = ns_common_init(new_ns);
+ if (ret) {
+ kfree(new_ns);
+ dec_mnt_namespaces(ucounts);
+ return ERR_PTR(ret);
}
- new_ns->ns.ops = &mntns_operations;
if (!anon)
- new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
- refcount_set(&new_ns->ns.count, 1);
+ ns_tree_gen_id(&new_ns->ns);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
- INIT_LIST_HEAD(&new_ns->mnt_ns_list);
- RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
@@ -4245,7 +4165,7 @@ struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns,
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
namespace_unlock();
- ns_free_inum(&new_ns->ns);
+ ns_common_free(ns);
dec_mnt_namespaces(new_ns->ucounts);
mnt_ns_release(new_ns);
return ERR_CAST(new);
@@ -4292,7 +4212,7 @@ struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns,
if (pwdmnt)
mntput(pwdmnt);
- mnt_ns_tree_add(new_ns);
+ ns_tree_add_raw(new_ns);
return new_ns;
}
@@ -5018,7 +4938,7 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
return -EINVAL;
ns = get_proc_ns(file_inode(fd_file(f)));
- if (ns->ops->type != CLONE_NEWUSER)
+ if (ns->ns_type != CLONE_NEWUSER)
return -EINVAL;
/*
@@ -5411,7 +5331,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
s->sm.mask |= STATMOUNT_MNT_NS_ID;
- s->sm.mnt_ns_id = ns->seq;
+ s->sm.mnt_ns_id = ns->ns.ns_id;
}
static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
@@ -5918,7 +5838,7 @@ static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq
return ERR_PTR(-EINVAL);
ns = get_proc_ns(file_inode(fd_file(f)));
- if (ns->ops->type != CLONE_NEWNS)
+ if (ns->ns_type != CLONE_NEWNS)
return ERR_PTR(-EINVAL);
mnt_ns = to_mnt_ns(ns);
@@ -6131,28 +6051,33 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
return ret;
}
+struct mnt_namespace init_mnt_ns = {
+ .ns.inum = ns_init_inum(&init_mnt_ns),
+ .ns.ops = &mntns_operations,
+ .user_ns = &init_user_ns,
+ .ns.__ns_ref = REFCOUNT_INIT(1),
+ .ns.ns_type = ns_common_type(&init_mnt_ns),
+ .passive = REFCOUNT_INIT(1),
+ .mounts = RB_ROOT,
+ .poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
+};
+
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct mount *m;
- struct mnt_namespace *ns;
struct path root;
mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
if (IS_ERR(mnt))
panic("Can't create rootfs");
- ns = alloc_mnt_ns(&init_user_ns, true);
- if (IS_ERR(ns))
- panic("Can't allocate initial namespace");
- ns->seq = atomic64_inc_return(&mnt_ns_seq);
- ns->ns.inum = PROC_MNT_INIT_INO;
m = real_mount(mnt);
- ns->root = m;
- ns->nr_mounts = 1;
- mnt_add_to_ns(ns, m);
- init_task.nsproxy->mnt_ns = ns;
- get_mnt_ns(ns);
+ init_mnt_ns.root = m;
+ init_mnt_ns.nr_mounts = 1;
+ mnt_add_to_ns(&init_mnt_ns, m);
+ init_task.nsproxy->mnt_ns = &init_mnt_ns;
+ get_mnt_ns(&init_mnt_ns);
root.mnt = mnt;
root.dentry = mnt->mnt_root;
@@ -6160,7 +6085,7 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
- mnt_ns_tree_add(ns);
+ ns_tree_add(&init_mnt_ns);
}
void __init mnt_init(void)
@@ -6200,7 +6125,7 @@ void __init mnt_init(void)
void put_mnt_ns(struct mnt_namespace *ns)
{
- if (!refcount_dec_and_test(&ns->ns.count))
+ if (!ns_ref_put(ns))
return;
namespace_lock();
emptied_ns = ns;
@@ -6449,7 +6374,6 @@ static struct user_namespace *mntns_owner(struct ns_common *ns)
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
- .type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 59aa801347a7..e7fd8a790aaa 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -13,12 +13,26 @@
#include <linux/nsfs.h>
#include <linux/uaccess.h>
#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
+#include <linux/utsname.h>
+#include <linux/exportfs.h>
+#include <linux/nstree.h>
+#include <net/net_namespace.h>
#include "mount.h"
#include "internal.h"
static struct vfsmount *nsfs_mnt;
+static struct path nsfs_root_path = {};
+
+void nsfs_get_root(struct path *path)
+{
+ *path = nsfs_root_path;
+ path_get(path);
+}
+
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg);
static const struct file_operations ns_file_operations = {
@@ -139,7 +153,7 @@ static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns,
* the size value will be set to the size the kernel knows about.
*/
kinfo->size = min(usize, sizeof(*kinfo));
- kinfo->mnt_ns_id = mnt_ns->seq;
+ kinfo->mnt_ns_id = mnt_ns->ns.ns_id;
kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts);
/* Subtract the root mount of the mount namespace. */
if (kinfo->nr_mounts)
@@ -163,15 +177,18 @@ static bool nsfs_ioctl_valid(unsigned int cmd)
case NS_GET_TGID_FROM_PIDNS:
case NS_GET_PID_IN_PIDNS:
case NS_GET_TGID_IN_PIDNS:
- return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
+ case NS_GET_ID:
+ return true;
}
/* Extensible ioctls require some extra handling. */
switch (_IOC_NR(cmd)) {
case _IOC_NR(NS_MNT_GET_INFO):
+ return extensible_ioctl_valid(cmd, NS_MNT_GET_INFO, MNT_NS_INFO_SIZE_VER0);
case _IOC_NR(NS_MNT_GET_NEXT):
+ return extensible_ioctl_valid(cmd, NS_MNT_GET_NEXT, MNT_NS_INFO_SIZE_VER0);
case _IOC_NR(NS_MNT_GET_PREV):
- return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
+ return extensible_ioctl_valid(cmd, NS_MNT_GET_PREV, MNT_NS_INFO_SIZE_VER0);
}
return false;
@@ -202,26 +219,14 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
return -EINVAL;
return open_related_ns(ns, ns->ops->get_parent);
case NS_GET_NSTYPE:
- return ns->ops->type;
+ return ns->ns_type;
case NS_GET_OWNER_UID:
- if (ns->ops->type != CLONE_NEWUSER)
+ if (ns->ns_type != CLONE_NEWUSER)
return -EINVAL;
user_ns = container_of(ns, struct user_namespace, ns);
argp = (uid_t __user *) arg;
uid = from_kuid_munged(current_user_ns(), user_ns->owner);
return put_user(uid, argp);
- case NS_GET_MNTNS_ID: {
- __u64 __user *idp;
- __u64 id;
-
- if (ns->ops->type != CLONE_NEWNS)
- return -EINVAL;
-
- mnt_ns = container_of(ns, struct mnt_namespace, ns);
- idp = (__u64 __user *)arg;
- id = mnt_ns->seq;
- return put_user(id, idp);
- }
case NS_GET_PID_FROM_PIDNS:
fallthrough;
case NS_GET_TGID_FROM_PIDNS:
@@ -229,7 +234,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
case NS_GET_PID_IN_PIDNS:
fallthrough;
case NS_GET_TGID_IN_PIDNS: {
- if (ns->ops->type != CLONE_NEWPID)
+ if (ns->ns_type != CLONE_NEWPID)
return -EINVAL;
ret = -ESRCH;
@@ -267,6 +272,18 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
ret = -ESRCH;
return ret;
}
+ case NS_GET_MNTNS_ID:
+ if (ns->ns_type != CLONE_NEWNS)
+ return -EINVAL;
+ fallthrough;
+ case NS_GET_ID: {
+ __u64 __user *idp;
+ __u64 id;
+
+ idp = (__u64 __user *)arg;
+ id = ns->ns_id;
+ return put_user(id, idp);
+ }
}
/* extensible ioctls */
@@ -276,7 +293,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg;
size_t usize = _IOC_SIZE(ioctl);
- if (ns->ops->type != CLONE_NEWNS)
+ if (ns->ns_type != CLONE_NEWNS)
return -EINVAL;
if (!uinfo)
@@ -297,7 +314,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
struct file *f __free(fput) = NULL;
size_t usize = _IOC_SIZE(ioctl);
- if (ns->ops->type != CLONE_NEWNS)
+ if (ns->ns_type != CLONE_NEWNS)
return -EINVAL;
if (usize < MNT_NS_INFO_SIZE_VER0)
@@ -415,12 +432,164 @@ static const struct stashed_operations nsfs_stashed_ops = {
.put_data = nsfs_put_data,
};
+#define NSFS_FID_SIZE_U32_VER0 (NSFS_FILE_HANDLE_SIZE_VER0 / sizeof(u32))
+#define NSFS_FID_SIZE_U32_LATEST (NSFS_FILE_HANDLE_SIZE_LATEST / sizeof(u32))
+
+static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+ struct inode *parent)
+{
+ struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh;
+ struct ns_common *ns = inode->i_private;
+ int len = *max_len;
+
+ if (parent)
+ return FILEID_INVALID;
+
+ if (len < NSFS_FID_SIZE_U32_VER0) {
+ *max_len = NSFS_FID_SIZE_U32_LATEST;
+ return FILEID_INVALID;
+ } else if (len > NSFS_FID_SIZE_U32_LATEST) {
+ *max_len = NSFS_FID_SIZE_U32_LATEST;
+ }
+
+ fid->ns_id = ns->ns_id;
+ fid->ns_type = ns->ns_type;
+ fid->ns_inum = inode->i_ino;
+ return FILEID_NSFS;
+}
+
+static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
+ int fh_len, int fh_type)
+{
+ struct path path __free(path_put) = {};
+ struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh;
+ struct user_namespace *owning_ns = NULL;
+ struct ns_common *ns;
+ int ret;
+
+ if (fh_len < NSFS_FID_SIZE_U32_VER0)
+ return NULL;
+
+ /* Check that any trailing bytes are zero. */
+ if ((fh_len > NSFS_FID_SIZE_U32_LATEST) &&
+ memchr_inv((void *)fid + NSFS_FID_SIZE_U32_LATEST, 0,
+ fh_len - NSFS_FID_SIZE_U32_LATEST))
+ return NULL;
+
+ switch (fh_type) {
+ case FILEID_NSFS:
+ break;
+ default:
+ return NULL;
+ }
+
+ scoped_guard(rcu) {
+ ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type);
+ if (!ns)
+ return NULL;
+
+ VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
+ VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type);
+ VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum);
+
+ if (!__ns_ref_get(ns))
+ return NULL;
+ }
+
+ switch (ns->ns_type) {
+#ifdef CONFIG_CGROUPS
+ case CLONE_NEWCGROUP:
+ if (!current_in_namespace(to_cg_ns(ns)))
+ owning_ns = to_cg_ns(ns)->user_ns;
+ break;
+#endif
+#ifdef CONFIG_IPC_NS
+ case CLONE_NEWIPC:
+ if (!current_in_namespace(to_ipc_ns(ns)))
+ owning_ns = to_ipc_ns(ns)->user_ns;
+ break;
+#endif
+ case CLONE_NEWNS:
+ if (!current_in_namespace(to_mnt_ns(ns)))
+ owning_ns = to_mnt_ns(ns)->user_ns;
+ break;
+#ifdef CONFIG_NET_NS
+ case CLONE_NEWNET:
+ if (!current_in_namespace(to_net_ns(ns)))
+ owning_ns = to_net_ns(ns)->user_ns;
+ break;
+#endif
+#ifdef CONFIG_PID_NS
+ case CLONE_NEWPID:
+ if (!current_in_namespace(to_pid_ns(ns))) {
+ owning_ns = to_pid_ns(ns)->user_ns;
+ } else if (!READ_ONCE(to_pid_ns(ns)->child_reaper)) {
+ ns->ops->put(ns);
+ return ERR_PTR(-EPERM);
+ }
+ break;
+#endif
+#ifdef CONFIG_TIME_NS
+ case CLONE_NEWTIME:
+ if (!current_in_namespace(to_time_ns(ns)))
+ owning_ns = to_time_ns(ns)->user_ns;
+ break;
+#endif
+#ifdef CONFIG_USER_NS
+ case CLONE_NEWUSER:
+ if (!current_in_namespace(to_user_ns(ns)))
+ owning_ns = to_user_ns(ns);
+ break;
+#endif
+#ifdef CONFIG_UTS_NS
+ case CLONE_NEWUTS:
+ if (!current_in_namespace(to_uts_ns(ns)))
+ owning_ns = to_uts_ns(ns)->user_ns;
+ break;
+#endif
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
+ ns->ops->put(ns);
+ return ERR_PTR(-EPERM);
+ }
+
+ /* path_from_stashed() unconditionally consumes the reference. */
+ ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return no_free_ptr(path.dentry);
+}
+
+static int nsfs_export_permission(struct handle_to_path_ctx *ctx,
+ unsigned int oflags)
+{
+ /* nsfs_fh_to_dentry() performs all permission checks. */
+ return 0;
+}
+
+static struct file *nsfs_export_open(struct path *path, unsigned int oflags)
+{
+ return file_open_root(path, "", oflags, 0);
+}
+
+static const struct export_operations nsfs_export_operations = {
+ .encode_fh = nsfs_encode_fh,
+ .fh_to_dentry = nsfs_fh_to_dentry,
+ .open = nsfs_export_open,
+ .permission = nsfs_export_permission,
+};
+
static int nsfs_init_fs_context(struct fs_context *fc)
{
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
if (!ctx)
return -ENOMEM;
ctx->ops = &nsfs_ops;
+ ctx->eops = &nsfs_export_operations;
ctx->dops = &ns_dentry_operations;
fc->s_fs_info = (void *)&nsfs_stashed_ops;
return 0;
@@ -438,4 +607,6 @@ void __init nsfs_init(void)
if (IS_ERR(nsfs_mnt))
panic("can't set nsfs up\n");
nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER;
+ nsfs_root_path.mnt = nsfs_mnt;
+ nsfs_root_path.dentry = nsfs_mnt->mnt_root;
}
diff --git a/fs/pidfs.c b/fs/pidfs.c
index d01729c5263a..c40c29c702e5 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -440,7 +440,7 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
* erronously mistook the file descriptor for a pidfd.
* This is not perfect but will catch most cases.
*/
- return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO));
+ return extensible_ioctl_valid(cmd, PIDFD_GET_INFO, PIDFD_INFO_SIZE_VER0);
}
return false;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index fd1f1c8a939a..1e24e085c7d5 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -143,7 +143,7 @@ static int proc_parse_pidns_param(struct fs_context *fc,
if (!proc_ns_file(ns_filp))
return invalfc(fc, "pidns argument is not an nsfs file");
ns = get_proc_ns(file_inode(ns_filp));
- if (ns->ops->type != CLONE_NEWPID)
+ if (ns->ns_type != CLONE_NEWPID)
return invalfc(fc, "pidns argument is not a pidns file");
target = container_of(ns, struct pid_namespace, ns);