summaryrefslogtreecommitdiff
path: root/fs/namespace.c
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-11-24 13:11:21 +0100
committerChristian Brauner <brauner@kernel.org>2025-11-28 12:42:36 +0100
commit0512bf9701f339c8fee2cc82b6fc35f0a8f6be7a (patch)
tree496bec84a2cd45dce6520011accdfd180503f849 /fs/namespace.c
parent3a8660878839faadb4f1a6dd72c3179c1df56787 (diff)
parent6fb102291873c6d88b221c476589e65558e3f30c (diff)
Merge patch series "file: FD_{ADD,PREPARE}()"
Christian Brauner <brauner@kernel.org> says: This now removes roughly double the code that it adds. I've been playing with this to allow for moderately flexible usage of the get_unused_fd_flags() + create file + fd_install() pattern that's used quite extensively and requires cumbersome cleanup paths. How callers allocate files is really heterogenous so it's not really convenient to fold them into a single class. It's possibe to split them into subclasses like for anon inodes. I think that's not necessarily nice as well. This adds two primitives: (1) FD_ADD() the simple cases a file is installed: fd = FD_ADD(O_CLOEXEC, vfio_device_open_file(device)); if (fd < 0) vfio_device_put_registration(device); return fd; (2) FD_PREPARE() that captures all the cases where access to fd or file or additional work before publishing the fd is needed: FD_PREPARE(fdf, O_CLOEXEC, sync_file->file); if (fdf.err) { fput(sync_file->file); return fdf.err; } data.fence = fd_prepare_fd(fdf); if (copy_to_user((void __user *)arg, &data, sizeof(data))) return -EFAULT; return fd_publish(fdf); I've converted all of the easy cases over to it and it gets rid of an aweful lot of convoluted cleanup logic. There are a bunch of other cases that can also be converted after a bit of massaging. It's centered around a simple struct. FD_PREPARE() encapsulates all of allocation and cleanup logic and must be followed by a call to fd_publish() which associates the fd with the file and installs it into the callers fdtable. If fd_publish() isn't called both are deallocated. FD_ADD() is a shorthand that does the fd_publish() and never exposes the struct to the caller. That's often the case when they don't need access to anything after installing the fd. It mandates a specific order namely that first we allocate the fd and then instantiate the file. But that shouldn't be a problem. Nearly everyone I've converted used this order anyway. There's a bunch of additional cases where it would be easy to convert them to this pattern. For example, the whole sync file stuff in dma currently returns the containing structure of the file instead of the file itself even though it's only used to allocate files. Changing that would make it fall into the FD_PREPARE() pattern easily. I've not done that work yet. There's room for extending this in a way that wed'd have subclasses for some particularly often use patterns but as I said I'm not even sure that's worth it. * patches from https://patch.msgid.link/20251123-work-fd-prepare-v4-0-b6efa1706cfd@kernel.org: (47 commits) kvm: convert kvm_vcpu_ioctl_get_stats_fd() to FD_PREPARE() kvm: convert kvm_arch_supports_gmem_init_shared() to FD_PREPARE() io_uring: convert io_create_mock_file() to FD_PREPARE() file: convert replace_fd() to FD_PREPARE() vfio: convert vfio_group_ioctl_get_device_fd() to FD_PREPARE() tty: convert ptm_open_peer() to FD_PREPARE() ntsync: convert ntsync_obj_get_fd() to FD_PREPARE() media: convert media_request_alloc() to FD_PREPARE() hv: convert mshv_ioctl_create_partition() to FD_PREPARE() gpio: convert linehandle_create() to FD_PREPARE() dma: port sw_sync_ioctl_create_fence() to FD_PREPARE() pseries: port papr_rtas_setup_file_interface() to FD_PREPARE() pseries: convert papr_platform_dump_create_handle() to FD_PREPARE() spufs: convert spufs_gang_open() to FD_PREPARE() papr-hvpipe: convert papr_hvpipe_dev_create_handle() to FD_PREPARE() spufs: convert spufs_context_open() to FD_PREPARE() net/socket: convert __sys_accept4_file() to FD_PREPARE() net/socket: convert sock_map_fd() to FD_PREPARE() net/sctp: convert sctp_getsockopt_peeloff_common() to FD_PREPARE() net/kcm: convert kcm_ioctl() to FD_PREPARE() ... Link: https://patch.msgid.link/20251123-work-fd-prepare-v4-0-b6efa1706cfd@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c103
1 files changed, 37 insertions, 66 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..8302dd64be20 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3103,19 +3103,7 @@ static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned
SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
{
- int fd;
- struct file *file __free(fput) = NULL;
-
- file = vfs_open_tree(dfd, filename, flags);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- fd = get_unused_fd_flags(flags & O_CLOEXEC);
- if (fd < 0)
- return fd;
-
- fd_install(fd, no_free_ptr(file));
- return fd;
+ return FD_ADD(flags, vfs_open_tree(dfd, filename, flags));
}
/*
@@ -4283,10 +4271,10 @@ static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
unsigned int, attr_flags)
{
+ struct path new_path __free(path_put) = {};
struct mnt_namespace *ns;
struct fs_context *fc;
- struct file *file;
- struct path newmount;
+ struct vfsmount *new_mnt;
struct mount *mnt;
unsigned int mnt_flags = 0;
long ret;
@@ -4324,35 +4312,36 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
fc = fd_file(f)->private_data;
- ret = mutex_lock_interruptible(&fc->uapi_mutex);
- if (ret < 0)
+ ACQUIRE(mutex_intr, uapi_mutex)(&fc->uapi_mutex);
+ ret = ACQUIRE_ERR(mutex_intr, &uapi_mutex);
+ if (ret)
return ret;
/* There must be a valid superblock or we can't mount it */
ret = -EINVAL;
if (!fc->root)
- goto err_unlock;
+ return ret;
ret = -EPERM;
if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
errorfcp(fc, "VFS", "Mount too revealing");
- goto err_unlock;
+ return ret;
}
ret = -EBUSY;
if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
- goto err_unlock;
+ return ret;
if (fc->sb_flags & SB_MANDLOCK)
warn_mandlock();
- newmount.mnt = vfs_create_mount(fc);
- if (IS_ERR(newmount.mnt)) {
- ret = PTR_ERR(newmount.mnt);
- goto err_unlock;
- }
- newmount.dentry = dget(fc->root);
- newmount.mnt->mnt_flags = mnt_flags;
+ new_mnt = vfs_create_mount(fc);
+ if (IS_ERR(new_mnt))
+ return PTR_ERR(new_mnt);
+ new_mnt->mnt_flags = mnt_flags;
+
+ new_path.dentry = dget(fc->root);
+ new_path.mnt = new_mnt;
/* We've done the mount bit - now move the file context into more or
* less the same state as if we'd done an fspick(). We don't want to
@@ -4362,38 +4351,27 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
vfs_clean_context(fc);
ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
- if (IS_ERR(ns)) {
- ret = PTR_ERR(ns);
- goto err_path;
- }
- mnt = real_mount(newmount.mnt);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+ mnt = real_mount(new_path.mnt);
ns->root = mnt;
ns->nr_mounts = 1;
mnt_add_to_ns(ns, mnt);
- mntget(newmount.mnt);
+ mntget(new_path.mnt);
- /* Attach to an apparent O_PATH fd with a note that we need to unmount
- * it, not just simply put it.
- */
- file = dentry_open(&newmount, O_PATH, fc->cred);
- if (IS_ERR(file)) {
- dissolve_on_fput(newmount.mnt);
- ret = PTR_ERR(file);
- goto err_path;
+ FD_PREPARE(fdf, (flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0,
+ dentry_open(&new_path, O_PATH, fc->cred));
+ if (fdf.err) {
+ dissolve_on_fput(new_path.mnt);
+ return fdf.err;
}
- file->f_mode |= FMODE_NEED_UNMOUNT;
- ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
- if (ret >= 0)
- fd_install(ret, file);
- else
- fput(file);
-
-err_path:
- path_put(&newmount);
-err_unlock:
- mutex_unlock(&fc->uapi_mutex);
- return ret;
+ /*
+ * Attach to an apparent O_PATH fd with a note that we
+ * need to unmount it, not just simply put it.
+ */
+ fd_prepare_file(fdf)->f_mode |= FMODE_NEED_UNMOUNT;
+ return fd_publish(fdf);
}
static inline int vfs_move_mount(const struct path *from_path,
@@ -5035,19 +5013,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
unsigned, flags, struct mount_attr __user *, uattr,
size_t, usize)
{
- struct file __free(fput) *file = NULL;
- int fd;
-
if (!uattr && usize)
return -EINVAL;
- file = vfs_open_tree(dfd, filename, flags);
- if (IS_ERR(file))
- return PTR_ERR(file);
+ FD_PREPARE(fdf, flags, vfs_open_tree(dfd, filename, flags));
+ if (fdf.err)
+ return fdf.err;
if (uattr) {
- int ret;
struct mount_kattr kattr = {};
+ struct file *file = fd_prepare_file(fdf);
+ int ret;
if (flags & OPEN_TREE_CLONE)
kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
@@ -5063,12 +5039,7 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
return ret;
}
- fd = get_unused_fd_flags(flags & O_CLOEXEC);
- if (fd < 0)
- return fd;
-
- fd_install(fd, no_free_ptr(file));
- return fd;
+ return fd_publish(fdf);
}
int show_path(struct seq_file *m, struct dentry *root)