summaryrefslogtreecommitdiff
path: root/fs/mount.h
diff options
context:
space:
mode:
author: Christian Brauner <brauner@kernel.org> 2024-12-15 21:17:05 +0100
committer: Christian Brauner <brauner@kernel.org> 2025-01-09 16:58:50 +0100
commit: 344bac8f0d73fe970cd9f5b2f132906317d29e8b (patch)
tree: fadbfbd6a35947d899d5d88eb2a6802c0dab8714 /fs/mount.h
parent: 40384c840ea1944d7c5a392e8975ed088ecf0b37 (diff)
fs: kill MNT_ONRB
Move mnt->mnt_node into the union with mnt->mnt_rcu and mnt->mnt_llist
instead of keeping it with mnt->mnt_list. This allows us to use
RB_CLEAR_NODE(&mnt->mnt_node) in umount_tree() as well as
list_empty(&mnt->mnt_list). That in turn allows us to remove MNT_ONRB.

This also fixes the bug reported in [1] where seemingly MNT_ONRB wasn't
set in @mnt->mnt_flags even though the mount was present in the mount
rbtree of the mount namespace.

The root cause is the following race. When a btrfs subvolume is mounted
a temporary mount is created:

btrfs_get_tree_subvol()
{
        mnt = fc_mount()
        // Register the newly allocated mount with sb->s_mounts:
        lock_mount_hash();
        list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
        unlock_mount_hash();
}

and registered on sb->s_mounts. Later it is added to an anonymous mount
namespace via mount_subvol():

-> mount_subvol()
   -> mount_subtree()
      -> alloc_mnt_ns()
         mnt_add_to_ns()
         vfs_path_lookup()
         put_mnt_ns()

The mnt_add_to_ns() call raises MNT_ONRB in @mnt->mnt_flags. If someone
concurrently does a ro remount:

reconfigure_super()
-> sb_prepare_remount_readonly()
   {
           list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
           }
   }

all mounts registered in sb->s_mounts are visited and first
MNT_WRITE_HOLD is raised, then MNT_READONLY is raised, and finally
MNT_WRITE_HOLD is removed again.

The flag modifications for MNT_WRITE_HOLD/MNT_READONLY and MNT_ONRB
race, so MNT_ONRB might be lost.

Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree")
Cc: <stable@kernel.org> # v6.8+
Link: https://lore.kernel.org/r/20241215-vfs-6-14-mount-work-v1-1-fd55922c4af8@kernel.org
Link: https://lore.kernel.org/r/ec6784ed-8722-4695-980a-4400d4e7bd1a@gmx.com [1]
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/mount.h')
-rw-r--r-- fs/mount.h 15
1 files changed, 9 insertions, 6 deletions
diff --git a/fs/mount.h b/fs/mount.h
index 185fc56afc13..179f690a0c72 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -38,6 +38,7 @@ struct mount {
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
union {
+ struct rb_node mnt_node; /* node in the ns->mounts rbtree */
struct rcu_head mnt_rcu;
struct llist_node mnt_llist;
};
@@ -51,10 +52,7 @@ struct mount {
struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
- union {
- struct rb_node mnt_node; /* Under ns->mounts */
- struct list_head mnt_list;
- };
+ struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
@@ -145,11 +143,16 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
return ns->seq == 0;
}
+static inline bool mnt_ns_attached(const struct mount *mnt)
+{
+ return !RB_EMPTY_NODE(&mnt->mnt_node);
+}
+
static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
{
- WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB));
- mnt->mnt.mnt_flags &= ~MNT_ONRB;
+ WARN_ON(!mnt_ns_attached(mnt));
rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
+ RB_CLEAR_NODE(&mnt->mnt_node);
list_add_tail(&mnt->mnt_list, dt_list);
}