diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-05 14:36:21 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-05 14:36:21 -0800 |
| commit | 7cd122b55283d3ceef71a5b723ccaa03a72284b4 (patch) | |
| tree | 56cb173d2c4ea306add677d6cc8f70e8cb4c14f2 /fs/dcache.c | |
| parent | 7203ca412fc8e8a0588e9adc0f777d3163f8dff3 (diff) | |
| parent | eb028c33451af08bb34f45c6be6967ef1c98cbd1 (diff) | |
Merge tag 'pull-persistency' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull persistent dentry infrastructure and conversion from Al Viro:
"Some filesystems use a kinda-sorta controlled dentry refcount leak to
pin dentries of created objects in dcache (and undo it when removing
those). A reference is grabbed and not released, but it's not actually
_stored_ anywhere.
That works, but it's hard to follow and verify; among other things, we
have no way to tell _which_ of the increments is intended to be an
unpaired one. Worse, on removal we need to decide whether the
reference had already been dropped, which can be non-trivial if that
removal is on umount and we need to figure out if this dentry is
pinned due to e.g. unlink() not done. Usually that is handled by using
kill_litter_super() as ->kill_sb(), but there are open-coded special
cases of the same (consider e.g. /proc/self).
Things get simpler if we introduce a new dentry flag
(DCACHE_PERSISTENT) marking those "leaked" dentries. Having it set
claims responsibility for +1 in refcount.
The end result this series is aiming for:
- get these unbalanced dget() and dput() replaced with new primitives
that would, in addition to adjusting refcount, set and clear
persistency flag.
- instead of having kill_litter_super() mess with removing the
remaining "leaked" references (e.g. for all tmpfs files that hadn't
been removed prior to umount), have the regular
shrink_dcache_for_umount() strip DCACHE_PERSISTENT of all dentries,
dropping the corresponding reference if it had been set. After that
kill_litter_super() becomes an equivalent of kill_anon_super().
Doing that in a single step is not feasible - it would affect too many
places in too many filesystems. It has to be split into a series.
This work has really started early in 2024; quite a few preliminary
pieces have already gone into mainline. This chunk is finally getting
to the meat of that stuff - infrastructure and most of the conversions
to it.
Some pieces are still sitting in the local branches, but the bulk of
that stuff is here"
* tag 'pull-persistency' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (54 commits)
d_make_discardable(): warn if given a non-persistent dentry
kill securityfs_recursive_remove()
convert securityfs
get rid of kill_litter_super()
convert rust_binderfs
convert nfsctl
convert rpc_pipefs
convert hypfs
hypfs: swich hypfs_create_u64() to returning int
hypfs: switch hypfs_create_str() to returning int
hypfs: don't pin dentries twice
convert gadgetfs
gadgetfs: switch to simple_remove_by_name()
convert functionfs
functionfs: switch to simple_remove_by_name()
functionfs: fix the open/removal races
functionfs: need to cancel ->reset_work in ->kill_sb()
functionfs: don't bother with ffs->ref in ffs_data_{opened,closed}()
functionfs: don't abuse ffs_data_closed() on fs shutdown
convert selinuxfs
...
Diffstat (limited to 'fs/dcache.c')
| -rw-r--r-- | fs/dcache.c | 111 |
1 files changed, 72 insertions, 39 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 9143fd502def..c8e21b4423cf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -870,6 +870,24 @@ locked: return false; } +static void finish_dput(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(RCU) +{ + while (lock_for_kill(dentry)) { + rcu_read_unlock(); + dentry = __dentry_kill(dentry); + if (!dentry) + return; + if (retain_dentry(dentry, true)) { + spin_unlock(&dentry->d_lock); + return; + } + rcu_read_lock(); + } + rcu_read_unlock(); + spin_unlock(&dentry->d_lock); +} /* * This is dput @@ -907,22 +925,21 @@ void dput(struct dentry *dentry) rcu_read_unlock(); return; } - while (lock_for_kill(dentry)) { - rcu_read_unlock(); - dentry = __dentry_kill(dentry); - if (!dentry) - return; - if (retain_dentry(dentry, true)) { - spin_unlock(&dentry->d_lock); - return; - } - rcu_read_lock(); - } - rcu_read_unlock(); - spin_unlock(&dentry->d_lock); + finish_dput(dentry); } EXPORT_SYMBOL(dput); +void d_make_discardable(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT)); + dentry->d_flags &= ~DCACHE_PERSISTENT; + dentry->d_lockref.count--; + rcu_read_lock(); + finish_dput(dentry); +} +EXPORT_SYMBOL(d_make_discardable); + static void to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { @@ -1512,6 +1529,15 @@ out: return ret; } +static enum d_walk_ret select_collect_umount(void *_data, struct dentry *dentry) +{ + if (dentry->d_flags & DCACHE_PERSISTENT) { + dentry->d_flags &= ~DCACHE_PERSISTENT; + dentry->d_lockref.count--; + } + return select_collect(_data, dentry); +} + static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1540,18 +1566,20 @@ out: } /** - * shrink_dcache_parent - prune dcache + * shrink_dcache_tree - prune dcache * @parent: parent of entries to prune + * @for_umount: true if we want to unpin the persistent ones * * Prune the dcache to remove unused children of the parent dentry. */ -void shrink_dcache_parent(struct dentry *parent) +static void shrink_dcache_tree(struct dentry *parent, bool for_umount) { for (;;) { struct select_data data = {.start = parent}; INIT_LIST_HEAD(&data.dispose); - d_walk(parent, &data, select_collect); + d_walk(parent, &data, + for_umount ? select_collect_umount : select_collect); if (!list_empty(&data.dispose)) { shrink_dentry_list(&data.dispose); @@ -1576,6 +1604,11 @@ void shrink_dcache_parent(struct dentry *parent) shrink_dentry_list(&data.dispose); } } + +void shrink_dcache_parent(struct dentry *parent) +{ + shrink_dcache_tree(parent, false); +} EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) @@ -1602,7 +1635,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) static void do_one_tree(struct dentry *dentry) { - shrink_dcache_parent(dentry); + shrink_dcache_tree(dentry, true); d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); @@ -1924,7 +1957,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); WARN_ON(d_in_lookup(dentry)); - spin_lock(&dentry->d_lock); /* * The negative counter only tracks dentries on the LRU. Don't dec if * d_lru is on another list. @@ -1937,7 +1969,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); - spin_unlock(&dentry->d_lock); } /** @@ -1961,7 +1992,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode) if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + spin_lock(&entry->d_lock); __d_instantiate(entry, inode); + spin_unlock(&entry->d_lock); spin_unlock(&inode->i_lock); } } @@ -1980,7 +2013,9 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + spin_lock(&entry->d_lock); __d_instantiate(entry, inode); + spin_unlock(&entry->d_lock); WARN_ON(!(inode_state_read(inode) & I_NEW)); inode_state_clear(inode, I_NEW | I_CREATING); inode_wake_up_bit(inode, __I_NEW); @@ -2742,6 +2777,24 @@ void d_add(struct dentry *entry, struct inode *inode) } EXPORT_SYMBOL(d_add); +struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) +{ + WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); + WARN_ON(!inode); + security_d_instantiate(dentry, inode); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + __d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_PERSISTENT; + dget_dlock(dentry); + if (d_unhashed(dentry)) + __d_rehash(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + return dentry; +} +EXPORT_SYMBOL(d_make_persistent); + static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { @@ -3111,26 +3164,6 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) } EXPORT_SYMBOL(is_subdir); -static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) -{ - struct dentry *root = data; - if (dentry != root) { - if (d_unhashed(dentry) || !dentry->d_inode) - return D_WALK_SKIP; - - if (!(dentry->d_flags & DCACHE_GENOCIDE)) { - dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_lockref.count--; - } - } - return D_WALK_CONTINUE; -} - -void d_genocide(struct dentry *parent) -{ - d_walk(parent, parent, d_genocide_kill); -} - void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; |