summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 08:44:26 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 08:44:26 -0800
commitb04b2e7a61830cabd00c6f95308a8e2f5d82fa52 (patch)
treea228d66ed097ffd37e05e4bcc4d37337be731d46 /fs
parent1885cdbfbb51ede3637166c895d0b8040c9899cc (diff)
parentebf8538979101ef879742dcfaf04b684f5461e12 (diff)
Merge tag 'vfs-6.19-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "Features: - Cheaper MAY_EXEC handling for path lookup. This elides MAY_WRITE permission checks during path lookup and adds the IOP_FASTPERM_MAY_EXEC flag so filesystems like btrfs can avoid expensive permission work. - Hide dentry_cache behind runtime const machinery. - Add German Maglione as virtiofs co-maintainer. Cleanups: - Tidy up and inline step_into() and walk_component() for improved code generation. - Re-enable IOCB_NOWAIT writes to files. This refactors file timestamp update logic, fixing a layering bypass in btrfs when updating timestamps on device files and improving FMODE_NOCMTIME handling in VFS now that nfsd started using it. - Path lookup optimizations extracting slowpaths into dedicated routines and adding branch prediction hints for mntput_no_expire(), fd_install(), lookup_slow(), and various other hot paths. - Enable clang's -fms-extensions flag, requiring a JFS rename to avoid conflicts. - Remove spurious exports in fs/file_attr.c. - Stop duplicating union pipe_index declaration. This depends on the shared kbuild branch that brings in -fms-extensions support which is merged into this branch. - Use MD5 library instead of crypto_shash in ecryptfs. - Use largest_zero_folio() in iomap_dio_zero(). - Replace simple_strtol/strtoul with kstrtoint/kstrtouint in init and initrd code. - Various typo fixes. Fixes: - Fix emergency sync for btrfs. Btrfs requires an explicit sync_fs() call with wait == 1 to commit super blocks. The emergency sync path never passed this, leaving btrfs data uncommitted during emergency sync. - Use local kmap in watch_queue's post_one_notification(). - Add hint prints in sb_set_blocksize() for LBS dependency on THP" * tag 'vfs-6.19-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (35 commits) MAINTAINERS: add German Maglione as virtiofs co-maintainer fs: inline step_into() and walk_component() fs: tidy up step_into() & friends before inlining orangefs: use inode_update_timestamps directly btrfs: fix the comment on btrfs_update_time btrfs: use vfs_utimes to update file timestamps fs: export vfs_utimes fs: lift the FMODE_NOCMTIME check into file_update_time_flags fs: refactor file timestamp update logic include/linux/fs.h: trivial fix: regualr -> regular fs/splice.c: trivial fix: pipes -> pipe's fs: mark lookup_slow() as noinline fs: add predicts based on nd->depth fs: move mntput_no_expire() slowpath into a dedicated routine fs: remove spurious exports in fs/file_attr.c watch_queue: Use local kmap in post_one_notification() fs: touch up predicts in path lookup fs: move fd_install() slowpath into a dedicated routine and provide commentary fs: hide dentry_cache behind runtime const machinery fs: touch predicts in do_dentry_open() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/btrfs/volumes.c11
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/ecryptfs/Kconfig2
-rw-r--r--fs/ecryptfs/crypto.c90
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h13
-rw-r--r--fs/ecryptfs/inode.c7
-rw-r--r--fs/ecryptfs/keystore.c65
-rw-r--r--fs/ecryptfs/main.c7
-rw-r--r--fs/ecryptfs/super.c5
-rw-r--r--fs/file.c35
-rw-r--r--fs/file_attr.c4
-rw-r--r--fs/inode.c58
-rw-r--r--fs/iomap/direct-io.c38
-rw-r--r--fs/jfs/jfs_incore.h6
-rw-r--r--fs/namei.c142
-rw-r--r--fs/namespace.c38
-rw-r--r--fs/open.c6
-rw-r--r--fs/orangefs/inode.c4
-rw-r--r--fs/splice.c2
-rw-r--r--fs/sync.c7
-rw-r--r--fs/utimes.c1
22 files changed, 262 insertions, 301 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6282911e536f..8fa80689b5bb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5839,6 +5839,8 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root)
if (ret)
return ERR_PTR(ret);
+ if (S_ISDIR(inode->vfs_inode.i_mode))
+ inode->vfs_inode.i_opflags |= IOP_FASTPERM_MAY_EXEC;
unlock_new_inode(&inode->vfs_inode);
return inode;
}
@@ -6291,8 +6293,8 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode)
}
/*
- * This is a copy of file_update_time. We need this so we can return error on
- * ENOSPC for updating the inode in the case of file write and mmap writes.
+ * We need our own ->update_time so that we can return error on ENOSPC for
+ * updating the inode in the case of file write and mmap writes.
*/
static int btrfs_update_time(struct inode *inode, int flags)
{
@@ -6790,8 +6792,11 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
}
ret = btrfs_create_new_inode(trans, &new_inode_args);
- if (!ret)
+ if (!ret) {
+ if (S_ISDIR(inode->i_mode))
+ inode->i_opflags |= IOP_FASTPERM_MAY_EXEC;
d_instantiate_new(dentry, inode);
+ }
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
@@ -9170,6 +9175,11 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
+/*
+ * NOTE: in case you are adding MAY_EXEC check for directories:
+ * we are marking them with IOP_FASTPERM_MAY_EXEC, allowing path lookup to
+ * elide calls here.
+ */
static int btrfs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2bec544d8ba3..259e8b0496df 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2002,14 +2002,11 @@ out:
static void update_dev_time(const char *device_path)
{
struct path path;
- int ret;
- ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
- if (ret)
- return;
-
- inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION);
- path_put(&path);
+ if (!kern_path(device_path, LOOKUP_FOLLOW, &path)) {
+ vfs_utimes(&path, NULL);
+ path_put(&path);
+ }
}
static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
diff --git a/fs/dcache.c b/fs/dcache.c
index 035cccbc9276..5c6282b03ba2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -86,7 +86,8 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(rename_lock);
-static struct kmem_cache *dentry_cache __ro_after_init;
+static struct kmem_cache *__dentry_cache __ro_after_init;
+#define dentry_cache runtime_const_ptr(__dentry_cache)
const struct qstr empty_name = QSTR_INIT("", 0);
EXPORT_SYMBOL(empty_name);
@@ -3222,9 +3223,10 @@ static void __init dcache_init(void)
* but it is probably not worth it because of the cache nature
* of the dcache.
*/
- dentry_cache = KMEM_CACHE_USERCOPY(dentry,
+ __dentry_cache = KMEM_CACHE_USERCOPY(dentry,
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT,
d_shortname.string);
+ runtime_const_init(ptr, __dentry_cache);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index 1bdeaa6d5790..c2f4fb41b4e6 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -4,7 +4,7 @@ config ECRYPT_FS
depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
select CRYPTO_ECB
select CRYPTO_CBC
- select CRYPTO_MD5
+ select CRYPTO_LIB_MD5
help
Encrypted filesystem that operates on the VFS layer. See
<file:Documentation/filesystems/ecryptfs.rst> to learn more about
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 69536cacdea8..260f8a4938b0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -9,7 +9,6 @@
* Michael C. Thompson <mcthomps@us.ibm.com>
*/
-#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/fs.h>
#include <linux/mount.h>
@@ -48,32 +47,6 @@ void ecryptfs_from_hex(char *dst, char *src, int dst_size)
}
}
-/**
- * ecryptfs_calculate_md5 - calculates the md5 of @src
- * @dst: Pointer to 16 bytes of allocated memory
- * @crypt_stat: Pointer to crypt_stat struct for the current inode
- * @src: Data to be md5'd
- * @len: Length of @src
- *
- * Uses the allocated crypto context that crypt_stat references to
- * generate the MD5 sum of the contents of src.
- */
-static int ecryptfs_calculate_md5(char *dst,
- struct ecryptfs_crypt_stat *crypt_stat,
- char *src, int len)
-{
- int rc = crypto_shash_tfm_digest(crypt_stat->hash_tfm, src, len, dst);
-
- if (rc) {
- printk(KERN_ERR
- "%s: Error computing crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out;
- }
-out:
- return rc;
-}
-
static int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
char *cipher_name,
char *chaining_modifier)
@@ -104,13 +77,10 @@ out:
*
* Generate the initialization vector from the given root IV and page
* offset.
- *
- * Returns zero on success; non-zero on error.
*/
-int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
- loff_t offset)
+void ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
+ loff_t offset)
{
- int rc = 0;
char dst[MD5_DIGEST_SIZE];
char src[ECRYPTFS_MAX_IV_BYTES + 16];
@@ -129,20 +99,12 @@ int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
ecryptfs_printk(KERN_DEBUG, "source:\n");
ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
}
- rc = ecryptfs_calculate_md5(dst, crypt_stat, src,
- (crypt_stat->iv_bytes + 16));
- if (rc) {
- ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
- "MD5 while generating IV for a page\n");
- goto out;
- }
+ md5(src, crypt_stat->iv_bytes + 16, dst);
memcpy(iv, dst, crypt_stat->iv_bytes);
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "derived iv:\n");
ecryptfs_dump_hex(iv, crypt_stat->iv_bytes);
}
-out:
- return rc;
}
/**
@@ -151,29 +113,14 @@ out:
*
* Initialize the crypt_stat structure.
*/
-int ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
+void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
{
- struct crypto_shash *tfm;
- int rc;
-
- tfm = crypto_alloc_shash(ECRYPTFS_DEFAULT_HASH, 0, 0);
- if (IS_ERR(tfm)) {
- rc = PTR_ERR(tfm);
- ecryptfs_printk(KERN_ERR, "Error attempting to "
- "allocate crypto context; rc = [%d]\n",
- rc);
- return rc;
- }
-
memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
INIT_LIST_HEAD(&crypt_stat->keysig_list);
mutex_init(&crypt_stat->keysig_list_mutex);
mutex_init(&crypt_stat->cs_mutex);
mutex_init(&crypt_stat->cs_tfm_mutex);
- crypt_stat->hash_tfm = tfm;
crypt_stat->flags |= ECRYPTFS_STRUCT_INITIALIZED;
-
- return 0;
}
/**
@@ -187,7 +134,6 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
struct ecryptfs_key_sig *key_sig, *key_sig_tmp;
crypto_free_skcipher(crypt_stat->tfm);
- crypto_free_shash(crypt_stat->hash_tfm);
list_for_each_entry_safe(key_sig, key_sig_tmp,
&crypt_stat->keysig_list, crypt_stat_list) {
list_del(&key_sig->crypt_stat_list);
@@ -361,14 +307,7 @@ static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat,
int rc;
extent_base = (((loff_t)page_index) * (PAGE_SIZE / extent_size));
- rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
- (extent_base + extent_offset));
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
- "extent [0x%.16llx]; rc = [%d]\n",
- (unsigned long long)(extent_base + extent_offset), rc);
- goto out;
- }
+ ecryptfs_derive_iv(extent_iv, crypt_stat, extent_base + extent_offset);
sg_init_table(&src_sg, 1);
sg_init_table(&dst_sg, 1);
@@ -609,31 +548,20 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
*/
int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
{
- int rc = 0;
char dst[MD5_DIGEST_SIZE];
BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
BUG_ON(crypt_stat->iv_bytes <= 0);
if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
- rc = -EINVAL;
ecryptfs_printk(KERN_WARNING, "Session key not valid; "
"cannot generate root IV\n");
- goto out;
- }
- rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
- crypt_stat->key_size);
- if (rc) {
- ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
- "MD5 while generating root IV\n");
- goto out;
- }
- memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes);
-out:
- if (rc) {
memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING;
+ return -EINVAL;
}
- return rc;
+ md5(crypt_stat->key, crypt_stat->key_size, dst);
+ memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes);
+ return 0;
}
static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 9e6ab0b41337..62a2ea7f59ed 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -14,6 +14,7 @@
#ifndef ECRYPTFS_KERNEL_H
#define ECRYPTFS_KERNEL_H
+#include <crypto/md5.h>
#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
@@ -137,8 +138,6 @@ ecryptfs_get_key_payload_data(struct key *key)
+ MAGIC_ECRYPTFS_MARKER_SIZE_BYTES)
#define ECRYPTFS_DEFAULT_CIPHER "aes"
#define ECRYPTFS_DEFAULT_KEY_BYTES 16
-#define ECRYPTFS_DEFAULT_HASH "md5"
-#define ECRYPTFS_TAG_70_DIGEST ECRYPTFS_DEFAULT_HASH
#define ECRYPTFS_TAG_1_PACKET_TYPE 0x01
#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
@@ -163,8 +162,6 @@ ecryptfs_get_key_payload_data(struct key *key)
* ECRYPTFS_MAX_IV_BYTES */
#define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16
#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */
-#define MD5_DIGEST_SIZE 16
-#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE
#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \
+ ECRYPTFS_SIG_SIZE + 1 + 1)
#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \
@@ -237,8 +234,6 @@ struct ecryptfs_crypt_stat {
unsigned int extent_mask;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct crypto_skcipher *tfm;
- struct crypto_shash *hash_tfm; /* Crypto context for generating
- * the initialization vectors */
unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
@@ -558,7 +553,7 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
int sg_size);
int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
void ecryptfs_rotate_iv(unsigned char *iv);
-int ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
+void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
void ecryptfs_destroy_mount_crypt_stat(
struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
@@ -693,8 +688,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
char *data, size_t max_packet_size);
int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
-int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
- loff_t offset);
+void ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
+ loff_t offset);
extern const struct xattr_handler * const ecryptfs_xattr_handlers[];
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ed1394da8d6b..bae9011fa62f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -903,11 +903,8 @@ static int ecryptfs_setattr(struct mnt_idmap *idmap,
struct ecryptfs_crypt_stat *crypt_stat;
crypt_stat = &ecryptfs_inode_to_private(d_inode(dentry))->crypt_stat;
- if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) {
- rc = ecryptfs_init_crypt_stat(crypt_stat);
- if (rc)
- return rc;
- }
+ if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
+ ecryptfs_init_crypt_stat(crypt_stat);
inode = d_inode(dentry);
lower_inode = ecryptfs_inode_to_lower(inode);
lower_dentry = ecryptfs_dentry_to_lower(dentry);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 7f9f68c00ef6..bbf8603242fa 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -11,7 +11,6 @@
* Trevor S. Highland <trevor.highland@gmail.com>
*/
-#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/string.h>
#include <linux/pagemap.h>
@@ -601,10 +600,7 @@ struct ecryptfs_write_tag_70_packet_silly_stack {
struct crypto_skcipher *skcipher_tfm;
struct skcipher_request *skcipher_req;
char iv[ECRYPTFS_MAX_IV_BYTES];
- char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
- char tmp_hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
- struct crypto_shash *hash_tfm;
- struct shash_desc *hash_desc;
+ char hash[MD5_DIGEST_SIZE];
};
/*
@@ -741,51 +737,15 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
"password tokens\n", __func__);
goto out_free_unlock;
}
- s->hash_tfm = crypto_alloc_shash(ECRYPTFS_TAG_70_DIGEST, 0, 0);
- if (IS_ERR(s->hash_tfm)) {
- rc = PTR_ERR(s->hash_tfm);
- printk(KERN_ERR "%s: Error attempting to "
- "allocate hash crypto context; rc = [%d]\n",
- __func__, rc);
- goto out_free_unlock;
- }
-
- s->hash_desc = kmalloc(sizeof(*s->hash_desc) +
- crypto_shash_descsize(s->hash_tfm), GFP_KERNEL);
- if (!s->hash_desc) {
- rc = -ENOMEM;
- goto out_release_free_unlock;
- }
- s->hash_desc->tfm = s->hash_tfm;
-
- rc = crypto_shash_digest(s->hash_desc,
- (u8 *)s->auth_tok->token.password.session_key_encryption_key,
- s->auth_tok->token.password.session_key_encryption_key_bytes,
- s->hash);
- if (rc) {
- printk(KERN_ERR
- "%s: Error computing crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out_release_free_unlock;
- }
+ md5(s->auth_tok->token.password.session_key_encryption_key,
+ s->auth_tok->token.password.session_key_encryption_key_bytes,
+ s->hash);
for (s->j = 0; s->j < (s->num_rand_bytes - 1); s->j++) {
s->block_aligned_filename[s->j] =
- s->hash[(s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)];
- if ((s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)
- == (ECRYPTFS_TAG_70_DIGEST_SIZE - 1)) {
- rc = crypto_shash_digest(s->hash_desc, (u8 *)s->hash,
- ECRYPTFS_TAG_70_DIGEST_SIZE,
- s->tmp_hash);
- if (rc) {
- printk(KERN_ERR
- "%s: Error computing crypto hash; "
- "rc = [%d]\n", __func__, rc);
- goto out_release_free_unlock;
- }
- memcpy(s->hash, s->tmp_hash,
- ECRYPTFS_TAG_70_DIGEST_SIZE);
- }
+ s->hash[s->j % MD5_DIGEST_SIZE];
+ if ((s->j % MD5_DIGEST_SIZE) == (MD5_DIGEST_SIZE - 1))
+ md5(s->hash, MD5_DIGEST_SIZE, s->hash);
if (s->block_aligned_filename[s->j] == '\0')
s->block_aligned_filename[s->j] = ECRYPTFS_NON_NULL;
}
@@ -798,7 +758,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
"convert filename memory to scatterlist; rc = [%d]. "
"block_aligned_filename_size = [%zd]\n", __func__, rc,
s->block_aligned_filename_size);
- goto out_release_free_unlock;
+ goto out_free_unlock;
}
rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size,
s->dst_sg, 2);
@@ -807,7 +767,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
"convert encrypted filename memory to scatterlist; "
"rc = [%d]. block_aligned_filename_size = [%zd]\n",
__func__, rc, s->block_aligned_filename_size);
- goto out_release_free_unlock;
+ goto out_free_unlock;
}
/* The characters in the first block effectively do the job
* of the IV here, so we just use 0's for the IV. Note the
@@ -825,7 +785,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
rc,
s->auth_tok->token.password.session_key_encryption_key,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
- goto out_release_free_unlock;
+ goto out_free_unlock;
}
skcipher_request_set_crypt(s->skcipher_req, s->src_sg, s->dst_sg,
s->block_aligned_filename_size, s->iv);
@@ -833,13 +793,11 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt filename; "
"rc = [%d]\n", __func__, rc);
- goto out_release_free_unlock;
+ goto out_free_unlock;
}
s->i += s->block_aligned_filename_size;
(*packet_size) = s->i;
(*remaining_bytes) -= (*packet_size);
-out_release_free_unlock:
- crypto_free_shash(s->hash_tfm);
out_free_unlock:
kfree_sensitive(s->block_aligned_filename);
out_unlock:
@@ -850,7 +808,6 @@ out:
key_put(auth_tok_key);
}
skcipher_request_free(s->skcipher_req);
- kfree_sensitive(s->hash_desc);
kfree(s);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 16ea14dd2c62..c12dc680f8fe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -12,6 +12,7 @@
#include <linux/dcache.h>
#include <linux/file.h>
+#include <linux/fips.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/skbuff.h>
@@ -454,6 +455,12 @@ static int ecryptfs_get_tree(struct fs_context *fc)
goto out;
}
+ if (fips_enabled) {
+ rc = -EINVAL;
+ err = "eCryptfs support is disabled due to FIPS";
+ goto out;
+ }
+
s = sget_fc(fc, NULL, set_anon_super_fc);
if (IS_ERR(s)) {
rc = PTR_ERR(s);
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index e7b7f426fecf..3bc21d677564 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -41,10 +41,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
inode_info = alloc_inode_sb(sb, ecryptfs_inode_info_cache, GFP_KERNEL);
if (unlikely(!inode_info))
goto out;
- if (ecryptfs_init_crypt_stat(&inode_info->crypt_stat)) {
- kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
- goto out;
- }
+ ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
mutex_init(&inode_info->lower_file_mutex);
atomic_set(&inode_info->lower_file_count, 0);
inode_info->lower_file = NULL;
diff --git a/fs/file.c b/fs/file.c
index 28743b742e3c..3f56890068aa 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -641,6 +641,34 @@ void put_unused_fd(unsigned int fd)
EXPORT_SYMBOL(put_unused_fd);
+/*
+ * Install a file pointer in the fd array while it is being resized.
+ *
+ * We need to make sure our update to the array does not get lost as the resizing
+ * thread can be copying the content as we modify it.
+ *
+ * We have two ways to do it:
+ * - go off CPU waiting for resize_in_progress to clear
+ * - take the spin lock
+ *
+ * The latter is trivial to implement and saves us from having to might_sleep()
+ * for debugging purposes.
+ *
+ * This is moved out of line from fd_install() to convince gcc to optimize that
+ * routine better.
+ */
+static void noinline fd_install_slowpath(unsigned int fd, struct file *file)
+{
+ struct files_struct *files = current->files;
+ struct fdtable *fdt;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
+ spin_unlock(&files->file_lock);
+}
+
/**
* fd_install - install a file pointer in the fd array
* @fd: file descriptor to install the file in
@@ -658,14 +686,9 @@ void fd_install(unsigned int fd, struct file *file)
return;
rcu_read_lock_sched();
-
if (unlikely(files->resize_in_progress)) {
rcu_read_unlock_sched();
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
- rcu_assign_pointer(fdt->fd[fd], file);
- spin_unlock(&files->file_lock);
+ fd_install_slowpath(fd, file);
return;
}
/* coupled with smp_wmb() in expand_fdtable() */
diff --git a/fs/file_attr.c b/fs/file_attr.c
index 1dcec88c0680..4c4916632f11 100644
--- a/fs/file_attr.c
+++ b/fs/file_attr.c
@@ -316,7 +316,6 @@ int ioctl_getflags(struct file *file, unsigned int __user *argp)
err = put_user(fa.flags, argp);
return err;
}
-EXPORT_SYMBOL(ioctl_getflags);
int ioctl_setflags(struct file *file, unsigned int __user *argp)
{
@@ -337,7 +336,6 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp)
}
return err;
}
-EXPORT_SYMBOL(ioctl_setflags);
int ioctl_fsgetxattr(struct file *file, void __user *argp)
{
@@ -350,7 +348,6 @@ int ioctl_fsgetxattr(struct file *file, void __user *argp)
return err;
}
-EXPORT_SYMBOL(ioctl_fsgetxattr);
int ioctl_fssetxattr(struct file *file, void __user *argp)
{
@@ -369,7 +366,6 @@ int ioctl_fssetxattr(struct file *file, void __user *argp)
}
return err;
}
-EXPORT_SYMBOL(ioctl_fssetxattr);
SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename,
struct file_attr __user *, ufattr, size_t, usize,
diff --git a/fs/inode.c b/fs/inode.c
index cff1d3af0d57..2c55ec49b023 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2322,42 +2322,40 @@ out:
}
EXPORT_SYMBOL(current_time);
-static int inode_needs_update_time(struct inode *inode)
+static int file_update_time_flags(struct file *file, unsigned int flags)
{
+ struct inode *inode = file_inode(file);
struct timespec64 now, ts;
- int sync_it = 0;
+ int sync_mode = 0;
+ int ret = 0;
/* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return 0;
+ if (unlikely(file->f_mode & FMODE_NOCMTIME))
+ return 0;
now = current_time(inode);
ts = inode_get_mtime(inode);
if (!timespec64_equal(&ts, &now))
- sync_it |= S_MTIME;
-
+ sync_mode |= S_MTIME;
ts = inode_get_ctime(inode);
if (!timespec64_equal(&ts, &now))
- sync_it |= S_CTIME;
-
+ sync_mode |= S_CTIME;
if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
- sync_it |= S_VERSION;
-
- return sync_it;
-}
+ sync_mode |= S_VERSION;
-static int __file_update_time(struct file *file, int sync_mode)
-{
- int ret = 0;
- struct inode *inode = file_inode(file);
+ if (!sync_mode)
+ return 0;
- /* try to update time settings */
- if (!mnt_get_write_access_file(file)) {
- ret = inode_update_time(inode, sync_mode);
- mnt_put_write_access_file(file);
- }
+ if (flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ if (mnt_get_write_access_file(file))
+ return 0;
+ ret = inode_update_time(inode, sync_mode);
+ mnt_put_write_access_file(file);
return ret;
}
@@ -2377,14 +2375,7 @@ static int __file_update_time(struct file *file, int sync_mode)
*/
int file_update_time(struct file *file)
{
- int ret;
- struct inode *inode = file_inode(file);
-
- ret = inode_needs_update_time(inode);
- if (ret <= 0)
- return ret;
-
- return __file_update_time(file, ret);
+ return file_update_time_flags(file, 0);
}
EXPORT_SYMBOL(file_update_time);
@@ -2406,7 +2397,6 @@ EXPORT_SYMBOL(file_update_time);
static int file_modified_flags(struct file *file, int flags)
{
int ret;
- struct inode *inode = file_inode(file);
/*
* Clear the security bits if the process is not being run by root.
@@ -2415,17 +2405,7 @@ static int file_modified_flags(struct file *file, int flags)
ret = file_remove_privs_flags(file, flags);
if (ret)
return ret;
-
- if (unlikely(file->f_mode & FMODE_NOCMTIME))
- return 0;
-
- ret = inode_needs_update_time(inode);
- if (ret <= 0)
- return ret;
- if (flags & IOCB_NOWAIT)
- return -EAGAIN;
-
- return __file_update_time(file, ret);
+ return file_update_time_flags(file, flags);
}
/**
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 38e6653fe3a6..8e273408453a 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -23,13 +23,6 @@
#define IOMAP_DIO_WRITE (1U << 30)
#define IOMAP_DIO_DIRTY (1U << 31)
-/*
- * Used for sub block zeroing in iomap_dio_zero()
- */
-#define IOMAP_ZERO_PAGE_SIZE (SZ_64K)
-#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE))
-static struct page *zero_page;
-
struct iomap_dio {
struct kiocb *iocb;
const struct iomap_dio_ops *dops;
@@ -276,24 +269,35 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
{
struct inode *inode = file_inode(dio->iocb->ki_filp);
struct bio *bio;
+ struct folio *zero_folio = largest_zero_folio();
+ int nr_vecs = max(1, i_blocksize(inode) / folio_size(zero_folio));
if (!len)
return 0;
+
/*
- * Max block size supported is 64k
+ * This limit shall never be reached as most filesystems have a
+ * maximum blocksize of 64k.
*/
- if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE))
+ if (WARN_ON_ONCE(nr_vecs > BIO_MAX_VECS))
return -EINVAL;
- bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
+ bio = iomap_dio_alloc_bio(iter, dio, nr_vecs,
+ REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
- __bio_add_page(bio, zero_page, len, 0);
+ while (len > 0) {
+ unsigned int io_len = min(len, folio_size(zero_folio));
+
+ bio_add_folio_nofail(bio, zero_folio, io_len, 0);
+ len -= io_len;
+ }
iomap_dio_submit_bio(iter, dio, bio, pos);
+
return 0;
}
@@ -847,15 +851,3 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
return iomap_dio_complete(dio);
}
EXPORT_SYMBOL_GPL(iomap_dio_rw);
-
-static int __init iomap_dio_init(void)
-{
- zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
- IOMAP_ZERO_PAGE_ORDER);
-
- if (!zero_page)
- return -ENOMEM;
-
- return 0;
-}
-fs_initcall(iomap_dio_init);
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 10934f9a11be..5aaafedb8fbc 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -76,14 +76,14 @@ struct jfs_inode_info {
struct {
unchar _unused[16]; /* 16: */
dxd_t _dxd; /* 16: */
- /* _inline may overflow into _inline_ea when needed */
+ /* _inline_sym may overflow into _inline_ea when needed */
/* _inline_ea may overlay the last part of
* file._xtroot if maxentry = XTROOTINITSLOT
*/
union {
struct {
/* 128: inline symlink */
- unchar _inline[128];
+ unchar _inline_sym[128];
/* 128: inline extended attr */
unchar _inline_ea[128];
};
@@ -101,7 +101,7 @@ struct jfs_inode_info {
#define i_imap u.file._imap
#define i_dirtable u.dir._table
#define i_dtroot u.dir._dtroot
-#define i_inline u.link._inline
+#define i_inline u.link._inline_sym
#define i_inline_ea u.link._inline_ea
#define i_inline_all u.link._inline_all
diff --git a/fs/namei.c b/fs/namei.c
index 7377020a2cba..8281dfe5047f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -282,7 +282,7 @@ void putname(struct filename *name)
return;
refcnt = atomic_read(&name->refcnt);
- if (refcnt != 1) {
+ if (unlikely(refcnt != 1)) {
if (WARN_ON_ONCE(!refcnt))
return;
@@ -290,7 +290,7 @@ void putname(struct filename *name)
return;
}
- if (name->name != name->iname) {
+ if (unlikely(name->name != name->iname)) {
__putname(name->name);
kfree(name);
} else
@@ -540,10 +540,13 @@ static inline int do_inode_permission(struct mnt_idmap *idmap,
* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
* Separate out file-system wide checks from inode-specific permission checks.
+ *
+ * Note: lookup_inode_permission_may_exec() does not call here. If you add
+ * MAY_EXEC checks, adjust it.
*/
static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
{
- if (unlikely(mask & MAY_WRITE)) {
+ if (mask & MAY_WRITE) {
umode_t mode = inode->i_mode;
/* Nobody gets write access to a read-only fs. */
@@ -574,7 +577,7 @@ int inode_permission(struct mnt_idmap *idmap,
if (unlikely(retval))
return retval;
- if (unlikely(mask & MAY_WRITE)) {
+ if (mask & MAY_WRITE) {
/*
* Nobody gets write access to an immutable file.
*/
@@ -602,6 +605,42 @@ int inode_permission(struct mnt_idmap *idmap,
}
EXPORT_SYMBOL(inode_permission);
+/*
+ * lookup_inode_permission_may_exec - Check traversal right for given inode
+ *
+ * This is a special case routine for may_lookup() making assumptions specific
+ * to path traversal. Use inode_permission() if you are doing something else.
+ *
+ * Work is shaved off compared to inode_permission() as follows:
+ * - we know for a fact there is no MAY_WRITE to worry about
+ * - it is an invariant the inode is a directory
+ *
+ * Since majority of real-world traversal happens on inodes which grant it for
+ * everyone, we check it upfront and only resort to more expensive work if it
+ * fails.
+ *
+ * Filesystems which have their own ->permission hook and consequently miss out
+ * on IOP_FASTPERM can still get the optimization if they set IOP_FASTPERM_MAY_EXEC
+ * on their directory inodes.
+ */
+static __always_inline int lookup_inode_permission_may_exec(struct mnt_idmap *idmap,
+ struct inode *inode, int mask)
+{
+ /* Lookup already checked this to return -ENOTDIR */
+ VFS_BUG_ON_INODE(!S_ISDIR(inode->i_mode), inode);
+ VFS_BUG_ON((mask & ~MAY_NOT_BLOCK) != 0);
+
+ mask |= MAY_EXEC;
+
+ if (unlikely(!(inode->i_opflags & (IOP_FASTPERM | IOP_FASTPERM_MAY_EXEC))))
+ return inode_permission(idmap, inode, mask);
+
+ if (unlikely(((inode->i_mode & 0111) != 0111) || !no_acl_inode(inode)))
+ return inode_permission(idmap, inode, mask);
+
+ return security_inode_permission(inode, mask);
+}
+
/**
* path_get - get a reference to a path
* @path: path to get the reference to
@@ -746,7 +785,8 @@ static void leave_rcu(struct nameidata *nd)
static void terminate_walk(struct nameidata *nd)
{
- drop_links(nd);
+ if (unlikely(nd->depth))
+ drop_links(nd);
if (!(nd->flags & LOOKUP_RCU)) {
int i;
path_put(&nd->path);
@@ -843,7 +883,7 @@ static bool try_to_unlazy(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (unlikely(!legitimize_links(nd)))
+ if (unlikely(nd->depth && !legitimize_links(nd)))
goto out1;
if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
goto out;
@@ -878,7 +918,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
int res;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (unlikely(!legitimize_links(nd)))
+ if (unlikely(nd->depth && !legitimize_links(nd)))
goto out2;
res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
if (unlikely(res)) {
@@ -951,8 +991,8 @@ static int complete_walk(struct nameidata *nd)
* We don't want to zero nd->root for scoped-lookups or
* externally-managed nd->root.
*/
- if (!(nd->state & ND_ROOT_PRESET))
- if (!(nd->flags & LOOKUP_IS_SCOPED))
+ if (likely(!(nd->state & ND_ROOT_PRESET)))
+ if (likely(!(nd->flags & LOOKUP_IS_SCOPED)))
nd->root.mnt = NULL;
nd->flags &= ~LOOKUP_CACHED;
if (!try_to_unlazy(nd))
@@ -1034,7 +1074,7 @@ static int nd_jump_root(struct nameidata *nd)
}
if (!nd->root.mnt) {
int error = set_root(nd);
- if (error)
+ if (unlikely(error))
return error;
}
if (nd->flags & LOOKUP_RCU) {
@@ -1632,13 +1672,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
unsigned int seq = nd->next_seq;
+ if (likely(!d_managed(dentry)))
+ return 0;
if (likely(__follow_mount_rcu(nd, path)))
return 0;
// *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
nd->next_seq = seq;
- if (!try_to_unlazy_next(nd, dentry))
+ if (unlikely(!try_to_unlazy_next(nd, dentry)))
return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
@@ -1823,7 +1865,7 @@ again:
return dentry;
}
-static struct dentry *lookup_slow(const struct qstr *name,
+static noinline struct dentry *lookup_slow(const struct qstr *name,
struct dentry *dir,
unsigned int flags)
{
@@ -1855,7 +1897,7 @@ static inline int may_lookup(struct mnt_idmap *idmap,
int err, mask;
mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0;
- err = inode_permission(idmap, nd->inode, mask | MAY_EXEC);
+ err = lookup_inode_permission_may_exec(idmap, nd->inode, mask);
if (likely(!err))
return 0;
@@ -1870,7 +1912,7 @@ static inline int may_lookup(struct mnt_idmap *idmap,
if (err != -ECHILD) // hard error
return err;
- return inode_permission(idmap, nd->inode, MAY_EXEC);
+ return lookup_inode_permission_may_exec(idmap, nd->inode, 0);
}
static int reserve_stack(struct nameidata *nd, struct path *link)
@@ -1901,13 +1943,23 @@ static int reserve_stack(struct nameidata *nd, struct path *link)
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
-static const char *pick_link(struct nameidata *nd, struct path *link,
+static noinline const char *pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, int flags)
{
struct saved *last;
const char *res;
- int error = reserve_stack(nd, link);
+ int error;
+
+ if (nd->flags & LOOKUP_RCU) {
+ /* make sure that d_is_symlink from step_into_slowpath() matches the inode */
+ if (read_seqcount_retry(&link->dentry->d_seq, nd->next_seq))
+ return ERR_PTR(-ECHILD);
+ } else {
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+ }
+ error = reserve_stack(nd, link);
if (unlikely(error)) {
if (!(nd->flags & LOOKUP_RCU))
path_put(link);
@@ -1981,14 +2033,15 @@ all_done: // pure jump
*
* NOTE: dentry must be what nd->next_seq had been sampled from.
*/
-static const char *step_into(struct nameidata *nd, int flags,
+static noinline const char *step_into_slowpath(struct nameidata *nd, int flags,
struct dentry *dentry)
{
struct path path;
struct inode *inode;
- int err = handle_mounts(nd, dentry, &path);
+ int err;
- if (err < 0)
+ err = handle_mounts(nd, dentry, &path);
+ if (unlikely(err < 0))
return ERR_PTR(err);
inode = path.dentry->d_inode;
if (likely(!d_is_symlink(path.dentry)) ||
@@ -2010,15 +2063,32 @@ static const char *step_into(struct nameidata *nd, int flags,
nd->seq = nd->next_seq;
return NULL;
}
- if (nd->flags & LOOKUP_RCU) {
- /* make sure that d_is_symlink above matches inode */
- if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
+ return pick_link(nd, &path, inode, flags);
+}
+
+static __always_inline const char *step_into(struct nameidata *nd, int flags,
+ struct dentry *dentry)
+{
+ /*
+ * In the common case we are in rcu-walk and traversing over a non-mounted on
+ * directory (as opposed to e.g., a symlink).
+ *
+ * We can handle that and negative entries with the checks below.
+ */
+ if (likely((nd->flags & LOOKUP_RCU) &&
+ !d_managed(dentry) && !d_is_symlink(dentry))) {
+ struct inode *inode = dentry->d_inode;
+ if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
- } else {
- if (path.mnt == nd->path.mnt)
- mntget(path.mnt);
+ if (unlikely(!inode))
+ return ERR_PTR(-ENOENT);
+ nd->path.dentry = dentry;
+ /* nd->path.mnt is retained on purpose */
+ nd->inode = inode;
+ nd->seq = nd->next_seq;
+ return NULL;
}
- return pick_link(nd, &path, inode, flags);
+ return step_into_slowpath(nd, flags, dentry);
}
static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
@@ -2101,7 +2171,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
if (!nd->root.mnt) {
error = ERR_PTR(set_root(nd));
- if (error)
+ if (unlikely(error))
return error;
}
if (nd->flags & LOOKUP_RCU)
@@ -2131,7 +2201,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
return NULL;
}
-static const char *walk_component(struct nameidata *nd, int flags)
+static __always_inline const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
/*
@@ -2140,7 +2210,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
* parent relationships.
*/
if (unlikely(nd->last_type != LAST_NORM)) {
- if (!(flags & WALK_MORE) && nd->depth)
+ if (unlikely(nd->depth) && !(flags & WALK_MORE))
put_link(nd);
return handle_dots(nd, nd->last_type);
}
@@ -2152,7 +2222,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
if (IS_ERR(dentry))
return ERR_CAST(dentry);
}
- if (!(flags & WALK_MORE) && nd->depth)
+ if (unlikely(nd->depth) && !(flags & WALK_MORE))
put_link(nd);
return step_into(nd, flags, dentry);
}
@@ -2505,7 +2575,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (unlikely(!*name)) {
OK:
/* pathname or trailing symlink, done */
- if (!depth) {
+ if (likely(!depth)) {
nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode);
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
@@ -2543,10 +2613,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
const char *s = nd->pathname;
/* LOOKUP_CACHED requires RCU, ask caller to retry */
- if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
+ if (unlikely((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED))
return ERR_PTR(-EAGAIN);
- if (!*s)
+ if (unlikely(!*s))
flags &= ~LOOKUP_RCU;
if (flags & LOOKUP_RCU)
rcu_read_lock();
@@ -2560,7 +2630,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
smp_rmb();
- if (nd->state & ND_ROOT_PRESET) {
+ if (unlikely(nd->state & ND_ROOT_PRESET)) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
if (*s && unlikely(!d_can_lookup(root)))
@@ -2579,7 +2649,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->root.mnt = NULL;
/* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
- if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
+ if (*s == '/' && likely(!(flags & LOOKUP_IN_ROOT))) {
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
@@ -2632,7 +2702,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
}
/* For scoped-lookups we need to set the root to the dirfd as well. */
- if (flags & LOOKUP_IS_SCOPED) {
+ if (unlikely(flags & LOOKUP_IS_SCOPED)) {
nd->root = nd->path;
if (flags & LOOKUP_RCU) {
nd->root_seq = nd->seq;
diff --git a/fs/namespace.c b/fs/namespace.c
index 4272349650b1..a7fd9682bcf9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1336,26 +1336,12 @@ static void delayed_mntput(struct work_struct *unused)
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
-static void mntput_no_expire(struct mount *mnt)
+static void noinline mntput_no_expire_slowpath(struct mount *mnt)
{
LIST_HEAD(list);
int count;
- rcu_read_lock();
- if (likely(READ_ONCE(mnt->mnt_ns))) {
- /*
- * Since we don't do lock_mount_hash() here,
- * ->mnt_ns can change under us. However, if it's
- * non-NULL, then there's a reference that won't
- * be dropped until after an RCU delay done after
- * turning ->mnt_ns NULL. So if we observe it
- * non-NULL under rcu_read_lock(), the reference
- * we are dropping is not the final one.
- */
- mnt_add_count(mnt, -1);
- rcu_read_unlock();
- return;
- }
+ VFS_BUG_ON(mnt->mnt_ns);
lock_mount_hash();
/*
* make sure that if __legitimize_mnt() has not seen us grab
@@ -1406,6 +1392,26 @@ static void mntput_no_expire(struct mount *mnt)
cleanup_mnt(mnt);
}
+static void mntput_no_expire(struct mount *mnt)
+{
+ rcu_read_lock();
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
+ rcu_read_unlock();
+ return;
+ }
+ mntput_no_expire_slowpath(mnt);
+}
+
void mntput(struct vfsmount *mnt)
{
if (mnt) {
diff --git a/fs/open.c b/fs/open.c
index 3d64372ecc67..e3c737ede4e4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -940,7 +940,7 @@ static int do_dentry_open(struct file *f,
}
error = security_file_open(f);
- if (error)
+ if (unlikely(error))
goto cleanup_all;
/*
@@ -950,11 +950,11 @@ static int do_dentry_open(struct file *f,
* pseudo file, this call will not change the mode.
*/
error = fsnotify_open_perm_and_set_mode(f);
- if (error)
+ if (unlikely(error))
goto cleanup_all;
error = break_lease(file_inode(f), f->f_flags);
- if (error)
+ if (unlikely(error))
goto cleanup_all;
/* normally all 3 are set; ->open() can clear them if needed */
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index a01400cd41fd..55f6c8026812 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -878,7 +878,9 @@ int orangefs_update_time(struct inode *inode, int flags)
gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
get_khandle_from_ino(inode));
- flags = generic_update_time(inode, flags);
+
+ flags = inode_update_timestamps(inode, flags);
+
memset(&iattr, 0, sizeof iattr);
if (flags & S_ATIME)
iattr.ia_valid |= ATTR_ATIME;
diff --git a/fs/splice.c b/fs/splice.c
index f5094b6d00a0..d338fe56b50b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1498,7 +1498,7 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
/*
* For lack of a better implementation, implement vmsplice() to userspace
- * as a simple copy of the pipes pages to the user iov.
+ * as a simple copy of the pipe's pages to the user iov.
*/
static ssize_t vmsplice_to_user(struct file *file, struct iov_iter *iter,
unsigned int flags)
diff --git a/fs/sync.c b/fs/sync.c
index 2955cd4c77a3..d4bb6d25b1de 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -117,16 +117,17 @@ SYSCALL_DEFINE0(sync)
static void do_sync_work(struct work_struct *work)
{
int nowait = 0;
+ int wait = 1;
/*
* Sync twice to reduce the possibility we skipped some inodes / pages
* because they were temporarily locked
*/
- iterate_supers(sync_inodes_one_sb, &nowait);
+ iterate_supers(sync_inodes_one_sb, NULL);
iterate_supers(sync_fs_one_sb, &nowait);
sync_bdevs(false);
- iterate_supers(sync_inodes_one_sb, &nowait);
- iterate_supers(sync_fs_one_sb, &nowait);
+ iterate_supers(sync_inodes_one_sb, NULL);
+ iterate_supers(sync_fs_one_sb, &wait);
sync_bdevs(false);
printk("Emergency Sync complete\n");
kfree(work);
diff --git a/fs/utimes.c b/fs/utimes.c
index c7c7958e57b2..3e7156396230 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -76,6 +76,7 @@ retry_deleg:
out:
return error;
}
+EXPORT_SYMBOL_GPL(vfs_utimes);
static int do_utimes_path(int dfd, const char __user *filename,
struct timespec64 *times, int flags)