summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 09:02:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 09:02:39 -0700
commit181d8e399f50c0683b12d40432bb6e1ca5c58d37 (patch)
treed1bb2c62d88c645f4ba3569d0eac14f76108d980 /fs
parenta1ae8ce78bb2600490d49a8f1ee88767b0e64381 (diff)
parent76145cb37ff0636fdf2a15320b2c2421915df32b (diff)
Merge tag 'vfs-6.16-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "This contains the usual selections of misc updates for this cycle. Features: - Use folios for symlinks in the page cache FUSE already uses folios for its symlinks. Mirror that conversion in the generic code and the NFS code. That lets us get rid of a few folio->page->folio conversions in this path, and some of the few remaining users of read_cache_page() / read_mapping_page() - Try and make a few filesystem operations killable on the VFS inode->i_mutex level - Add sysctl vfs_cache_pressure_denom for bulk file operations Some workloads need to preserve more dentries than we currently allow through out sysctl interface A HDFS servers with 12 HDDs per server, on a HDFS datanode startup involves scanning all files and caching their metadata (including dentries and inodes) in memory. Each HDD contains approximately 2 million files, resulting in a total of ~20 million cached dentries after initialization To minimize dentry reclamation, they set vfs_cache_pressure to 1. Despite this configuration, memory pressure conditions can still trigger reclamation of up to 50% of cached dentries, reducing the cache from 20 million to approximately 10 million entries. During the subsequent cache rebuild period, any HDFS datanode restart operation incurs substantial latency penalties until full cache recovery completes To maintain service stability, more dentries need to be preserved during memory reclamation. The current minimum reclaim ratio (1/100 of total dentries) remains too aggressive for such workload. This patch introduces vfs_cache_pressure_denom for more granular cache pressure control The configuration [vfs_cache_pressure=1, vfs_cache_pressure_denom=10000] effectively maintains the full 20 million dentry cache under memory pressure, preventing datanode restart performance degradation - Avoid some jumps in inode_permission() using likely()/unlikely() - Avid a memory access which is most likely a cache miss when descending into devcgroup_inode_permission() - Add fastpath predicts for stat() and fdput() - Anonymous inodes currently don't come with a proper mode causing issues in the kernel when we want to add useful VFS debug assert. Fix that by giving them a proper mode and masking it off when we report it to userspace which relies on them not having any mode - Anonymous inodes currently allow to change inode attributes because the VFS falls back to simple_setattr() if i_op->setattr isn't implemented. This means the ownership and mode for every single user of anon_inode_inode can be changed. Block that as it's either useless or actively harmful. If specific ownership is needed the respective subsystem should allocate anonymous inodes from their own private superblock - Raise SB_I_NODEV and SB_I_NOEXEC on the anonymous inode superblock - Add proper tests for anonymous inode behavior - Make it easy to detect proper anonymous inodes and to ensure that we can detect them in codepaths such as readahead() Cleanups: - Port pidfs to the new anon_inode_{g,s}etattr() helpers - Try to remove the uselib() system call - Add unlikely branch hint return path for poll - Add unlikely branch hint on return path for core_sys_select - Don't allow signals to interrupt getdents copying for fuse - Provide a size hint to dir_context for during readdir() - Use writeback_iter directly in mpage_writepages - Update compression and mtime descriptions in initramfs documentation - Update main netfs API document - Remove useless plus one in super_cache_scan() - Remove unnecessary NULL-check guards during setns() - Add separate separate {get,put}_cgroup_ns no-op cases Fixes: - Fix typo in root= kernel parameter description - Use KERN_INFO for infof()|info_plog()|infofc() - Correct comments of fs_validate_description() - Mark an unlikely if condition with unlikely() in vfs_parse_monolithic_sep() - Delete macro fsparam_u32hex() - Remove unused and problematic validate_constant_table() - Fix potential unsigned integer underflow in fs_name() - Make file-nr output the total allocated file handles" * tag 'vfs-6.16-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (43 commits) fs: Pass a folio to page_put_link() nfs: Use a folio in nfs_get_link() fs: Convert __page_get_link() to use a folio fs/read_write: make default_llseek() killable fs/open: make do_truncate() killable fs/open: make chmod_common() and chown_common() killable include/linux/fs.h: add inode_lock_killable() readdir: supply dir_context.count as readdir buffer size hint vfs: Add sysctl vfs_cache_pressure_denom for bulk file operations fuse: don't allow signals to interrupt getdents copying Documentation: fix typo in root= kernel parameter description include/cgroup: separate {get,put}_cgroup_ns no-op case kernel/nsproxy: remove unnecessary guards fs: use writeback_iter directly in mpage_writepages fs: remove useless plus one in super_cache_scan() fs: add S_ANON_INODE fs: remove uselib() system call device_cgroup: avoid access to ->i_rdev in the common case in devcgroup_inode_permission() fs/fs_parse: Remove unused and problematic validate_constant_table() fs: touch up predicts in inode_permission() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/anon_inodes.c45
-rw-r--r--fs/binfmt_elf.c76
-rw-r--r--fs/dcache.c11
-rw-r--r--fs/exec.c60
-rw-r--r--fs/exportfs/expfs.c1
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/filesystems.c14
-rw-r--r--fs/fs_context.c6
-rw-r--r--fs/fs_parser.c55
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/readdir.c4
-rw-r--r--fs/internal.h5
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/libfs.c10
-rw-r--r--fs/mpage.c13
-rw-r--r--fs/namei.c79
-rw-r--r--fs/nfs/symlink.c20
-rw-r--r--fs/open.c14
-rw-r--r--fs/overlayfs/readdir.c12
-rw-r--r--fs/pidfs.c28
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/readdir.c47
-rw-r--r--fs/select.c4
-rw-r--r--fs/stat.c35
-rw-r--r--fs/super.c2
25 files changed, 244 insertions, 312 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 583ac81669c2..e51e7d88980a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,10 +24,51 @@
#include <linux/uaccess.h>
+#include "internal.h"
+
static struct vfsmount *anon_inode_mnt __ro_after_init;
static struct inode *anon_inode_inode __ro_after_init;
/*
+ * User space expects anonymous inodes to have no file type in st_mode.
+ *
+ * In particular, 'lsof' has this legacy logic:
+ *
+ * type = s->st_mode & S_IFMT;
+ * switch (type) {
+ * ...
+ * case 0:
+ * if (!strcmp(p, "anon_inode"))
+ * Lf->ntype = Ntype = N_ANON_INODE;
+ *
+ * to detect our old anon_inode logic.
+ *
+ * Rather than mess with our internal sane inode data, just fix it
+ * up here in getattr() by masking off the format bits.
+ */
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+ stat->mode &= ~S_IFMT;
+ return 0;
+}
+
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct inode_operations anon_inode_operations = {
+ .getattr = anon_inode_getattr,
+ .setattr = anon_inode_setattr,
+};
+
+/*
* anon_inodefs_dname() is called from d_path().
*/
static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -45,6 +86,8 @@ static int anon_inodefs_init_fs_context(struct fs_context *fc)
struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
if (!ctx)
return -ENOMEM;
+ fc->s_iflags |= SB_I_NOEXEC;
+ fc->s_iflags |= SB_I_NODEV;
ctx->dops = &anon_inodefs_dentry_operations;
return 0;
}
@@ -66,6 +109,7 @@ static struct inode *anon_inode_make_secure_inode(
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
+ inode->i_op = &anon_inode_operations;
error = security_inode_init_security_anon(inode, &QSTR(name),
context_inode);
if (error) {
@@ -313,6 +357,7 @@ static int __init anon_inode_init(void)
anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
if (IS_ERR(anon_inode_inode))
panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+ anon_inode_inode->i_op = &anon_inode_operations;
return 0;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4c1ea6b52a53..a43363d593e5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -68,12 +68,6 @@
static int load_elf_binary(struct linux_binprm *bprm);
-#ifdef CONFIG_USELIB
-static int load_elf_library(struct file *);
-#else
-#define load_elf_library NULL
-#endif
-
/*
* If we don't support core dumping, then supply a NULL so we
* don't even try.
@@ -101,7 +95,6 @@ static int elf_core_dump(struct coredump_params *cprm);
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
- .load_shlib = load_elf_library,
#ifdef CONFIG_COREDUMP
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
@@ -1384,75 +1377,6 @@ out_free_ph:
goto out;
}
-#ifdef CONFIG_USELIB
-/* This is really simpleminded and specialized - we are loading an
- a.out library that is given an ELF header. */
-static int load_elf_library(struct file *file)
-{
- struct elf_phdr *elf_phdata;
- struct elf_phdr *eppnt;
- int retval, error, i, j;
- struct elfhdr elf_ex;
-
- error = -ENOEXEC;
- retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
- if (retval < 0)
- goto out;
-
- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
- goto out;
-
- /* First of all, some simple consistency checks */
- if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(&elf_ex) || !file->f_op->mmap)
- goto out;
- if (elf_check_fdpic(&elf_ex))
- goto out;
-
- /* Now read in all of the header information */
-
- j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
- /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
-
- error = -ENOMEM;
- elf_phdata = kmalloc(j, GFP_KERNEL);
- if (!elf_phdata)
- goto out;
-
- eppnt = elf_phdata;
- error = -ENOEXEC;
- retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
- if (retval < 0)
- goto out_free_ph;
-
- for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
- if ((eppnt + i)->p_type == PT_LOAD)
- j++;
- if (j != 1)
- goto out_free_ph;
-
- while (eppnt->p_type != PT_LOAD)
- eppnt++;
-
- /* Now use mmap to map the library into memory. */
- error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
- eppnt,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED_NOREPLACE | MAP_PRIVATE,
- 0);
-
- if (error != ELF_PAGESTART(eppnt->p_vaddr))
- goto out_free_ph;
-
- error = 0;
-
-out_free_ph:
- kfree(elf_phdata);
-out:
- return error;
-}
-#endif /* #ifdef CONFIG_USELIB */
-
#ifdef CONFIG_ELF_CORE
/*
* ELF core dumper
diff --git a/fs/dcache.c b/fs/dcache.c
index 89f4acab08c0..03d58b2d4fa3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -74,10 +74,11 @@
* arbitrary, since it's serialized on rename_lock
*/
static int sysctl_vfs_cache_pressure __read_mostly = 100;
+static int sysctl_vfs_cache_pressure_denom __read_mostly = 100;
unsigned long vfs_pressure_ratio(unsigned long val)
{
- return mult_frac(val, sysctl_vfs_cache_pressure, 100);
+ return mult_frac(val, sysctl_vfs_cache_pressure, sysctl_vfs_cache_pressure_denom);
}
EXPORT_SYMBOL_GPL(vfs_pressure_ratio);
@@ -225,6 +226,14 @@ static const struct ctl_table vm_dcache_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
+ {
+ .procname = "vfs_cache_pressure_denom",
+ .data = &sysctl_vfs_cache_pressure_denom,
+ .maxlen = sizeof(sysctl_vfs_cache_pressure_denom),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE_HUNDRED,
+ },
};
static int __init init_fs_dcache_sysctls(void)
diff --git a/fs/exec.c b/fs/exec.c
index 8e4ea5f1e64c..cfbb2b9ee3c9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,66 +115,6 @@ bool path_noexec(const struct path *path)
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
-#ifdef CONFIG_USELIB
-/*
- * Note that a shared library must be both readable and executable due to
- * security reasons.
- *
- * Also note that we take the address to load from the file itself.
- */
-SYSCALL_DEFINE1(uselib, const char __user *, library)
-{
- struct linux_binfmt *fmt;
- struct file *file;
- struct filename *tmp = getname(library);
- int error = PTR_ERR(tmp);
- static const struct open_flags uselib_flags = {
- .open_flag = O_LARGEFILE | O_RDONLY,
- .acc_mode = MAY_READ | MAY_EXEC,
- .intent = LOOKUP_OPEN,
- .lookup_flags = LOOKUP_FOLLOW,
- };
-
- if (IS_ERR(tmp))
- goto out;
-
- file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
- putname(tmp);
- error = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
-
- /*
- * Check do_open_execat() for an explanation.
- */
- error = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
- goto exit;
-
- error = -ENOEXEC;
-
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- if (!fmt->load_shlib)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
- }
- read_unlock(&binfmt_lock);
-exit:
- fput(file);
-out:
- return error;
-}
-#endif /* #ifdef CONFIG_USELIB */
-
#ifdef CONFIG_MMU
/*
* The nascent bprm->mm is not visible until exec_mmap() but it can
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index f0ede3e81cf7..cdefea17986a 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -284,6 +284,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
};
struct getdents_callback buffer = {
.ctx.actor = filldir_one,
+ .ctx.count = INT_MAX,
.name = name,
};
diff --git a/fs/file_table.c b/fs/file_table.c
index c04ed94cdc4b..138114d64307 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(get_max_files);
static int proc_nr_files(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
- files_stat.nr_files = get_nr_files();
+ files_stat.nr_files = percpu_counter_sum_positive(&nr_files);
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 58b9067b2391..95e5256821a5 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -156,15 +156,19 @@ static int fs_index(const char __user * __name)
static int fs_name(unsigned int index, char __user * buf)
{
struct file_system_type * tmp;
- int len, res;
+ int len, res = -EINVAL;
read_lock(&file_systems_lock);
- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
- if (index <= 0 && try_module_get(tmp->owner))
+ for (tmp = file_systems; tmp; tmp = tmp->next, index--) {
+ if (index == 0) {
+ if (try_module_get(tmp->owner))
+ res = 0;
break;
+ }
+ }
read_unlock(&file_systems_lock);
- if (!tmp)
- return -EINVAL;
+ if (res)
+ return res;
/* OK, we got the reference, so we can safely block */
len = strlen(tmp->name) + 1;
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 582d33e81117..666e61753aed 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -222,7 +222,7 @@ int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
char *value = strchr(key, '=');
if (value) {
- if (value == key)
+ if (unlikely(value == key))
continue;
*value++ = 0;
v_len = strlen(value);
@@ -449,6 +449,10 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "",
prefix ? ": " : "", &vaf);
break;
+ case 'i':
+ printk(KERN_INFO "%s%s%pV\n", prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
+ break;
default:
printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "",
prefix ? ": " : "", &vaf);
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index e635a81e17d9..c092a9f79e32 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -380,58 +380,9 @@ EXPORT_SYMBOL(fs_param_is_path);
#ifdef CONFIG_VALIDATE_FS_PARSER
/**
- * validate_constant_table - Validate a constant table
- * @tbl: The constant table to validate.
- * @tbl_size: The size of the table.
- * @low: The lowest permissible value.
- * @high: The highest permissible value.
- * @special: One special permissible value outside of the range.
- */
-bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
- int low, int high, int special)
-{
- size_t i;
- bool good = true;
-
- if (tbl_size == 0) {
- pr_warn("VALIDATE C-TBL: Empty\n");
- return true;
- }
-
- for (i = 0; i < tbl_size; i++) {
- if (!tbl[i].name) {
- pr_err("VALIDATE C-TBL[%zu]: Null\n", i);
- good = false;
- } else if (i > 0 && tbl[i - 1].name) {
- int c = strcmp(tbl[i-1].name, tbl[i].name);
-
- if (c == 0) {
- pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n",
- i, tbl[i].name);
- good = false;
- }
- if (c > 0) {
- pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n",
- i, tbl[i-1].name, tbl[i].name);
- good = false;
- }
- }
-
- if (tbl[i].value != special &&
- (tbl[i].value < low || tbl[i].value > high)) {
- pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n",
- i, tbl[i].name, tbl[i].value, low, high);
- good = false;
- }
- }
-
- return good;
-}
-
-/**
- * fs_validate_description - Validate a parameter description
- * @name: The parameter name to search for.
- * @desc: The parameter description to validate.
+ * fs_validate_description - Validate a parameter specification array
+ * @name: Owner name of the parameter specification array
+ * @desc: The parameter specification array to validate.
*/
bool fs_validate_description(const char *name,
const struct fs_parameter_spec *desc)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83ac192e7fdd..33b82529cb6e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1676,7 +1676,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
goto out_err;
}
- set_delayed_call(callback, page_put_link, &folio->page);
+ set_delayed_call(callback, page_put_link, folio);
return folio_address(folio);
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 17ce9636a2b1..edcd6f18a8a8 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -120,7 +120,7 @@ static bool fuse_emit(struct file *file, struct dir_context *ctx,
fuse_add_dirent_to_cache(file, dirent, ctx->pos);
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
- dirent->type);
+ dirent->type | FILLDIR_FLAG_NOINTR);
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
@@ -419,7 +419,7 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
if (ff->readdir.pos == ctx->pos) {
res = FOUND_SOME;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type))
+ dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
return FOUND_ALL;
ctx->pos = dirent->off;
}
diff --git a/fs/internal.h b/fs/internal.h
index 213bf3226213..0526e88ede39 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -344,3 +344,8 @@ static inline bool path_mounted(const struct path *path)
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags);
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index c91fd2b46a77..03d9a11f2247 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_fioasync(fd, filp, argp);
case FIOQSIZE:
- if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
+ if (S_ISDIR(inode->i_mode) ||
+ (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) ||
S_ISLNK(inode->i_mode)) {
loff_t res = inode_get_bytes(inode);
return copy_to_user(argp, &res, sizeof(res)) ?
@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_file_dedupe_range(filp, argp);
case FIONREAD:
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode))
return vfs_ioctl(filp, cmd, arg);
return put_user(i_size_read(inode) - filp->f_pos,
@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_get_fs_sysfs_path(filp, argp);
default:
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode))
return file_ioctl(filp, cmd, argp);
break;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 6393d7c49ee6..e28da9574a65 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1647,10 +1647,16 @@ struct inode *alloc_anon_inode(struct super_block *s)
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
+ /*
+ * Historically anonymous inodes didn't have a type at all and
+ * userspace has come to rely on this. Internally they're just
+ * regular files but S_IFREG is masked off when reporting
+ * information to userspace.
+ */
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
simple_inode_init_ts(inode);
return inode;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index ad7844de87c3..c5fd821fd30e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -445,10 +445,9 @@ static void clean_buffers(struct folio *folio, unsigned first_unmapped)
try_to_free_buffers(folio);
}
-static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
- void *data)
+static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio,
+ struct mpage_data *mpd)
{
- struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
@@ -656,14 +655,16 @@ mpage_writepages(struct address_space *mapping,
struct mpage_data mpd = {
.get_block = get_block,
};
+ struct folio *folio = NULL;
struct blk_plug plug;
- int ret;
+ int error;
blk_start_plug(&plug);
- ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = mpage_write_folio(wbc, folio, &mpd);
if (mpd.bio)
mpage_bio_submit_write(mpd.bio);
blk_finish_plug(&plug);
- return ret;
+ return error;
}
EXPORT_SYMBOL(mpage_writepages);
diff --git a/fs/namei.c b/fs/namei.c
index 9a0b1b2158fd..4bb889fc980b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -571,14 +571,14 @@ int inode_permission(struct mnt_idmap *idmap,
int retval;
retval = sb_permission(inode->i_sb, inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
if (unlikely(mask & MAY_WRITE)) {
/*
* Nobody gets write access to an immutable file.
*/
- if (IS_IMMUTABLE(inode))
+ if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
/*
@@ -586,16 +586,16 @@ int inode_permission(struct mnt_idmap *idmap,
* written back improperly if their true value is unknown
* to the vfs.
*/
- if (HAS_UNMAPPED_ID(idmap, inode))
+ if (unlikely(HAS_UNMAPPED_ID(idmap, inode)))
return -EACCES;
}
retval = do_inode_permission(idmap, inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
retval = devcgroup_inode_permission(inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
return security_inode_permission(inode, mask);
@@ -1915,13 +1915,13 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
return ERR_PTR(-ELOOP);
- if (!(nd->flags & LOOKUP_RCU)) {
+ if (unlikely(atime_needs_update(&last->link, inode))) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (!try_to_unlazy(nd))
+ return ERR_PTR(-ECHILD);
+ }
touch_atime(&last->link);
cond_resched();
- } else if (atime_needs_update(&last->link, inode)) {
- if (!try_to_unlazy(nd))
- return ERR_PTR(-ECHILD);
- touch_atime(&last->link);
}
error = security_inode_follow_link(link->dentry, inode,
@@ -2434,9 +2434,12 @@ static int link_path_walk(const char *name, struct nameidata *nd)
nd->flags |= LOOKUP_PARENT;
if (IS_ERR(name))
return PTR_ERR(name);
- while (*name=='/')
- name++;
- if (!*name) {
+ if (*name == '/') {
+ do {
+ name++;
+ } while (unlikely(*name == '/'));
+ }
+ if (unlikely(!*name)) {
nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
return 0;
}
@@ -2449,7 +2452,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
idmap = mnt_idmap(nd->path.mnt);
err = may_lookup(idmap, nd);
- if (err)
+ if (unlikely(err))
return err;
nd->last.name = name;
@@ -5407,25 +5410,25 @@ EXPORT_SYMBOL(vfs_get_link);
static char *__page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
- struct page *page;
+ struct folio *folio;
struct address_space *mapping = inode->i_mapping;
if (!dentry) {
- page = find_get_page(mapping, 0);
- if (!page)
+ folio = filemap_get_folio(mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
- return (char*)page;
+ folio = read_mapping_folio(mapping, 0, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(callback, page_put_link, page);
+ set_delayed_call(callback, page_put_link, folio);
BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
- return page_address(page);
+ return folio_address(folio);
}
const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
@@ -5435,6 +5438,17 @@ const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
}
EXPORT_SYMBOL_GPL(page_get_link_raw);
+/**
+ * page_get_link() - An implementation of the get_link inode_operation.
+ * @dentry: The directory entry which is the symlink.
+ * @inode: The inode for the symlink.
+ * @callback: Used to drop the reference to the symlink.
+ *
+ * Filesystems which store their symlinks in the page cache should use
+ * this to implement the get_link() member of their inode_operations.
+ *
+ * Return: A pointer to the NUL-terminated symlink.
+ */
const char *page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
@@ -5444,12 +5458,25 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode,
nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
-
EXPORT_SYMBOL(page_get_link);
+/**
+ * page_put_link() - Drop the reference to the symlink.
+ * @arg: The folio which contains the symlink.
+ *
+ * This is used internally by page_get_link(). It is exported for use
+ * by filesystems which need to implement a variant of page_get_link()
+ * themselves. Despite the apparent symmetry, filesystems which use
+ * page_get_link() do not need to call page_put_link().
+ *
+ * The argument, while it has a void pointer type, must be a pointer to
+ * the folio which was retrieved from the page cache. The delayed_call
+ * infrastructure is used to drop the reference count once the caller
+ * is done with the symlink.
+ */
void page_put_link(void *arg)
{
- put_page(arg);
+ folio_put(arg);
}
EXPORT_SYMBOL(page_put_link);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 1c62a5a9f51d..58146e935402 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -40,31 +40,31 @@ static const char *nfs_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
void *err;
if (!dentry) {
err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
if (err)
return err;
- page = find_get_page(inode->i_mapping, 0);
- if (!page)
+ folio = filemap_get_folio(inode->i_mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
- page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+ folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler,
NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(done, page_put_link, page);
- return page_address(page);
+ set_delayed_call(done, page_put_link, folio);
+ return folio_address(folio);
}
/*
diff --git a/fs/open.c b/fs/open.c
index a9063cca9911..7828234a7caa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -60,7 +60,10 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- inode_lock(dentry->d_inode);
+ ret = inode_lock_killable(dentry->d_inode);
+ if (ret)
+ return ret;
+
/* Note any delegations or leases have already been broken: */
ret = notify_change(idmap, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode);
@@ -635,7 +638,9 @@ int chmod_common(const struct path *path, umode_t mode)
if (error)
return error;
retry_deleg:
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ goto out_mnt_unlock;
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
@@ -650,6 +655,7 @@ out_unlock:
if (!error)
goto retry_deleg;
}
+out_mnt_unlock:
mnt_drop_write(path->mnt);
return error;
}
@@ -769,7 +775,9 @@ retry_deleg:
return -EINVAL;
if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
return -EINVAL;
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
setattr_should_drop_sgid(idmap, inode);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 2fa450a7854d..44e208da417c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -352,6 +352,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
struct path realpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
+ .ctx.count = INT_MAX,
.dentry = dentry,
.list = list,
.root = root,
@@ -572,6 +573,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
struct ovl_cache_entry *p, *n;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = list,
.root = root,
};
@@ -673,6 +675,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
struct ovl_readdir_translate *rdt =
container_of(ctx, struct ovl_readdir_translate, ctx);
struct dir_context *orig_ctx = rdt->orig_ctx;
+ bool res;
if (rdt->parent_ino && strcmp(name, "..") == 0) {
ino = rdt->parent_ino;
@@ -687,7 +690,10 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
name, namelen, rdt->xinowarn);
}
- return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ ctx->count = orig_ctx->count;
+
+ return res;
}
static bool ovl_is_impure_dir(struct file *file)
@@ -714,6 +720,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
struct ovl_readdir_translate rdt = {
.ctx.actor = ovl_fill_real,
+ .ctx.count = ctx->count,
.orig_ctx = ctx,
.xinobits = ovl_xino_bits(ofs),
.xinowarn = ovl_xino_warn(ofs),
@@ -1074,6 +1081,7 @@ int ovl_check_d_type_supported(const struct path *realpath)
int err;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_check_d_type,
+ .ctx.count = INT_MAX,
.d_type_supported = false,
};
@@ -1095,6 +1103,7 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
};
bool incompat = false;
@@ -1179,6 +1188,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
};
diff --git a/fs/pidfs.c b/fs/pidfs.c
index d64a4cbeb0da..2ac6f5cd861d 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -569,36 +569,14 @@ static struct vfsmount *pidfs_mnt __ro_after_init;
static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
- return -EOPNOTSUPP;
+ return anon_inode_setattr(idmap, dentry, attr);
}
-
-/*
- * User space expects pidfs inodes to have no file type in st_mode.
- *
- * In particular, 'lsof' has this legacy logic:
- *
- * type = s->st_mode & S_IFMT;
- * switch (type) {
- * ...
- * case 0:
- * if (!strcmp(p, "anon_inode"))
- * Lf->ntype = Ntype = N_ANON_INODE;
- *
- * to detect our old anon_inode logic.
- *
- * Rather than mess with our internal sane inode data, just fix it
- * up here in getattr() by masking off the format bits.
- */
static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
- struct inode *inode = d_inode(path->dentry);
-
- generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
- stat->mode &= ~S_IFMT;
- return 0;
+ return anon_inode_getattr(idmap, path, stat, request_mask, query_flags);
}
static const struct inode_operations pidfs_inode_operations = {
@@ -826,7 +804,7 @@ static int pidfs_init_inode(struct inode *inode, void *data)
const struct pid *pid = data;
inode->i_private = data;
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
inode->i_mode |= S_IRWXU;
inode->i_op = &pidfs_inode_operations;
inode->i_fop = &pidfs_file_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index bb0ed26a0b3a..0ef70e128c4a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -332,7 +332,9 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
loff_t retval;
- inode_lock(inode);
+ retval = inode_lock_killable(inode);
+ if (retval)
+ return retval;
switch (whence) {
case SEEK_END:
offset += i_size_read(inode);
diff --git a/fs/readdir.c b/fs/readdir.c
index 0038efda417b..7764b8638978 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct readdir_callback buf = {
.ctx.actor = fillonedir,
+ .ctx.count = 1, /* Hint to fs: just one entry. */
.dirent = dirent
};
@@ -252,7 +253,6 @@ struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -266,12 +266,16 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -279,7 +283,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
return false;
}
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
@@ -296,7 +300,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
user_write_access_end();
@@ -311,7 +315,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct getdents_callback buf = {
.ctx.actor = filldir,
- .count = count,
+ .ctx.count = count,
.current_dir = dirent
};
int error;
@@ -329,7 +333,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
@@ -338,7 +342,6 @@ struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -351,15 +354,19 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
@@ -376,7 +383,7 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
@@ -392,7 +399,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
- .count = count,
+ .ctx.count = count,
.current_dir = dirent
};
int error;
@@ -411,7 +418,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
if (put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
@@ -475,6 +482,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct compat_readdir_callback buf = {
.ctx.actor = compat_fillonedir,
+ .ctx.count = 1, /* Hint to fs: just one entry. */
.dirent = dirent
};
@@ -499,7 +507,6 @@ struct compat_getdents_callback {
struct dir_context ctx;
struct compat_linux_dirent __user *current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -513,12 +520,16 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
namlen + 2, sizeof(compat_long_t));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -526,7 +537,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
return false;
}
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
@@ -542,7 +553,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
user_write_access_end();
@@ -557,8 +568,8 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct compat_getdents_callback buf = {
.ctx.actor = compat_filldir,
+ .ctx.count = count,
.current_dir = dirent,
- .count = count
};
int error;
@@ -575,7 +586,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
diff --git a/fs/select.c b/fs/select.c
index 7da531b1cf6b..9fb650d03d52 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -630,7 +630,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
ret = -EINVAL;
- if (n < 0)
+ if (unlikely(n < 0))
goto out_nofds;
/* max_fds can increase, so grab it once to avoid race */
@@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
int fd = pollfd->fd;
__poll_t mask, filter;
- if (fd < 0)
+ if (unlikely(fd < 0))
return 0;
CLASS(fd, f)(fd);
diff --git a/fs/stat.c b/fs/stat.c
index 3d9222807214..52c604ebbff8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -254,7 +254,7 @@ int vfs_getattr(const struct path *path, struct kstat *stat,
int retval;
retval = security_inode_getattr(path);
- if (retval)
+ if (unlikely(retval))
return retval;
return vfs_getattr_nosec(path, stat, request_mask, query_flags);
}
@@ -425,7 +425,7 @@ SYSCALL_DEFINE2(stat, const char __user *, filename,
int error;
error = vfs_stat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_old_stat(&stat, statbuf);
@@ -438,7 +438,7 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename,
int error;
error = vfs_lstat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_old_stat(&stat, statbuf);
@@ -447,12 +447,13 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename,
SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_fstat(fd, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_fstat(fd, &stat);
+ if (unlikely(error))
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
#endif /* __ARCH_WANT_OLD_STAT */
@@ -506,10 +507,12 @@ SYSCALL_DEFINE2(newstat, const char __user *, filename,
struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat(filename, &stat);
+ int error;
- if (error)
+ error = vfs_stat(filename, &stat);
+ if (unlikely(error))
return error;
+
return cp_new_stat(&stat, statbuf);
}
@@ -520,7 +523,7 @@ SYSCALL_DEFINE2(newlstat, const char __user *, filename,
int error;
error = vfs_lstat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_new_stat(&stat, statbuf);
@@ -534,8 +537,9 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
int error;
error = vfs_fstatat(dfd, filename, &stat, flag);
- if (error)
+ if (unlikely(error))
return error;
+
return cp_new_stat(&stat, statbuf);
}
#endif
@@ -543,12 +547,13 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_fstat(fd, &stat);
+ int error;
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ error = vfs_fstat(fd, &stat);
+ if (unlikely(error))
+ return error;
- return error;
+ return cp_new_stat(&stat, statbuf);
}
#endif
diff --git a/fs/super.c b/fs/super.c
index 97a17f9d9023..6bbdb7e59a8d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -201,7 +201,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
- total_objects = dentries + inodes + fs_objects + 1;
+ total_objects = dentries + inodes + fs_objects;
if (!total_objects)
total_objects = 1;