summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/anon_inodes.c 45
-rw-r--r-- fs/binfmt_elf.c 76
-rw-r--r-- fs/dcache.c 11
-rw-r--r-- fs/exec.c 60
-rw-r--r-- fs/exportfs/expfs.c 1
-rw-r--r-- fs/file_table.c 2
-rw-r--r-- fs/filesystems.c 14
-rw-r--r-- fs/fs_context.c 6
-rw-r--r-- fs/fs_parser.c 55
-rw-r--r-- fs/fuse/dir.c 2
-rw-r--r-- fs/fuse/readdir.c 4
-rw-r--r-- fs/internal.h 5
-rw-r--r-- fs/ioctl.c 7
-rw-r--r-- fs/libfs.c 10
-rw-r--r-- fs/mpage.c 13
-rw-r--r-- fs/namei.c 79
-rw-r--r-- fs/nfs/symlink.c 20
-rw-r--r-- fs/open.c 14
-rw-r--r-- fs/overlayfs/readdir.c 12
-rw-r--r-- fs/pidfs.c 28
-rw-r--r-- fs/read_write.c 4
-rw-r--r-- fs/readdir.c 47
-rw-r--r-- fs/select.c 4
-rw-r--r-- fs/stat.c 35
-rw-r--r-- fs/super.c 2
25 files changed, 244 insertions, 312 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 583ac81669c2..e51e7d88980a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,10 +24,51 @@
#include <linux/uaccess.h>
+#include "internal.h"
+
static struct vfsmount *anon_inode_mnt __ro_after_init;
static struct inode *anon_inode_inode __ro_after_init;
/*
+ * User space expects anonymous inodes to have no file type in st_mode.
+ *
+ * In particular, 'lsof' has this legacy logic:
+ *
+ * type = s->st_mode & S_IFMT;
+ * switch (type) {
+ * ...
+ * case 0:
+ * if (!strcmp(p, "anon_inode"))
+ * Lf->ntype = Ntype = N_ANON_INODE;
+ *
+ * to detect our old anon_inode logic.
+ *
+ * Rather than mess with our internal sane inode data, just fix it
+ * up here in getattr() by masking off the format bits.
+ *
+ * Neither @idmap nor @query_flags is consulted: the attributes are
+ * always filled in via nop_mnt_idmap, and the function always returns 0.
+ */
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+ stat->mode &= ~S_IFMT;
+ return 0;
+}
+
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+{
+ return -EOPNOTSUPP; /* unconditional: every setattr request is refused, arguments are ignored */
+}
+
+static const struct inode_operations anon_inode_operations = {
+ .getattr = anon_inode_getattr, /* reports attributes with the S_IFMT bits cleared */
+ .setattr = anon_inode_setattr, /* always fails with -EOPNOTSUPP */
+};
+
+/*
* anon_inodefs_dname() is called from d_path().
*/
static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -45,6 +86,8 @@ static int anon_inodefs_init_fs_context(struct fs_context *fc)
struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
if (!ctx)
return -ENOMEM;
+ fc->s_iflags |= SB_I_NOEXEC;
+ fc->s_iflags |= SB_I_NODEV;
ctx->dops = &anon_inodefs_dentry_operations;
return 0;
}
@@ -66,6 +109,7 @@ static struct inode *anon_inode_make_secure_inode(
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
+ inode->i_op = &anon_inode_operations;
error = security_inode_init_security_anon(inode, &QSTR(name),
context_inode);
if (error) {
@@ -313,6 +357,7 @@ static int __init anon_inode_init(void)
anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
if (IS_ERR(anon_inode_inode))
panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+ anon_inode_inode->i_op = &anon_inode_operations;
return 0;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4c1ea6b52a53..a43363d593e5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -68,12 +68,6 @@
static int load_elf_binary(struct linux_binprm *bprm);
-#ifdef CONFIG_USELIB
-static int load_elf_library(struct file *);
-#else
-#define load_elf_library NULL
-#endif
-
/*
* If we don't support core dumping, then supply a NULL so we
* don't even try.
@@ -101,7 +95,6 @@ static int elf_core_dump(struct coredump_params *cprm);
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
- .load_shlib = load_elf_library,
#ifdef CONFIG_COREDUMP
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
@@ -1384,75 +1377,6 @@ out_free_ph:
goto out;
}
-#ifdef CONFIG_USELIB
-/* This is really simpleminded and specialized - we are loading an
- a.out library that is given an ELF header. */
-static int load_elf_library(struct file *file)
-{
- struct elf_phdr *elf_phdata;
- struct elf_phdr *eppnt;
- int retval, error, i, j;
- struct elfhdr elf_ex;
-
- error = -ENOEXEC;
- retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
- if (retval < 0)
- goto out;
-
- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
- goto out;
-
- /* First of all, some simple consistency checks */
- if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(&elf_ex) || !file->f_op->mmap)
- goto out;
- if (elf_check_fdpic(&elf_ex))
- goto out;
-
- /* Now read in all of the header information */
-
- j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
- /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
-
- error = -ENOMEM;
- elf_phdata = kmalloc(j, GFP_KERNEL);
- if (!elf_phdata)
- goto out;
-
- eppnt = elf_phdata;
- error = -ENOEXEC;
- retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
- if (retval < 0)
- goto out_free_ph;
-
- for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
- if ((eppnt + i)->p_type == PT_LOAD)
- j++;
- if (j != 1)
- goto out_free_ph;
-
- while (eppnt->p_type != PT_LOAD)
- eppnt++;
-
- /* Now use mmap to map the library into memory. */
- error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
- eppnt,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED_NOREPLACE | MAP_PRIVATE,
- 0);
-
- if (error != ELF_PAGESTART(eppnt->p_vaddr))
- goto out_free_ph;
-
- error = 0;
-
-out_free_ph:
- kfree(elf_phdata);
-out:
- return error;
-}
-#endif /* #ifdef CONFIG_USELIB */
-
#ifdef CONFIG_ELF_CORE
/*
* ELF core dumper
diff --git a/fs/dcache.c b/fs/dcache.c
index 89f4acab08c0..03d58b2d4fa3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -74,10 +74,11 @@
* arbitrary, since it's serialized on rename_lock
*/
static int sysctl_vfs_cache_pressure __read_mostly = 100;
+static int sysctl_vfs_cache_pressure_denom __read_mostly = 100;
unsigned long vfs_pressure_ratio(unsigned long val)
{
- return mult_frac(val, sysctl_vfs_cache_pressure, 100);
+ return mult_frac(val, sysctl_vfs_cache_pressure, sysctl_vfs_cache_pressure_denom); /* divisor is now a variable (initialised to 100) rather than the literal 100 */
}
EXPORT_SYMBOL_GPL(vfs_pressure_ratio);
@@ -225,6 +226,14 @@ static const struct ctl_table vm_dcache_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
+ {
+ .procname = "vfs_cache_pressure_denom",
+ .data = &sysctl_vfs_cache_pressure_denom,
+ .maxlen = sizeof(sysctl_vfs_cache_pressure_denom),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE_HUNDRED,
+ },
};
static int __init init_fs_dcache_sysctls(void)
diff --git a/fs/exec.c b/fs/exec.c
index 8e4ea5f1e64c..cfbb2b9ee3c9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,66 +115,6 @@ bool path_noexec(const struct path *path)
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
-#ifdef CONFIG_USELIB
-/*
- * Note that a shared library must be both readable and executable due to
- * security reasons.
- *
- * Also note that we take the address to load from the file itself.
- */
-SYSCALL_DEFINE1(uselib, const char __user *, library)
-{
- struct linux_binfmt *fmt;
- struct file *file;
- struct filename *tmp = getname(library);
- int error = PTR_ERR(tmp);
- static const struct open_flags uselib_flags = {
- .open_flag = O_LARGEFILE | O_RDONLY,
- .acc_mode = MAY_READ | MAY_EXEC,
- .intent = LOOKUP_OPEN,
- .lookup_flags = LOOKUP_FOLLOW,
- };
-
- if (IS_ERR(tmp))
- goto out;
-
- file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
- putname(tmp);
- error = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
-
- /*
- * Check do_open_execat() for an explanation.
- */
- error = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
- goto exit;
-
- error = -ENOEXEC;
-
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- if (!fmt->load_shlib)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
- }
- read_unlock(&binfmt_lock);
-exit:
- fput(file);
-out:
- return error;
-}
-#endif /* #ifdef CONFIG_USELIB */
-
#ifdef CONFIG_MMU
/*
* The nascent bprm->mm is not visible until exec_mmap() but it can
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index f0ede3e81cf7..cdefea17986a 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -284,6 +284,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
};
struct getdents_callback buffer = {
.ctx.actor = filldir_one,
+ .ctx.count = INT_MAX,
.name = name,
};
diff --git a/fs/file_table.c b/fs/file_table.c
index c04ed94cdc4b..138114d64307 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(get_max_files);
static int proc_nr_files(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
- files_stat.nr_files = get_nr_files();
+ files_stat.nr_files = percpu_counter_sum_positive(&nr_files); /* refresh the stored count from the per-CPU counter before the generic handler runs */
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 58b9067b2391..95e5256821a5 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -156,15 +156,19 @@ static int fs_index(const char __user * __name)
static int fs_name(unsigned int index, char __user * buf)
{
struct file_system_type * tmp;
- int len, res;
+ int len, res = -EINVAL;
read_lock(&file_systems_lock);
- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
- if (index <= 0 && try_module_get(tmp->owner))
+ for (tmp = file_systems; tmp; tmp = tmp->next, index--) {
+ if (index == 0) {
+ if (try_module_get(tmp->owner))
+ res = 0;
break;
+ }
+ }
read_unlock(&file_systems_lock);
- if (!tmp)
- return -EINVAL;
+ if (res)
+ return res;
/* OK, we got the reference, so we can safely block */
len = strlen(tmp->name) + 1;
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 582d33e81117..666e61753aed 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -222,7 +222,7 @@ int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
char *value = strchr(key, '=');
if (value) {
- if (value == key)
+ if (unlikely(value == key))
continue;
*value++ = 0;
v_len = strlen(value);
@@ -449,6 +449,10 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "",
prefix ? ": " : "", &vaf);
break;
+ case 'i':
+ printk(KERN_INFO "%s%s%pV\n", prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
+ break;
default:
printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "",
prefix ? ": " : "", &vaf);
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index e635a81e17d9..c092a9f79e32 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -380,58 +380,9 @@ EXPORT_SYMBOL(fs_param_is_path);
#ifdef CONFIG_VALIDATE_FS_PARSER
/**
- * validate_constant_table - Validate a constant table
- * @tbl: The constant table to validate.
- * @tbl_size: The size of the table.
- * @low: The lowest permissible value.
- * @high: The highest permissible value.
- * @special: One special permissible value outside of the range.
- */
-bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
- int low, int high, int special)
-{
- size_t i;
- bool good = true;
-
- if (tbl_size == 0) {
- pr_warn("VALIDATE C-TBL: Empty\n");
- return true;
- }
-
- for (i = 0; i < tbl_size; i++) {
- if (!tbl[i].name) {
- pr_err("VALIDATE C-TBL[%zu]: Null\n", i);
- good = false;
- } else if (i > 0 && tbl[i - 1].name) {
- int c = strcmp(tbl[i-1].name, tbl[i].name);
-
- if (c == 0) {
- pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n",
- i, tbl[i].name);
- good = false;
- }
- if (c > 0) {
- pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n",
- i, tbl[i-1].name, tbl[i].name);
- good = false;
- }
- }
-
- if (tbl[i].value != special &&
- (tbl[i].value < low || tbl[i].value > high)) {
- pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n",
- i, tbl[i].name, tbl[i].value, low, high);
- good = false;
- }
- }
-
- return good;
-}
-
-/**
- * fs_validate_description - Validate a parameter description
- * @name: The parameter name to search for.
- * @desc: The parameter description to validate.
+ * fs_validate_description - Validate a parameter specification array
+ * @name: Owner name of the parameter specification array
+ * @desc: The parameter specification array to validate.
*/
bool fs_validate_description(const char *name,
const struct fs_parameter_spec *desc)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83ac192e7fdd..33b82529cb6e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1676,7 +1676,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
goto out_err;
}
- set_delayed_call(callback, page_put_link, &folio->page);
+ set_delayed_call(callback, page_put_link, folio);
return folio_address(folio);
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 17ce9636a2b1..edcd6f18a8a8 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -120,7 +120,7 @@ static bool fuse_emit(struct file *file, struct dir_context *ctx,
fuse_add_dirent_to_cache(file, dirent, ctx->pos);
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
- dirent->type);
+ dirent->type | FILLDIR_FLAG_NOINTR);
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
@@ -419,7 +419,7 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
if (ff->readdir.pos == ctx->pos) {
res = FOUND_SOME;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type))
+ dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
return FOUND_ALL;
ctx->pos = dirent->off;
}
diff --git a/fs/internal.h b/fs/internal.h
index 213bf3226213..0526e88ede39 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -344,3 +344,8 @@ static inline bool path_mounted(const struct path *path)
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags);
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index c91fd2b46a77..03d9a11f2247 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_fioasync(fd, filp, argp);
case FIOQSIZE:
- if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
+ if (S_ISDIR(inode->i_mode) ||
+ (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) ||
S_ISLNK(inode->i_mode)) {
loff_t res = inode_get_bytes(inode);
return copy_to_user(argp, &res, sizeof(res)) ?
@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_file_dedupe_range(filp, argp);
case FIONREAD:
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode))
return vfs_ioctl(filp, cmd, arg);
return put_user(i_size_read(inode) - filp->f_pos,
@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return ioctl_get_fs_sysfs_path(filp, argp);
default:
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode))
return file_ioctl(filp, cmd, argp);
break;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 6393d7c49ee6..e28da9574a65 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1647,10 +1647,16 @@ struct inode *alloc_anon_inode(struct super_block *s)
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
+ /*
+ * Historically anonymous inodes didn't have a type at all and
+ * userspace has come to rely on this. Internally they're just
+ * regular files but S_IFREG is masked off when reporting
+ * information to userspace.
+ */
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
simple_inode_init_ts(inode);
return inode;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index ad7844de87c3..c5fd821fd30e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -445,10 +445,9 @@ static void clean_buffers(struct folio *folio, unsigned first_unmapped)
try_to_free_buffers(folio);
}
-static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
- void *data)
+static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio,
+ struct mpage_data *mpd)
{
- struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
@@ -656,14 +655,16 @@ mpage_writepages(struct address_space *mapping,
struct mpage_data mpd = {
.get_block = get_block,
};
+ struct folio *folio = NULL;
struct blk_plug plug;
- int ret;
+ int error;
blk_start_plug(&plug);
- ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = mpage_write_folio(wbc, folio, &mpd);
if (mpd.bio)
mpage_bio_submit_write(mpd.bio);
blk_finish_plug(&plug);
- return ret;
+ return error;
}
EXPORT_SYMBOL(mpage_writepages);
diff --git a/fs/namei.c b/fs/namei.c
index 9a0b1b2158fd..4bb889fc980b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -571,14 +571,14 @@ int inode_permission(struct mnt_idmap *idmap,
int retval;
retval = sb_permission(inode->i_sb, inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
if (unlikely(mask & MAY_WRITE)) {
/*
* Nobody gets write access to an immutable file.
*/
- if (IS_IMMUTABLE(inode))
+ if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
/*
@@ -586,16 +586,16 @@ int inode_permission(struct mnt_idmap *idmap,
* written back improperly if their true value is unknown
* to the vfs.
*/
- if (HAS_UNMAPPED_ID(idmap, inode))
+ if (unlikely(HAS_UNMAPPED_ID(idmap, inode)))
return -EACCES;
}
retval = do_inode_permission(idmap, inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
retval = devcgroup_inode_permission(inode, mask);
- if (retval)
+ if (unlikely(retval))
return retval;
return security_inode_permission(inode, mask);
@@ -1915,13 +1915,13 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
return ERR_PTR(-ELOOP);
- if (!(nd->flags & LOOKUP_RCU)) {
+ if (unlikely(atime_needs_update(&last->link, inode))) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (!try_to_unlazy(nd))
+ return ERR_PTR(-ECHILD);
+ }
touch_atime(&last->link);
cond_resched();
- } else if (atime_needs_update(&last->link, inode)) {
- if (!try_to_unlazy(nd))
- return ERR_PTR(-ECHILD);
- touch_atime(&last->link);
}
error = security_inode_follow_link(link->dentry, inode,
@@ -2434,9 +2434,12 @@ static int link_path_walk(const char *name, struct nameidata *nd)
nd->flags |= LOOKUP_PARENT;
if (IS_ERR(name))
return PTR_ERR(name);
- while (*name=='/')
- name++;
- if (!*name) {
+ if (*name == '/') {
+ do {
+ name++;
+ } while (unlikely(*name == '/'));
+ }
+ if (unlikely(!*name)) {
nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
return 0;
}
@@ -2449,7 +2452,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
idmap = mnt_idmap(nd->path.mnt);
err = may_lookup(idmap, nd);
- if (err)
+ if (unlikely(err))
return err;
nd->last.name = name;
@@ -5407,25 +5410,25 @@ EXPORT_SYMBOL(vfs_get_link);
static char *__page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
- struct page *page;
+ struct folio *folio;
struct address_space *mapping = inode->i_mapping;
if (!dentry) {
- page = find_get_page(mapping, 0);
- if (!page)
+ folio = filemap_get_folio(mapping, 0); /* no-dentry path: look up index 0 in the mapping only, never read it in */
+ if (IS_ERR(folio)) /* failure is signalled by an error pointer here, where the old page lookup returned NULL */
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio); /* drop the reference taken by the lookup before bailing out */
return ERR_PTR(-ECHILD);
}
} else {
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
- return (char*)page;
+ folio = read_mapping_folio(mapping, 0, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio); /* pass the read error through unchanged as the return value */
}
- set_delayed_call(callback, page_put_link, page);
+ set_delayed_call(callback, page_put_link, folio); /* the delayed call is handed the folio, not a page pointer */
BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
- return page_address(page);
+ return folio_address(folio);
}
const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
@@ -5435,6 +5438,17 @@ const char *page_get_link_raw(struct dentry *dentry, struct inode *inode,
}
EXPORT_SYMBOL_GPL(page_get_link_raw);
+/**
+ * page_get_link() - An implementation of the get_link inode_operation.
+ * @dentry: The directory entry which is the symlink.
+ * @inode: The inode for the symlink.
+ * @callback: Used to drop the reference to the symlink.
+ *
+ * Filesystems which store their symlinks in the page cache should use
+ * this to implement the get_link() member of their inode_operations.
+ *
+ * Return: A pointer to the NUL-terminated symlink.
+ */
const char *page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
@@ -5444,12 +5458,25 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode,
nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
-
EXPORT_SYMBOL(page_get_link);
+/**
+ * page_put_link() - Drop the reference to the symlink.
+ * @arg: The folio which contains the symlink.
+ *
+ * This is used internally by page_get_link(). It is exported for use
+ * by filesystems which need to implement a variant of page_get_link()
+ * themselves. Despite the apparent symmetry, filesystems which use
+ * page_get_link() do not need to call page_put_link().
+ *
+ * The argument, while it has a void pointer type, must be a pointer to
+ * the folio which was retrieved from the page cache. The delayed_call
+ * infrastructure is used to drop the reference count once the caller
+ * is done with the symlink.
+ */
void page_put_link(void *arg)
{
- put_page(arg);
+ folio_put(arg);
}
EXPORT_SYMBOL(page_put_link);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 1c62a5a9f51d..58146e935402 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -40,31 +40,31 @@ static const char *nfs_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
void *err;
if (!dentry) {
err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
if (err)
return err;
- page = find_get_page(inode->i_mapping, 0);
- if (!page)
+ folio = filemap_get_folio(inode->i_mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
- page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+ folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler,
NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(done, page_put_link, page);
- return page_address(page);
+ set_delayed_call(done, page_put_link, folio);
+ return folio_address(folio);
}
/*
diff --git a/fs/open.c b/fs/open.c
index a9063cca9911..7828234a7caa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -60,7 +60,10 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- inode_lock(dentry->d_inode);
+ ret = inode_lock_killable(dentry->d_inode);
+ if (ret)
+ return ret;
+
/* Note any delegations or leases have already been broken: */
ret = notify_change(idmap, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode);
@@ -635,7 +638,9 @@ int chmod_common(const struct path *path, umode_t mode)
if (error)
return error;
retry_deleg:
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ goto out_mnt_unlock;
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
@@ -650,6 +655,7 @@ out_unlock:
if (!error)
goto retry_deleg;
}
+out_mnt_unlock:
mnt_drop_write(path->mnt);
return error;
}
@@ -769,7 +775,9 @@ retry_deleg:
return -EINVAL;
if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
return -EINVAL;
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
setattr_should_drop_sgid(idmap, inode);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 2fa450a7854d..44e208da417c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -352,6 +352,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
struct path realpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
+ .ctx.count = INT_MAX,
.dentry = dentry,
.list = list,
.root = root,
@@ -572,6 +573,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
struct ovl_cache_entry *p, *n;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = list,
.root = root,
};
@@ -673,6 +675,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
struct ovl_readdir_translate *rdt =
container_of(ctx, struct ovl_readdir_translate, ctx);
struct dir_context *orig_ctx = rdt->orig_ctx;
+ bool res;
if (rdt->parent_ino && strcmp(name, "..") == 0) {
ino = rdt->parent_ino;
@@ -687,7 +690,10 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
name, namelen, rdt->xinowarn);
}
- return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ ctx->count = orig_ctx->count;
+
+ return res;
}
static bool ovl_is_impure_dir(struct file *file)
@@ -714,6 +720,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
struct ovl_readdir_translate rdt = {
.ctx.actor = ovl_fill_real,
+ .ctx.count = ctx->count,
.orig_ctx = ctx,
.xinobits = ovl_xino_bits(ofs),
.xinowarn = ovl_xino_warn(ofs),
@@ -1074,6 +1081,7 @@ int ovl_check_d_type_supported(const struct path *realpath)
int err;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_check_d_type,
+ .ctx.count = INT_MAX,
.d_type_supported = false,
};
@@ -1095,6 +1103,7 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
};
bool incompat = false;
@@ -1179,6 +1188,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
};
diff --git a/fs/pidfs.c b/fs/pidfs.c
index d64a4cbeb0da..2ac6f5cd861d 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -569,36 +569,14 @@ static struct vfsmount *pidfs_mnt __ro_after_init;
static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
- return -EOPNOTSUPP;
+ return anon_inode_setattr(idmap, dentry, attr); /* delegates to the anon inode helper, forwarding all arguments unchanged */
}
-
-/*
- * User space expects pidfs inodes to have no file type in st_mode.
- *
- * In particular, 'lsof' has this legacy logic:
- *
- * type = s->st_mode & S_IFMT;
- * switch (type) {
- * ...
- * case 0:
- * if (!strcmp(p, "anon_inode"))
- * Lf->ntype = Ntype = N_ANON_INODE;
- *
- * to detect our old anon_inode logic.
- *
- * Rather than mess with our internal sane inode data, just fix it
- * up here in getattr() by masking off the format bits.
- */
static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
- struct inode *inode = d_inode(path->dentry);
-
- generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
- stat->mode &= ~S_IFMT;
- return 0;
+ return anon_inode_getattr(idmap, path, stat, request_mask, query_flags); /* the removed open-coded fillattr + S_IFMT masking is replaced by this call to the anon inode helper */
}
static const struct inode_operations pidfs_inode_operations = {
@@ -826,7 +804,7 @@ static int pidfs_init_inode(struct inode *inode, void *data)
const struct pid *pid = data;
inode->i_private = data;
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
inode->i_mode |= S_IRWXU;
inode->i_op = &pidfs_inode_operations;
inode->i_fop = &pidfs_file_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index bb0ed26a0b3a..0ef70e128c4a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -332,7 +332,9 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
loff_t retval;
- inode_lock(inode);
+ retval = inode_lock_killable(inode);
+ if (retval)
+ return retval;
switch (whence) {
case SEEK_END:
offset += i_size_read(inode);
diff --git a/fs/readdir.c b/fs/readdir.c
index 0038efda417b..7764b8638978 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct readdir_callback buf = {
.ctx.actor = fillonedir,
+ .ctx.count = 1, /* Hint to fs: just one entry. */
.dirent = dirent
};
@@ -252,7 +253,6 @@ struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -266,12 +266,16 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -279,7 +283,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
return false;
}
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
@@ -296,7 +300,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
user_write_access_end();
@@ -311,7 +315,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct getdents_callback buf = {
.ctx.actor = filldir,
- .count = count,
+ .ctx.count = count,
.current_dir = dirent
};
int error;
@@ -329,7 +333,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
@@ -338,7 +342,6 @@ struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -351,15 +354,19 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
@@ -376,7 +383,7 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
@@ -392,7 +399,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
- .count = count,
+ .ctx.count = count,
.current_dir = dirent
};
int error;
@@ -411,7 +418,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
if (put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
@@ -475,6 +482,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct compat_readdir_callback buf = {
.ctx.actor = compat_fillonedir,
+ .ctx.count = 1, /* Hint to fs: just one entry. */
.dirent = dirent
};
@@ -499,7 +507,6 @@ struct compat_getdents_callback {
struct dir_context ctx;
struct compat_linux_dirent __user *current_dir;
int prev_reclen;
- int count;
int error;
};
@@ -513,12 +520,16 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
namlen + 2, sizeof(compat_long_t));
int prev_reclen;
+ unsigned int flags = d_type;
+
+ BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
+ d_type &= S_DT_MASK;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
return false;
buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
+ if (reclen > ctx->count)
return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -526,7 +537,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
return false;
}
prev_reclen = buf->prev_reclen;
- if (prev_reclen && signal_pending(current))
+ if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
@@ -542,7 +553,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
- buf->count -= reclen;
+ ctx->count -= reclen;
return true;
efault_end:
user_write_access_end();
@@ -557,8 +568,8 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
CLASS(fd_pos, f)(fd);
struct compat_getdents_callback buf = {
.ctx.actor = compat_filldir,
+ .ctx.count = count,
.current_dir = dirent,
- .count = count
};
int error;
@@ -575,7 +586,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
- error = count - buf.count;
+ error = count - buf.ctx.count;
}
return error;
}
diff --git a/fs/select.c b/fs/select.c
index 7da531b1cf6b..9fb650d03d52 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -630,7 +630,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
ret = -EINVAL;
- if (n < 0)
+ if (unlikely(n < 0))
goto out_nofds;
/* max_fds can increase, so grab it once to avoid race */
@@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
int fd = pollfd->fd;
__poll_t mask, filter;
- if (fd < 0)
+ if (unlikely(fd < 0))
return 0;
CLASS(fd, f)(fd);
diff --git a/fs/stat.c b/fs/stat.c
index 3d9222807214..52c604ebbff8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -254,7 +254,7 @@ int vfs_getattr(const struct path *path, struct kstat *stat,
int retval;
retval = security_inode_getattr(path);
- if (retval)
+ if (unlikely(retval))
return retval;
return vfs_getattr_nosec(path, stat, request_mask, query_flags);
}
@@ -425,7 +425,7 @@ SYSCALL_DEFINE2(stat, const char __user *, filename,
int error;
error = vfs_stat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_old_stat(&stat, statbuf);
@@ -438,7 +438,7 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename,
int error;
error = vfs_lstat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_old_stat(&stat, statbuf);
@@ -447,12 +447,13 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename,
SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_fstat(fd, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_fstat(fd, &stat);
+ if (unlikely(error))
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
#endif /* __ARCH_WANT_OLD_STAT */
@@ -506,10 +507,12 @@ SYSCALL_DEFINE2(newstat, const char __user *, filename,
struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat(filename, &stat);
+ int error;
- if (error)
+ error = vfs_stat(filename, &stat);
+ if (unlikely(error))
return error;
+
return cp_new_stat(&stat, statbuf);
}
@@ -520,7 +523,7 @@ SYSCALL_DEFINE2(newlstat, const char __user *, filename,
int error;
error = vfs_lstat(filename, &stat);
- if (error)
+ if (unlikely(error))
return error;
return cp_new_stat(&stat, statbuf);
@@ -534,8 +537,9 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
int error;
error = vfs_fstatat(dfd, filename, &stat, flag);
- if (error)
+ if (unlikely(error))
return error;
+
return cp_new_stat(&stat, statbuf);
}
#endif
@@ -543,12 +547,13 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_fstat(fd, &stat);
+ int error;
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ error = vfs_fstat(fd, &stat);
+ if (unlikely(error))
+ return error;
- return error;
+ return cp_new_stat(&stat, statbuf);
}
#endif
diff --git a/fs/super.c b/fs/super.c
index 97a17f9d9023..6bbdb7e59a8d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -201,7 +201,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
- total_objects = dentries + inodes + fs_objects + 1;
+ total_objects = dentries + inodes + fs_objects;
if (!total_objects)
total_objects = 1;