diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/anon_inodes.c | 45 | ||||
| -rw-r--r-- | fs/binfmt_elf.c | 76 | ||||
| -rw-r--r-- | fs/dcache.c | 11 | ||||
| -rw-r--r-- | fs/exec.c | 60 | ||||
| -rw-r--r-- | fs/exportfs/expfs.c | 1 | ||||
| -rw-r--r-- | fs/file_table.c | 2 | ||||
| -rw-r--r-- | fs/filesystems.c | 14 | ||||
| -rw-r--r-- | fs/fs_context.c | 6 | ||||
| -rw-r--r-- | fs/fs_parser.c | 55 | ||||
| -rw-r--r-- | fs/fuse/dir.c | 2 | ||||
| -rw-r--r-- | fs/fuse/readdir.c | 4 | ||||
| -rw-r--r-- | fs/internal.h | 5 | ||||
| -rw-r--r-- | fs/ioctl.c | 7 | ||||
| -rw-r--r-- | fs/libfs.c | 10 | ||||
| -rw-r--r-- | fs/mpage.c | 13 | ||||
| -rw-r--r-- | fs/namei.c | 79 | ||||
| -rw-r--r-- | fs/nfs/symlink.c | 20 | ||||
| -rw-r--r-- | fs/open.c | 14 | ||||
| -rw-r--r-- | fs/overlayfs/readdir.c | 12 | ||||
| -rw-r--r-- | fs/pidfs.c | 28 | ||||
| -rw-r--r-- | fs/read_write.c | 4 | ||||
| -rw-r--r-- | fs/readdir.c | 47 | ||||
| -rw-r--r-- | fs/select.c | 4 | ||||
| -rw-r--r-- | fs/stat.c | 35 | ||||
| -rw-r--r-- | fs/super.c | 2 |
25 files changed, 244 insertions, 312 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 583ac81669c2..e51e7d88980a 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -24,10 +24,51 @@ #include <linux/uaccess.h> +#include "internal.h" + static struct vfsmount *anon_inode_mnt __ro_after_init; static struct inode *anon_inode_inode __ro_after_init; /* + * User space expects anonymous inodes to have no file type in st_mode. + * + * In particular, 'lsof' has this legacy logic: + * + * type = s->st_mode & S_IFMT; + * switch (type) { + * ... + * case 0: + * if (!strcmp(p, "anon_inode")) + * Lf->ntype = Ntype = N_ANON_INODE; + * + * to detect our old anon_inode logic. + * + * Rather than mess with our internal sane inode data, just fix it + * up here in getattr() by masking off the format bits. + */ +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + stat->mode &= ~S_IFMT; + return 0; +} + +int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +{ + return -EOPNOTSUPP; +} + +static const struct inode_operations anon_inode_operations = { + .getattr = anon_inode_getattr, + .setattr = anon_inode_setattr, +}; + +/* * anon_inodefs_dname() is called from d_path(). */ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) @@ -45,6 +86,8 @@ static int anon_inodefs_init_fs_context(struct fs_context *fc) struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC); if (!ctx) return -ENOMEM; + fc->s_iflags |= SB_I_NOEXEC; + fc->s_iflags |= SB_I_NODEV; ctx->dops = &anon_inodefs_dentry_operations; return 0; } @@ -66,6 +109,7 @@ static struct inode *anon_inode_make_secure_inode( if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; + inode->i_op = &anon_inode_operations; error = security_inode_init_security_anon(inode, &QSTR(name), context_inode); if (error) { @@ -313,6 +357,7 @@ static int __init anon_inode_init(void) anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); if (IS_ERR(anon_inode_inode)) panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); + anon_inode_inode->i_op = &anon_inode_operations; return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4c1ea6b52a53..a43363d593e5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -68,12 +68,6 @@ static int load_elf_binary(struct linux_binprm *bprm); -#ifdef CONFIG_USELIB -static int load_elf_library(struct file *); -#else -#define load_elf_library NULL -#endif - /* * If we don't support core dumping, then supply a NULL so we * don't even try. @@ -101,7 +95,6 @@ static int elf_core_dump(struct coredump_params *cprm); static struct linux_binfmt elf_format = { .module = THIS_MODULE, .load_binary = load_elf_binary, - .load_shlib = load_elf_library, #ifdef CONFIG_COREDUMP .core_dump = elf_core_dump, .min_coredump = ELF_EXEC_PAGESIZE, @@ -1384,75 +1377,6 @@ out_free_ph: goto out; } -#ifdef CONFIG_USELIB -/* This is really simpleminded and specialized - we are loading an - a.out library that is given an ELF header. */ -static int load_elf_library(struct file *file) -{ - struct elf_phdr *elf_phdata; - struct elf_phdr *eppnt; - int retval, error, i, j; - struct elfhdr elf_ex; - - error = -ENOEXEC; - retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0); - if (retval < 0) - goto out; - - if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - goto out; - - /* First of all, some simple consistency checks */ - if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(&elf_ex) || !file->f_op->mmap) - goto out; - if (elf_check_fdpic(&elf_ex)) - goto out; - - /* Now read in all of the header information */ - - j = sizeof(struct elf_phdr) * elf_ex.e_phnum; - /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */ - - error = -ENOMEM; - elf_phdata = kmalloc(j, GFP_KERNEL); - if (!elf_phdata) - goto out; - - eppnt = elf_phdata; - error = -ENOEXEC; - retval = elf_read(file, eppnt, j, elf_ex.e_phoff); - if (retval < 0) - goto out_free_ph; - - for (j = 0, i = 0; i<elf_ex.e_phnum; i++) - if ((eppnt + i)->p_type == PT_LOAD) - j++; - if (j != 1) - goto out_free_ph; - - while (eppnt->p_type != PT_LOAD) - eppnt++; - - /* Now use mmap to map the library into memory. */ - error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr), - eppnt, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED_NOREPLACE | MAP_PRIVATE, - 0); - - if (error != ELF_PAGESTART(eppnt->p_vaddr)) - goto out_free_ph; - - error = 0; - -out_free_ph: - kfree(elf_phdata); -out: - return error; -} -#endif /* #ifdef CONFIG_USELIB */ - #ifdef CONFIG_ELF_CORE /* * ELF core dumper diff --git a/fs/dcache.c b/fs/dcache.c index 89f4acab08c0..03d58b2d4fa3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -74,10 +74,11 @@ * arbitrary, since it's serialized on rename_lock */ static int sysctl_vfs_cache_pressure __read_mostly = 100; +static int sysctl_vfs_cache_pressure_denom __read_mostly = 100; unsigned long vfs_pressure_ratio(unsigned long val) { - return mult_frac(val, sysctl_vfs_cache_pressure, 100); + return mult_frac(val, sysctl_vfs_cache_pressure, sysctl_vfs_cache_pressure_denom); } EXPORT_SYMBOL_GPL(vfs_pressure_ratio); @@ -225,6 +226,14 @@ static const struct ctl_table vm_dcache_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, + { + .procname = "vfs_cache_pressure_denom", + .data = &sysctl_vfs_cache_pressure_denom, + .maxlen = sizeof(sysctl_vfs_cache_pressure_denom), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE_HUNDRED, + }, }; static int __init init_fs_dcache_sysctls(void) diff --git a/fs/exec.c b/fs/exec.c index 8e4ea5f1e64c..cfbb2b9ee3c9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -115,66 +115,6 @@ bool path_noexec(const struct path *path) (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } -#ifdef CONFIG_USELIB -/* - * Note that a shared library must be both readable and executable due to - * security reasons. - * - * Also note that we take the address to load from the file itself. - */ -SYSCALL_DEFINE1(uselib, const char __user *, library) -{ - struct linux_binfmt *fmt; - struct file *file; - struct filename *tmp = getname(library); - int error = PTR_ERR(tmp); - static const struct open_flags uselib_flags = { - .open_flag = O_LARGEFILE | O_RDONLY, - .acc_mode = MAY_READ | MAY_EXEC, - .intent = LOOKUP_OPEN, - .lookup_flags = LOOKUP_FOLLOW, - }; - - if (IS_ERR(tmp)) - goto out; - - file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); - putname(tmp); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - - /* - * Check do_open_execat() for an explanation. - */ - error = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || - path_noexec(&file->f_path)) - goto exit; - - error = -ENOEXEC; - - read_lock(&binfmt_lock); - list_for_each_entry(fmt, &formats, lh) { - if (!fmt->load_shlib) - continue; - if (!try_module_get(fmt->module)) - continue; - read_unlock(&binfmt_lock); - error = fmt->load_shlib(file); - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (error != -ENOEXEC) - break; - } - read_unlock(&binfmt_lock); -exit: - fput(file); -out: - return error; -} -#endif /* #ifdef CONFIG_USELIB */ - #ifdef CONFIG_MMU /* * The nascent bprm->mm is not visible until exec_mmap() but it can diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index f0ede3e81cf7..cdefea17986a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -284,6 +284,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) }; struct getdents_callback buffer = { .ctx.actor = filldir_one, + .ctx.count = INT_MAX, .name = name, }; diff --git a/fs/file_table.c b/fs/file_table.c index c04ed94cdc4b..138114d64307 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(get_max_files); static int proc_nr_files(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - files_stat.nr_files = get_nr_files(); + files_stat.nr_files = percpu_counter_sum_positive(&nr_files); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } diff --git a/fs/filesystems.c b/fs/filesystems.c index 58b9067b2391..95e5256821a5 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -156,15 +156,19 @@ static int fs_index(const char __user * __name) static int fs_name(unsigned int index, char __user * buf) { struct file_system_type * tmp; - int len, res; + int len, res = -EINVAL; read_lock(&file_systems_lock); - for (tmp = file_systems; tmp; tmp = tmp->next, index--) - if (index <= 0 && try_module_get(tmp->owner)) + for (tmp = file_systems; tmp; tmp = tmp->next, index--) { + if (index == 0) { + if (try_module_get(tmp->owner)) + res = 0; break; + } + } read_unlock(&file_systems_lock); - if (!tmp) - return -EINVAL; + if (res) + return res; /* OK, we got the reference, so we can safely block */ len = strlen(tmp->name) + 1; diff --git a/fs/fs_context.c b/fs/fs_context.c index 582d33e81117..666e61753aed 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -222,7 +222,7 @@ int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, char *value = strchr(key, '='); if (value) { - if (value == key) + if (unlikely(value == key)) continue; *value++ = 0; v_len = strlen(value); @@ -449,6 +449,10 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "", prefix ? ": " : "", &vaf); break; + case 'i': + printk(KERN_INFO "%s%s%pV\n", prefix ? prefix : "", + prefix ? ": " : "", &vaf); + break; default: printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "", prefix ? ": " : "", &vaf); diff --git a/fs/fs_parser.c b/fs/fs_parser.c index e635a81e17d9..c092a9f79e32 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -380,58 +380,9 @@ EXPORT_SYMBOL(fs_param_is_path); #ifdef CONFIG_VALIDATE_FS_PARSER /** - * validate_constant_table - Validate a constant table - * @tbl: The constant table to validate. - * @tbl_size: The size of the table. - * @low: The lowest permissible value. - * @high: The highest permissible value. - * @special: One special permissible value outside of the range. - */ -bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special) -{ - size_t i; - bool good = true; - - if (tbl_size == 0) { - pr_warn("VALIDATE C-TBL: Empty\n"); - return true; - } - - for (i = 0; i < tbl_size; i++) { - if (!tbl[i].name) { - pr_err("VALIDATE C-TBL[%zu]: Null\n", i); - good = false; - } else if (i > 0 && tbl[i - 1].name) { - int c = strcmp(tbl[i-1].name, tbl[i].name); - - if (c == 0) { - pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n", - i, tbl[i].name); - good = false; - } - if (c > 0) { - pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n", - i, tbl[i-1].name, tbl[i].name); - good = false; - } - } - - if (tbl[i].value != special && - (tbl[i].value < low || tbl[i].value > high)) { - pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n", - i, tbl[i].name, tbl[i].value, low, high); - good = false; - } - } - - return good; -} - -/** - * fs_validate_description - Validate a parameter description - * @name: The parameter name to search for. - * @desc: The parameter description to validate. + * fs_validate_description - Validate a parameter specification array + * @name: Owner name of the parameter specification array + * @desc: The parameter specification array to validate. */ bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 83ac192e7fdd..33b82529cb6e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1676,7 +1676,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, goto out_err; } - set_delayed_call(callback, page_put_link, &folio->page); + set_delayed_call(callback, page_put_link, folio); return folio_address(folio); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 17ce9636a2b1..edcd6f18a8a8 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -120,7 +120,7 @@ static bool fuse_emit(struct file *file, struct dir_context *ctx, fuse_add_dirent_to_cache(file, dirent, ctx->pos); return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, - dirent->type); + dirent->type | FILLDIR_FLAG_NOINTR); } static int parse_dirfile(char *buf, size_t nbytes, struct file *file, @@ -419,7 +419,7 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, if (ff->readdir.pos == ctx->pos) { res = FOUND_SOME; if (!dir_emit(ctx, dirent->name, dirent->namelen, - dirent->ino, dirent->type)) + dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR)) return FOUND_ALL; ctx->pos = dirent->off; } diff --git a/fs/internal.h b/fs/internal.h index 213bf3226213..0526e88ede39 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -344,3 +344,8 @@ static inline bool path_mounted(const struct path *path) void file_f_owner_release(struct file *file); bool file_seek_cur_needs_f_lock(struct file *file); int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags); +int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); diff --git a/fs/ioctl.c b/fs/ioctl.c index c91fd2b46a77..03d9a11f2247 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd, return ioctl_fioasync(fd, filp, argp); case FIOQSIZE: - if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || + if (S_ISDIR(inode->i_mode) || + (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) || S_ISLNK(inode->i_mode)) { loff_t res = inode_get_bytes(inode); return copy_to_user(argp, &res, sizeof(res)) ? @@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd, return ioctl_file_dedupe_range(filp, argp); case FIONREAD: - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode)) return vfs_ioctl(filp, cmd, arg); return put_user(i_size_read(inode) - filp->f_pos, @@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd, return ioctl_get_fs_sysfs_path(filp, argp); default: - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) return file_ioctl(filp, cmd, argp); break; } diff --git a/fs/libfs.c b/fs/libfs.c index 6393d7c49ee6..e28da9574a65 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1647,10 +1647,16 @@ struct inode *alloc_anon_inode(struct super_block *s) * that it already _is_ on the dirty list. */ inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; + /* + * Historically anonymous inodes didn't have a type at all and + * userspace has come to rely on this. Internally they're just + * regular files but S_IFREG is masked off when reporting + * information to userspace. + */ + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_flags |= S_PRIVATE; + inode->i_flags |= S_PRIVATE | S_ANON_INODE; simple_inode_init_ts(inode); return inode; } diff --git a/fs/mpage.c b/fs/mpage.c index ad7844de87c3..c5fd821fd30e 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -445,10 +445,9 @@ static void clean_buffers(struct folio *folio, unsigned first_unmapped) try_to_free_buffers(folio); } -static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, - void *data) +static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio, + struct mpage_data *mpd) { - struct mpage_data *mpd = data; struct bio *bio = mpd->bio; struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; @@ -656,14 +655,16 @@ mpage_writepages(struct address_space *mapping, struct mpage_data mpd = { .get_block = get_block, }; + struct folio *folio = NULL; struct blk_plug plug; - int ret; + int error; blk_start_plug(&plug); - ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = mpage_write_folio(wbc, folio, &mpd); if (mpd.bio) mpage_bio_submit_write(mpd.bio); blk_finish_plug(&plug); - return ret; + return error; } EXPORT_SYMBOL(mpage_writepages); diff --git a/fs/namei.c b/fs/namei.c index 9a0b1b2158fd..4bb889fc980b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -571,14 +571,14 @@ int inode_permission(struct mnt_idmap *idmap, int retval; retval = sb_permission(inode->i_sb, inode, mask); - if (retval) + if (unlikely(retval)) return retval; if (unlikely(mask & MAY_WRITE)) { /* * Nobody gets write access to an immutable file. */ - if (IS_IMMUTABLE(inode)) + if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; /* @@ -586,16 +586,16 @@ int inode_permission(struct mnt_idmap *idmap, * written back improperly if their true value is unknown * to the vfs. */ - if (HAS_UNMAPPED_ID(idmap, inode)) + if (unlikely(HAS_UNMAPPED_ID(idmap, inode))) return -EACCES; } retval = do_inode_permission(idmap, inode, mask); - if (retval) + if (unlikely(retval)) return retval; retval = devcgroup_inode_permission(inode, mask); - if (retval) + if (unlikely(retval)) return retval; return security_inode_permission(inode, mask); @@ -1915,13 +1915,13 @@ static const char *pick_link(struct nameidata *nd, struct path *link, unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW)) return ERR_PTR(-ELOOP); - if (!(nd->flags & LOOKUP_RCU)) { + if (unlikely(atime_needs_update(&last->link, inode))) { + if (nd->flags & LOOKUP_RCU) { + if (!try_to_unlazy(nd)) + return ERR_PTR(-ECHILD); + } touch_atime(&last->link); cond_resched(); - } else if (atime_needs_update(&last->link, inode)) { - if (!try_to_unlazy(nd)) - return ERR_PTR(-ECHILD); - touch_atime(&last->link); } error = security_inode_follow_link(link->dentry, inode, @@ -2434,9 +2434,12 @@ static int link_path_walk(const char *name, struct nameidata *nd) nd->flags |= LOOKUP_PARENT; if (IS_ERR(name)) return PTR_ERR(name); - while (*name=='/') - name++; - if (!*name) { + if (*name == '/') { + do { + name++; + } while (unlikely(*name == '/')); + } + if (unlikely(!*name)) { nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; } @@ -2449,7 +2452,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); - if (err) + if (unlikely(err)) return err; nd->last.name = name; @@ -5407,25 +5410,25 @@ EXPORT_SYMBOL(vfs_get_link); static char *__page_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { - struct page *page; + struct folio *folio; struct address_space *mapping = inode->i_mapping; if (!dentry) { - page = find_get_page(mapping, 0); - if (!page) + folio = filemap_get_folio(mapping, 0); + if (IS_ERR(folio)) return ERR_PTR(-ECHILD); - if (!PageUptodate(page)) { - put_page(page); + if (!folio_test_uptodate(folio)) { + folio_put(folio); return ERR_PTR(-ECHILD); } } else { - page = read_mapping_page(mapping, 0, NULL); - if (IS_ERR(page)) - return (char*)page; + folio = read_mapping_folio(mapping, 0, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - set_delayed_call(callback, page_put_link, page); + set_delayed_call(callback, page_put_link, folio); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - return page_address(page); + return folio_address(folio); } const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, @@ -5435,6 +5438,17 @@ const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, } EXPORT_SYMBOL_GPL(page_get_link_raw); +/** + * page_get_link() - An implementation of the get_link inode_operation. + * @dentry: The directory entry which is the symlink. + * @inode: The inode for the symlink. + * @callback: Used to drop the reference to the symlink. + * + * Filesystems which store their symlinks in the page cache should use + * this to implement the get_link() member of their inode_operations. + * + * Return: A pointer to the NUL-terminated symlink. + */ const char *page_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { @@ -5444,12 +5458,25 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode, nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } - EXPORT_SYMBOL(page_get_link); +/** + * page_put_link() - Drop the reference to the symlink. + * @arg: The folio which contains the symlink. + * + * This is used internally by page_get_link(). It is exported for use + * by filesystems which need to implement a variant of page_get_link() + * themselves. Despite the apparent symmetry, filesystems which use + * page_get_link() do not need to call page_put_link(). + * + * The argument, while it has a void pointer type, must be a pointer to + * the folio which was retrieved from the page cache. The delayed_call + * infrastructure is used to drop the reference count once the caller + * is done with the symlink. + */ void page_put_link(void *arg) { - put_page(arg); + folio_put(arg); } EXPORT_SYMBOL(page_put_link); diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 1c62a5a9f51d..58146e935402 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -40,31 +40,31 @@ static const char *nfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *page; + struct folio *folio; void *err; if (!dentry) { err = ERR_PTR(nfs_revalidate_mapping_rcu(inode)); if (err) return err; - page = find_get_page(inode->i_mapping, 0); - if (!page) + folio = filemap_get_folio(inode->i_mapping, 0); + if (IS_ERR(folio)) return ERR_PTR(-ECHILD); - if (!PageUptodate(page)) { - put_page(page); + if (!folio_test_uptodate(folio)) { + folio_put(folio); return ERR_PTR(-ECHILD); } } else { err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) return err; - page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler, + folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler, NULL); - if (IS_ERR(page)) - return ERR_CAST(page); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - set_delayed_call(done, page_put_link, page); - return page_address(page); + set_delayed_call(done, page_put_link, folio); + return folio_address(folio); } /* diff --git a/fs/open.c b/fs/open.c index a9063cca9911..7828234a7caa 100644 --- a/fs/open.c +++ b/fs/open.c @@ -60,7 +60,10 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, if (ret) newattrs.ia_valid |= ret | ATTR_FORCE; - inode_lock(dentry->d_inode); + ret = inode_lock_killable(dentry->d_inode); + if (ret) + return ret; + /* Note any delegations or leases have already been broken: */ ret = notify_change(idmap, dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); @@ -635,7 +638,9 @@ int chmod_common(const struct path *path, umode_t mode) if (error) return error; retry_deleg: - inode_lock(inode); + error = inode_lock_killable(inode); + if (error) + goto out_mnt_unlock; error = security_path_chmod(path, mode); if (error) goto out_unlock; @@ -650,6 +655,7 @@ out_unlock: if (!error) goto retry_deleg; } +out_mnt_unlock: mnt_drop_write(path->mnt); return error; } @@ -769,7 +775,9 @@ retry_deleg: return -EINVAL; if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid)) return -EINVAL; - inode_lock(inode); + error = inode_lock_killable(inode); + if (error) + return error; if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV | setattr_should_drop_sgid(idmap, inode); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 2fa450a7854d..44e208da417c 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -352,6 +352,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, struct path realpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, + .ctx.count = INT_MAX, .dentry = dentry, .list = list, .root = root, @@ -572,6 +573,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list, struct ovl_cache_entry *p, *n; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_plain, + .ctx.count = INT_MAX, .list = list, .root = root, }; @@ -673,6 +675,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name, struct ovl_readdir_translate *rdt = container_of(ctx, struct ovl_readdir_translate, ctx); struct dir_context *orig_ctx = rdt->orig_ctx; + bool res; if (rdt->parent_ino && strcmp(name, "..") == 0) { ino = rdt->parent_ino; @@ -687,7 +690,10 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name, name, namelen, rdt->xinowarn); } - return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); + res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); + ctx->count = orig_ctx->count; + + return res; } static bool ovl_is_impure_dir(struct file *file) @@ -714,6 +720,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) const struct ovl_layer *lower_layer = ovl_layer_lower(dir); struct ovl_readdir_translate rdt = { .ctx.actor = ovl_fill_real, + .ctx.count = ctx->count, .orig_ctx = ctx, .xinobits = ovl_xino_bits(ofs), .xinowarn = ovl_xino_warn(ofs), @@ -1074,6 +1081,7 @@ int ovl_check_d_type_supported(const struct path *realpath) int err; struct ovl_readdir_data rdd = { .ctx.actor = ovl_check_d_type, + .ctx.count = INT_MAX, .d_type_supported = false, }; @@ -1095,6 +1103,7 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa struct ovl_cache_entry *p; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_plain, + .ctx.count = INT_MAX, .list = &list, }; bool incompat = false; @@ -1179,6 +1188,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) struct ovl_cache_entry *p; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_plain, + .ctx.count = INT_MAX, .list = &list, }; diff --git a/fs/pidfs.c b/fs/pidfs.c index d64a4cbeb0da..2ac6f5cd861d 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -569,36 +569,14 @@ static struct vfsmount *pidfs_mnt __ro_after_init; static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { - return -EOPNOTSUPP; + return anon_inode_setattr(idmap, dentry, attr); } - -/* - * User space expects pidfs inodes to have no file type in st_mode. - * - * In particular, 'lsof' has this legacy logic: - * - * type = s->st_mode & S_IFMT; - * switch (type) { - * ... - * case 0: - * if (!strcmp(p, "anon_inode")) - * Lf->ntype = Ntype = N_ANON_INODE; - * - * to detect our old anon_inode logic. - * - * Rather than mess with our internal sane inode data, just fix it - * up here in getattr() by masking off the format bits. - */ static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(path->dentry); - - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); - stat->mode &= ~S_IFMT; - return 0; + return anon_inode_getattr(idmap, path, stat, request_mask, query_flags); } static const struct inode_operations pidfs_inode_operations = { @@ -826,7 +804,7 @@ static int pidfs_init_inode(struct inode *inode, void *data) const struct pid *pid = data; inode->i_private = data; - inode->i_flags |= S_PRIVATE; + inode->i_flags |= S_PRIVATE | S_ANON_INODE; inode->i_mode |= S_IRWXU; inode->i_op = &pidfs_inode_operations; inode->i_fop = &pidfs_file_operations; diff --git a/fs/read_write.c b/fs/read_write.c index bb0ed26a0b3a..0ef70e128c4a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -332,7 +332,9 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence) struct inode *inode = file_inode(file); loff_t retval; - inode_lock(inode); + retval = inode_lock_killable(inode); + if (retval) + return retval; switch (whence) { case SEEK_END: offset += i_size_read(inode); diff --git a/fs/readdir.c b/fs/readdir.c index 0038efda417b..7764b8638978 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -222,6 +222,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, CLASS(fd_pos, f)(fd); struct readdir_callback buf = { .ctx.actor = fillonedir, + .ctx.count = 1, /* Hint to fs: just one entry. */ .dirent = dirent }; @@ -252,7 +253,6 @@ struct getdents_callback { struct dir_context ctx; struct linux_dirent __user * current_dir; int prev_reclen; - int count; int error; }; @@ -266,12 +266,16 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); int prev_reclen; + unsigned int flags = d_type; + + BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); + d_type &= S_DT_MASK; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. */ - if (reclen > buf->count) + if (reclen > ctx->count) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { @@ -279,7 +283,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen, return false; } prev_reclen = buf->prev_reclen; - if (prev_reclen && signal_pending(current)) + if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *) dirent - prev_reclen; @@ -296,7 +300,7 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen, buf->current_dir = (void __user *)dirent + reclen; buf->prev_reclen = reclen; - buf->count -= reclen; + ctx->count -= reclen; return true; efault_end: user_write_access_end(); @@ -311,7 +315,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, CLASS(fd_pos, f)(fd); struct getdents_callback buf = { .ctx.actor = filldir, - .count = count, + .ctx.count = count, .current_dir = dirent }; int error; @@ -329,7 +333,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else - error = count - buf.count; + error = count - buf.ctx.count; } return error; } @@ -338,7 +342,6 @@ struct getdents_callback64 { struct dir_context ctx; struct linux_dirent64 __user * current_dir; int prev_reclen; - int count; int error; }; @@ -351,15 +354,19 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); int prev_reclen; + unsigned int flags = d_type; + + BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); + d_type &= S_DT_MASK; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. */ - if (reclen > buf->count) + if (reclen > ctx->count) return false; prev_reclen = buf->prev_reclen; - if (prev_reclen && signal_pending(current)) + if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *)dirent - prev_reclen; @@ -376,7 +383,7 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen, buf->prev_reclen = reclen; buf->current_dir = (void __user *)dirent + reclen; - buf->count -= reclen; + ctx->count -= reclen; return true; efault_end: @@ -392,7 +399,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, CLASS(fd_pos, f)(fd); struct getdents_callback64 buf = { .ctx.actor = filldir64, - .count = count, + .ctx.count = count, .current_dir = dirent }; int error; @@ -411,7 +418,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, if (put_user(d_off, &lastdirent->d_off)) error = -EFAULT; else - error = count - buf.count; + error = count - buf.ctx.count; } return error; } @@ -475,6 +482,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, CLASS(fd_pos, f)(fd); struct compat_readdir_callback buf = { .ctx.actor = compat_fillonedir, + .ctx.count = 1, /* Hint to fs: just one entry. */ .dirent = dirent }; @@ -499,7 +507,6 @@ struct compat_getdents_callback { struct dir_context ctx; struct compat_linux_dirent __user *current_dir; int prev_reclen; - int count; int error; }; @@ -513,12 +520,16 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + namlen + 2, sizeof(compat_long_t)); int prev_reclen; + unsigned int flags = d_type; + + BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); + d_type &= S_DT_MASK; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. */ - if (reclen > buf->count) + if (reclen > ctx->count) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { @@ -526,7 +537,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen return false; } prev_reclen = buf->prev_reclen; - if (prev_reclen && signal_pending(current)) + if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *) dirent - prev_reclen; @@ -542,7 +553,7 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen buf->prev_reclen = reclen; buf->current_dir = (void __user *)dirent + reclen; - buf->count -= reclen; + ctx->count -= reclen; return true; efault_end: user_write_access_end(); @@ -557,8 +568,8 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, CLASS(fd_pos, f)(fd); struct compat_getdents_callback buf = { .ctx.actor = compat_filldir, + .ctx.count = count, .current_dir = dirent, - .count = count }; int error; @@ -575,7 +586,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else - error = count - buf.count; + error = count - buf.ctx.count; } return error; } diff --git a/fs/select.c b/fs/select.c index 7da531b1cf6b..9fb650d03d52 100644 --- a/fs/select.c +++ b/fs/select.c @@ -630,7 +630,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; ret = -EINVAL; - if (n < 0) + if (unlikely(n < 0)) goto out_nofds; /* max_fds can increase, so grab it once to avoid race */ @@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, int fd = pollfd->fd; __poll_t mask, filter; - if (fd < 0) + if (unlikely(fd < 0)) return 0; CLASS(fd, f)(fd); diff --git a/fs/stat.c b/fs/stat.c index 3d9222807214..52c604ebbff8 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -254,7 +254,7 @@ int vfs_getattr(const struct path *path, struct kstat *stat, int retval; retval = security_inode_getattr(path); - if (retval) + if (unlikely(retval)) return retval; return vfs_getattr_nosec(path, stat, request_mask, query_flags); } @@ -425,7 +425,7 @@ SYSCALL_DEFINE2(stat, const char __user *, filename, int error; error = vfs_stat(filename, &stat); - if (error) + if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); @@ -438,7 +438,7 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename, int error; error = vfs_lstat(filename, &stat); - if (error) + if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); @@ -447,12 +447,13 @@ SYSCALL_DEFINE2(lstat, const char __user *, filename, SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_fstat(fd, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_fstat(fd, &stat); + if (unlikely(error)) + return error; - return error; + return cp_old_stat(&stat, statbuf); } #endif /* __ARCH_WANT_OLD_STAT */ @@ -506,10 +507,12 @@ SYSCALL_DEFINE2(newstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat(filename, &stat); + int error; - if (error) + error = vfs_stat(filename, &stat); + if (unlikely(error)) return error; + return cp_new_stat(&stat, statbuf); } @@ -520,7 +523,7 @@ SYSCALL_DEFINE2(newlstat, const char __user *, filename, int error; error = vfs_lstat(filename, &stat); - if (error) + if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); @@ -534,8 +537,9 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, int error; error = vfs_fstatat(dfd, filename, &stat, flag); - if (error) + if (unlikely(error)) return error; + return cp_new_stat(&stat, statbuf); } #endif @@ -543,12 +547,13 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_fstat(fd, &stat); + int error; - if (!error) - error = cp_new_stat(&stat, statbuf); + error = vfs_fstat(fd, &stat); + if (unlikely(error)) + return error; - return error; + return cp_new_stat(&stat, statbuf); } #endif diff --git a/fs/super.c b/fs/super.c index 97a17f9d9023..6bbdb7e59a8d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -201,7 +201,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, inodes = list_lru_shrink_count(&sb->s_inode_lru, sc); dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc); - total_objects = dentries + inodes + fs_objects + 1; + total_objects = dentries + inodes + fs_objects; if (!total_objects) total_objects = 1; |