diff options
Diffstat (limited to 'fs/btrfs/inode.c')
| -rw-r--r-- | fs/btrfs/inode.c | 284 |
1 files changed, 156 insertions, 128 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dbbb67293e34..4e987fe6d648 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1654,8 +1654,6 @@ out_unlock: clear_bits, page_ops); start += cur_alloc_size; - if (start >= end) - return ret; } /* @@ -1664,9 +1662,11 @@ out_unlock: * space_info's bytes_may_use counter, reserved in * btrfs_check_data_free_space(). */ - extent_clear_unlock_delalloc(inode, start, end, locked_page, - clear_bits | EXTENT_CLEAR_DATA_RESV, - page_ops); + if (start < end) { + clear_bits |= EXTENT_CLEAR_DATA_RESV; + extent_clear_unlock_delalloc(inode, start, end, locked_page, + clear_bits, page_ops); + } return ret; } @@ -3482,15 +3482,21 @@ zeroit: void btrfs_add_delayed_iput(struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = inode->root->fs_info; + unsigned long flags; if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1)) return; atomic_inc(&fs_info->nr_delayed_iputs); - spin_lock(&fs_info->delayed_iput_lock); + /* + * Need to be irq safe here because we can be called from either an irq + * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq + * context. + */ + spin_lock_irqsave(&fs_info->delayed_iput_lock, flags); ASSERT(list_empty(&inode->delayed_iput)); list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags); if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) wake_up_process(fs_info->cleaner_kthread); } @@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { list_del_init(&inode->delayed_iput); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); iput(&inode->vfs_inode); if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) wake_up(&fs_info->delayed_iputs_wait); - spin_lock(&fs_info->delayed_iput_lock); + spin_lock_irq(&fs_info->delayed_iput_lock); } static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { if (!list_empty(&inode->delayed_iput)) { - spin_lock(&fs_info->delayed_iput_lock); + spin_lock_irq(&fs_info->delayed_iput_lock); if (!list_empty(&inode->delayed_iput)) run_delayed_iput_locked(fs_info, inode); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); } } void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) { - - spin_lock(&fs_info->delayed_iput_lock); + /* + * btrfs_put_ordered_extent() can run in irq context (see bio.c), which + * calls btrfs_add_delayed_iput() and that needs to lock + * fs_info->delayed_iput_lock. So we need to disable irqs here to + * prevent a deadlock. + */ + spin_lock_irq(&fs_info->delayed_iput_lock); while (!list_empty(&fs_info->delayed_iputs)) { struct btrfs_inode *inode; inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); run_delayed_iput_locked(fs_info, inode); - cond_resched_lock(&fs_info->delayed_iput_lock); + if (need_resched()) { + spin_unlock_irq(&fs_info->delayed_iput_lock); + cond_resched(); + spin_lock_irq(&fs_info->delayed_iput_lock); + } } - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); } /* @@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(fs_info->sb, last_objectid, root); - ret = PTR_ERR_OR_ZERO(inode); - if (ret && ret != -ENOENT) - goto out; + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; + if (ret != -ENOENT) + goto out; + } - if (ret == -ENOENT && root == fs_info->tree_root) { + if (!inode && root == fs_info->tree_root) { struct btrfs_root *dead_root; int is_dead_root = 0; @@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * deleted but wasn't. The inode number may have been reused, * but either way, we can delete the orphan item. */ - if (ret == -ENOENT || inode->i_nlink) { - if (!ret) { + if (!inode || inode->i_nlink) { + if (inode) { ret = btrfs_drop_verity_items(BTRFS_I(inode)); iput(inode); + inode = NULL; if (ret) goto out; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - iput(inode); goto out; } btrfs_debug(fs_info, "auto deleting %Lu", @@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = btrfs_del_orphan_item(trans, root, found_key.objectid); btrfs_end_transaction(trans); - if (ret) { - iput(inode); + if (ret) goto out; - } continue; } @@ -3901,8 +3917,8 @@ static int btrfs_read_locked_inode(struct inode *inode, inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime); inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime); + inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime), + btrfs_timespec_nsec(leaf, &inode_item->ctime)); BTRFS_I(inode)->i_otime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->otime); @@ -4073,9 +4089,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, inode->i_mtime.tv_nsec); btrfs_set_token_timespec_sec(&token, &item->ctime, - inode->i_ctime.tv_sec); + inode_get_ctime(inode).tv_sec); btrfs_set_token_timespec_nsec(&token, &item->ctime, - inode->i_ctime.tv_nsec); + inode_get_ctime(inode).tv_nsec); btrfs_set_token_timespec_sec(&token, &item->otime, BTRFS_I(inode)->i_otime.tv_sec); @@ -4273,9 +4289,8 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); - inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); - dir->vfs_inode.i_mtime = inode->vfs_inode.i_ctime; - dir->vfs_inode.i_ctime = inode->vfs_inode.i_ctime; + inode_set_ctime_current(&inode->vfs_inode); + dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode); ret = btrfs_update_inode(trans, root, dir); out: return ret; @@ -4448,8 +4463,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->vfs_inode.i_size - fname.disk_name.len * 2); inode_inc_iversion(&dir->vfs_inode); - dir->vfs_inode.i_mtime = current_time(&dir->vfs_inode); - dir->vfs_inode.i_ctime = dir->vfs_inode.i_mtime; + dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode); ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, ret); @@ -4847,9 +4861,6 @@ again: ret = -ENOMEM; goto out; } - ret = set_page_extent_mapped(page); - if (ret < 0) - goto out_unlock; if (!PageUptodate(page)) { ret = btrfs_read_folio(NULL, page_folio(page)); @@ -4864,6 +4875,17 @@ again: goto out_unlock; } } + + /* + * We unlock the page after the io is completed and then re-lock it + * above. release_folio() could have come in between that and cleared + * PagePrivate(), but left the page in the mapping. Set the page mapped + * here to make sure it's properly set for the subpage stuff. + */ + ret = set_page_extent_mapped(page); + if (ret < 0) + goto out_unlock; + wait_on_page_writeback(page); lock_extent(io_tree, block_start, block_end, &cached_state); @@ -5091,8 +5113,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) if (newsize != oldsize) { inode_inc_iversion(inode); if (!(mask & (ATTR_CTIME | ATTR_MTIME))) { - inode->i_mtime = current_time(inode); - inode->i_ctime = inode->i_mtime; + inode->i_mtime = inode_set_ctime_current(inode); } } @@ -5736,9 +5757,8 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; - inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); inode->i_atime = inode->i_mtime; - inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; return inode; @@ -5849,6 +5869,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, } /* + * Find the highest existing sequence number in a directory and then set the + * in-memory index_cnt variable to the first free sequence number. + */ +static int btrfs_set_inode_index_count(struct btrfs_inode *inode) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key, found_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + int ret; + + key.objectid = btrfs_ino(inode); + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* FIXME: we should be able to handle this */ + if (ret == 0) + goto out; + ret = 0; + + if (path->slots[0] == 0) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != btrfs_ino(inode) || + found_key.type != BTRFS_DIR_INDEX_KEY) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + inode->index_cnt = found_key.offset + 1; +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) +{ + if (dir->index_cnt == (u64)-1) { + int ret; + + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) + return ret; + } + } + + *index = dir->index_cnt; + + return 0; +} + +/* * All this infrastructure exists because dir_emit can fault, and we are holding * the tree lock when doing readdir. For now just allocate a buffer and copy * our information into that, and then dir_emit from the buffer. This is @@ -5860,10 +5948,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, static int btrfs_opendir(struct inode *inode, struct file *file) { struct btrfs_file_private *private; + u64 last_index; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index); + if (ret) + return ret; private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); if (!private) return -ENOMEM; + private->last_index = last_index; private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!private->filldir_buf) { kfree(private); @@ -5930,7 +6025,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, private->last_index, + &ins_list, &del_list); again: key.type = BTRFS_DIR_INDEX_KEY; @@ -5948,6 +6044,8 @@ again: break; if (found_key.offset < ctx->pos) continue; + if (found_key.offset > private->last_index) + break; if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) continue; di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); @@ -6063,8 +6161,7 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. */ -static int btrfs_update_time(struct inode *inode, struct timespec64 *now, - int flags) +static int btrfs_update_time(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; bool dirty = flags & ~S_VERSION; @@ -6072,69 +6169,11 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now, if (btrfs_root_readonly(root)) return -EROFS; - if (flags & S_VERSION) - dirty |= inode_maybe_inc_iversion(inode, dirty); - if (flags & S_CTIME) - inode->i_ctime = *now; - if (flags & S_MTIME) - inode->i_mtime = *now; - if (flags & S_ATIME) - inode->i_atime = *now; + dirty = inode_update_timestamps(inode, flags); return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0; } /* - * find the highest existing sequence number in a directory - * and then set the in-memory index_cnt variable to reflect - * free sequence numbers - */ -static int btrfs_set_inode_index_count(struct btrfs_inode *inode) -{ - struct btrfs_root *root = inode->root; - struct btrfs_key key, found_key; - struct btrfs_path *path; - struct extent_buffer *leaf; - int ret; - - key.objectid = btrfs_ino(inode); - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = (u64)-1; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - /* FIXME: we should be able to handle this */ - if (ret == 0) - goto out; - ret = 0; - - if (path->slots[0] == 0) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - path->slots[0]--; - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != btrfs_ino(inode) || - found_key.type != BTRFS_DIR_INDEX_KEY) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - inode->index_cnt = found_key.offset + 1; -out: - btrfs_free_path(path); - return ret; -} - -/* * helper to find a free sequence number in a given directory. This current * code is very simple, later versions will do smarter things in the btree */ @@ -6378,9 +6417,8 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, goto discard; } - inode->i_mtime = current_time(inode); + inode->i_mtime = inode_set_ctime_current(inode); inode->i_atime = inode->i_mtime; - inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; /* @@ -6545,12 +6583,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, * log replay procedure is responsible for setting them to their correct * values (the ones it had when the fsync was done). */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) { - struct timespec64 now = current_time(&parent_inode->vfs_inode); + if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) + parent_inode->vfs_inode.i_mtime = + inode_set_ctime_current(&parent_inode->vfs_inode); - parent_inode->vfs_inode.i_mtime = now; - parent_inode->vfs_inode.i_ctime = now; - } ret = btrfs_update_inode(trans, root, parent_inode); if (ret) btrfs_abort_transaction(trans, ret); @@ -6690,7 +6726,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inc_nlink(inode); inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); @@ -7849,8 +7885,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered); if (ret) { - bbio->bio.bi_status = errno_to_blk_status(ret); - btrfs_dio_end_io(bbio); + btrfs_finish_ordered_extent(dio_data->ordered, NULL, + file_offset, dip->bytes, + !ret); + bio->bi_status = errno_to_blk_status(ret); + iomap_dio_bio_end_io(bio); return; } } @@ -8753,7 +8792,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(idmap, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; spin_lock(&BTRFS_I(inode)->lock); @@ -8777,7 +8816,6 @@ static int btrfs_rename_exchange(struct inode *old_dir, struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; - struct timespec64 ctime = current_time(old_inode); struct btrfs_rename_ctx old_rename_ctx; struct btrfs_rename_ctx new_rename_ctx; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); @@ -8908,12 +8946,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, inode_inc_iversion(new_dir); inode_inc_iversion(old_inode); inode_inc_iversion(new_inode); - old_dir->i_mtime = ctime; - old_dir->i_ctime = ctime; - new_dir->i_mtime = ctime; - new_dir->i_ctime = ctime; - old_inode->i_ctime = ctime; - new_inode->i_ctime = ctime; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), @@ -9177,11 +9210,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, inode_inc_iversion(old_dir); inode_inc_iversion(new_dir); inode_inc_iversion(old_inode); - old_dir->i_mtime = current_time(old_dir); - old_dir->i_ctime = old_dir->i_mtime; - new_dir->i_mtime = old_dir->i_mtime; - new_dir->i_ctime = old_dir->i_mtime; - old_inode->i_ctime = old_dir->i_mtime; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), @@ -9203,7 +9232,6 @@ static int btrfs_rename(struct mnt_idmap *idmap, if (new_inode) { inode_inc_iversion(new_inode); - new_inode->i_ctime = current_time(new_inode); if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry); @@ -9743,7 +9771,7 @@ next: *alloc_hint = ins.objectid + ins.offset; inode_inc_iversion(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && (actual_len > inode->i_size) && |