forked from Minki/linux
btrfs: replace cleaner_delayed_iput_mutex with a waitqueue
The throttle path doesn't take cleaner_delayed_iput_mutex, which means we could think we're done flushing iputs in the data space reservation path when we could have a throttler doing an iput. There's no real reason to serialize the delayed iput flushing, so instead of taking the cleaner_delayed_iput_mutex whenever we flush the delayed iputs just replace it with an atomic counter and a waitqueue. This removes the short (or long depending on how big the inode is) window where we think there are no more pending iputs when there really are some. The waiting is killable as it could be indirectly called from user operations like fallocate or zero-range. Such call sites should handle the error but otherwise it's not necessary. E.g. flush_space just needs to attempt to make space by waiting on iputs. Signed-off-by: Josef Bacik <josef@toxicpanda.com> [ add killable comment and changelog parts ] Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
3ece54e504
commit
034f784d7c
@ -934,7 +934,8 @@ struct btrfs_fs_info {
|
||||
|
||||
spinlock_t delayed_iput_lock;
|
||||
struct list_head delayed_iputs;
|
||||
struct mutex cleaner_delayed_iput_mutex;
|
||||
atomic_t nr_delayed_iputs;
|
||||
wait_queue_head_t delayed_iputs_wait;
|
||||
|
||||
/* this protects tree_mod_seq_list */
|
||||
spinlock_t tree_mod_seq_lock;
|
||||
@ -3282,6 +3283,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
|
||||
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
|
||||
void btrfs_add_delayed_iput(struct inode *inode);
|
||||
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
u64 start, u64 num_bytes, u64 min_size,
|
||||
loff_t actual_len, u64 *alloc_hint);
|
||||
|
@ -1717,9 +1717,7 @@ static int cleaner_kthread(void *arg)
|
||||
goto sleep;
|
||||
}
|
||||
|
||||
mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
|
||||
again = btrfs_clean_one_deleted_snapshot(root);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
@ -2676,7 +2674,6 @@ int open_ctree(struct super_block *sb,
|
||||
mutex_init(&fs_info->delete_unused_bgs_mutex);
|
||||
mutex_init(&fs_info->reloc_mutex);
|
||||
mutex_init(&fs_info->delalloc_root_mutex);
|
||||
mutex_init(&fs_info->cleaner_delayed_iput_mutex);
|
||||
seqlock_init(&fs_info->profiles_lock);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
|
||||
@ -2698,6 +2695,7 @@ int open_ctree(struct super_block *sb,
|
||||
atomic_set(&fs_info->defrag_running, 0);
|
||||
atomic_set(&fs_info->qgroup_op_seq, 0);
|
||||
atomic_set(&fs_info->reada_works_cnt, 0);
|
||||
atomic_set(&fs_info->nr_delayed_iputs, 0);
|
||||
atomic64_set(&fs_info->tree_mod_seq, 0);
|
||||
fs_info->sb = sb;
|
||||
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
|
||||
@ -2775,6 +2773,7 @@ int open_ctree(struct super_block *sb,
|
||||
init_waitqueue_head(&fs_info->transaction_wait);
|
||||
init_waitqueue_head(&fs_info->transaction_blocked_wait);
|
||||
init_waitqueue_head(&fs_info->async_submit_wait);
|
||||
init_waitqueue_head(&fs_info->delayed_iputs_wait);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->pinned_chunks);
|
||||
|
||||
|
@ -4279,10 +4279,14 @@ commit_trans:
|
||||
/*
|
||||
* The cleaner kthread might still be doing iput
|
||||
* operations. Wait for it to finish so that
|
||||
* more space is released.
|
||||
* more space is released. We don't need to
|
||||
* explicitly run the delayed iputs here because
|
||||
* the commit_transaction would have woken up
|
||||
* the cleaner.
|
||||
*/
|
||||
mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
ret = btrfs_wait_on_delayed_iputs(fs_info);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto again;
|
||||
} else {
|
||||
btrfs_end_transaction(trans);
|
||||
@ -4967,9 +4971,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
||||
* bunch of pinned space, so make sure we run the iputs before
|
||||
* we do our pinned bytes check below.
|
||||
*/
|
||||
mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
btrfs_wait_on_delayed_iputs(fs_info);
|
||||
|
||||
ret = may_commit_transaction(fs_info, space_info);
|
||||
break;
|
||||
|
@ -3256,6 +3256,7 @@ void btrfs_add_delayed_iput(struct inode *inode)
|
||||
if (atomic_add_unless(&inode->i_count, -1, 1))
|
||||
return;
|
||||
|
||||
atomic_inc(&fs_info->nr_delayed_iputs);
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
ASSERT(list_empty(&binode->delayed_iput));
|
||||
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
|
||||
@ -3276,11 +3277,32 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
|
||||
list_del_init(&inode->delayed_iput);
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
iput(&inode->vfs_inode);
|
||||
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
|
||||
wake_up(&fs_info->delayed_iputs_wait);
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running
|
||||
* @fs_info - the fs_info for this fs
|
||||
* @return - -EINTR if we were killed, 0 once nr_delayed_iputs reads zero
|
||||
*
|
||||
* This will wait on any delayed iputs that are currently running with KILLABLE
|
||||
* set. Once they are all done running we will return, unless we are killed in
|
||||
* which case we return -EINTR. This helps in user operations like fallocate etc
|
||||
* that might get blocked on the iputs.
|
||||
*
|
||||
* The wait sleeps on fs_info->delayed_iputs_wait, which
|
||||
* btrfs_run_delayed_iputs() wakes when it drops nr_delayed_iputs to zero.
|
||||
*/
|
||||
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret = wait_event_killable(fs_info->delayed_iputs_wait,
|
||||
atomic_read(&fs_info->nr_delayed_iputs) == 0);
|
||||
/* Any non-zero result of the killable wait is reported to callers as -EINTR. */
if (ret)
|
||||
return -EINTR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This creates an orphan entry for the given inode in case something goes wrong
|
||||
* in the middle of an unlink.
|
||||
|
Loading…
Reference in New Issue
Block a user