diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4896d7a947eb..02895a126ab9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1546,6 +1546,9 @@ struct btrfs_fs_info { */ struct btrfs_workqueue *fixup_workers; struct btrfs_workqueue *delayed_workers; + + /* the extent workers do delayed refs on the extent allocation tree */ + struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; int thread_pool_size; @@ -3268,6 +3271,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); +int btrfs_async_run_delayed_refs(struct btrfs_root *root, + unsigned long count, int wait); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0c0fa78ef452..8bb4aa19898f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2069,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); + btrfs_destroy_workqueue(fs_info->extent_workers); } static void free_root_extent_buffers(struct btrfs_root *root) @@ -2586,6 +2587,10 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("readahead", flags, max_active, 2); fs_info->qgroup_rescan_workers = btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); + fs_info->extent_workers = + btrfs_alloc_workqueue("extent-refs", flags, + min_t(u64, fs_devices->num_devices, + max_active), 8); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->submit_workers && fs_info->flush_workers && @@ -2595,6 +2600,7 @@ int open_ctree(struct super_block *sb, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && + fs_info->fixup_workers && fs_info->extent_workers && fs_info->qgroup_rescan_workers)) { err = -ENOMEM; goto fail_sb_buffer; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bb5b3067ddc3..6caddd5970e4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2674,15 +2674,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, u64 num_entries = atomic_read(&trans->transaction->delayed_refs.num_entries); u64 avg_runtime; + u64 val; smp_mb(); avg_runtime = fs_info->avg_delayed_ref_runtime; + val = num_entries * avg_runtime; if (num_entries * avg_runtime >= NSEC_PER_SEC) return 1; + if (val >= NSEC_PER_SEC / 2) + return 2; return btrfs_check_space_for_delayed_refs(trans, root); } +struct async_delayed_refs { + struct btrfs_root *root; + int count; + int error; + int sync; + struct completion wait; + struct btrfs_work work; +}; + +static void delayed_ref_async_start(struct btrfs_work *work) +{ + struct async_delayed_refs *async; + struct btrfs_trans_handle *trans; + int ret; + + async = container_of(work, struct async_delayed_refs, work); + + trans = btrfs_join_transaction(async->root); + if (IS_ERR(trans)) { + async->error = PTR_ERR(trans); + goto done; + } + + /* + * trans->sync means that when we call end_transaciton, we won't + * wait on delayed refs + */ + trans->sync = true; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); + if (ret) + async->error = ret; + + ret = btrfs_end_transaction(trans, async->root); + if (ret && !async->error) + async->error = ret; +done: + if (async->sync) + complete(&async->wait); + else + kfree(async); +} + +int btrfs_async_run_delayed_refs(struct btrfs_root *root, + unsigned long count, int wait) +{ + struct async_delayed_refs *async; + int ret; + + async = kmalloc(sizeof(*async), GFP_NOFS); + if (!async) + return -ENOMEM; + + async->root = root->fs_info->tree_root; + async->count = count; + async->error = 0; + if (wait) + async->sync = 1; + else + async->sync = 0; + init_completion(&async->wait); + + btrfs_init_work(&async->work, delayed_ref_async_start, + NULL, NULL); + + btrfs_queue_work(root->fs_info->extent_workers, &async->work); + + if (wait) { + wait_for_completion(&async->wait); + ret = async->error; + kfree(async); + return ret; + } + return 0; +} + /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. count can be diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 992aae6c00b0..38d1e7b976d8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2678,6 +2678,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans = NULL; goto out_unlock; } + trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3aafbde8b637..1c54e2eb74ab 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -697,6 +697,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; + int must_run_delayed_refs = 0; if (trans->use_count > 1) { trans->use_count--; @@ -711,10 +712,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_create_pending_block_groups(trans, root); trans->delayed_ref_updates = 0; - if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) { + if (!trans->sync) { + must_run_delayed_refs = + btrfs_should_throttle_delayed_refs(trans, root); cur = max_t(unsigned long, cur, 32); - trans->delayed_ref_updates = 0; - btrfs_run_delayed_refs(trans, root, cur); + + /* + * don't make the caller wait if they are from a NOLOCK + * or ATTACH transaction, it will deadlock with commit + */ + if (must_run_delayed_refs == 1 && + (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH))) + must_run_delayed_refs = 2; } if (trans->qgroup_reserved) { @@ -775,6 +784,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); + if (must_run_delayed_refs) { + btrfs_async_run_delayed_refs(root, cur, + must_run_delayed_refs == 1); + } return err; }