mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 13:41:51 +00:00
for-6.0-rc6-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmMpskIACgkQxWXV+ddt WDtxGA//Z4Z9e0p9CTwBGla9eqflpfPQLya93ANEBqhV/S1wxgvQtj+Q2XpGIqhj AVR4ZqEmnFPmAOay5s/mGQ+wZ3dyR+n/XLZ8XsViXY5yBLnRpZJi8p5ozqYuSm59 1A4FF0ZciD73jql8hPodsd1VFkKqtOTmPFyCxHk2lt/Z36FFYKCUm4P8ALdMxlct 6uEp67PI9Pb6PANq4mj8lpNTnsD2wTKDHqQ3WkHBwuHkEOCVkPbRsBlUkUqpYi0h Lc0XhjcnPX0alfiLFwwNdPZ8vrLE4egktzWA6PqEg1YzBPQQNnuQTHmO25KOqrm1 bW20PGOIF7WFg85w1P20G4I8UdT2CWBEloPSjYTDlD2KTdqBOp95oo7MUQlrDFNm lxns3npylswlvia8nH39iOlwUPL75cDe4U8LkOV+rSHmTmt7B6XK/MfI6sYgmveH V4DUI7BnbfEALbJMsJesHAR/3tnsAPqnLtv+lEF9hM70YXdN2o5iN/D0G/vms3Sr RGVpEFJyJPnzvAg6y3PNTdMEpDtouQHQhHBtPKnfOzRJsgtzk5CTpEBkWPSRLiqm DQj25JdcT8j8Xa8nWppEvogC0hfctqs1ROuZux7KajkxUHEDfXs2l0RR1dEpMvs7 v+Bhw3zLPS0e/b+9HqBSwCo0JAkIWzm6TE00LlKCYsnzNwLZT9k= =4Hu8 -----END PGP SIGNATURE----- Merge tag 'for-6.0-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: - two fixes for hangs in the umount sequence where threads depend on each other and the work must be finished in the right order - in zoned mode, wait for flushing all block group metadata IO before finishing the zone * tag 'for-6.0-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: zoned: wait for extent buffer IOs before finishing a zone btrfs: fix hang during unmount when stopping a space reclaim worker btrfs: fix hang during unmount when stopping block group reclaim worker
This commit is contained in:
commit
60891ec99e
@ -4474,6 +4474,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
|
||||
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
|
||||
|
||||
/*
|
||||
* If we had UNFINISHED_DROPS we could still be processing them, so
|
||||
* clear that bit and wake up relocation so it can stop.
|
||||
* We must do this before stopping the block group reclaim task, because
|
||||
* at btrfs_relocate_block_group() we wait for this bit, and after the
|
||||
* wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
|
||||
* have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
|
||||
* return 1.
|
||||
*/
|
||||
btrfs_wake_unfinished_drop(fs_info);
|
||||
|
||||
/*
|
||||
* We may have the reclaim task running and relocating a data block group,
|
||||
* in which case it may create delayed iputs. So stop it before we park
|
||||
@ -4492,12 +4503,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
*/
|
||||
kthread_park(fs_info->cleaner_kthread);
|
||||
|
||||
/*
|
||||
* If we had UNFINISHED_DROPS we could still be processing them, so
|
||||
* clear that bit and wake up relocation so it can stop.
|
||||
*/
|
||||
btrfs_wake_unfinished_drop(fs_info);
|
||||
|
||||
/* wait for the qgroup rescan worker to stop */
|
||||
btrfs_qgroup_wait_for_completion(fs_info, false);
|
||||
|
||||
@ -4520,6 +4525,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
/* clear out the rbtree of defraggable inodes */
|
||||
btrfs_cleanup_defrag_inodes(fs_info);
|
||||
|
||||
/*
|
||||
* After we parked the cleaner kthread, ordered extents may have
|
||||
* completed and created new delayed iputs. If one of the async reclaim
|
||||
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
|
||||
* can hang forever trying to stop it, because if a delayed iput is
|
||||
* added after it ran btrfs_run_delayed_iputs() and before it called
|
||||
* btrfs_wait_on_delayed_iputs(), it will hang forever since there is
|
||||
* no one else to run iputs.
|
||||
*
|
||||
* So wait for all ongoing ordered extents to complete and then run
|
||||
* delayed iputs. This works because once we reach this point no one
|
||||
* can either create new ordered extents nor create delayed iputs
|
||||
* through some other means.
|
||||
*
|
||||
* Also note that btrfs_wait_ordered_roots() is not safe here, because
|
||||
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
|
||||
* but the delayed iput for the respective inode is made only when doing
|
||||
* the final btrfs_put_ordered_extent() (which must happen at
|
||||
* btrfs_finish_ordered_io() when we are unmounting).
|
||||
*/
|
||||
btrfs_flush_workqueue(fs_info->endio_write_workers);
|
||||
/* Ordered extents for free space inodes. */
|
||||
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
|
||||
cancel_work_sync(&fs_info->async_reclaim_work);
|
||||
cancel_work_sync(&fs_info->async_data_reclaim_work);
|
||||
cancel_work_sync(&fs_info->preempt_reclaim_work);
|
||||
|
@ -1918,10 +1918,44 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Wait for writeback to finish on every extent buffer found in
 * fs_info->buffer_radix whose start offset lies inside @block_group, i.e. in
 * the range [block_group->start, block_group->start + block_group->length).
 *
 * The radix tree walk runs under rcu_read_lock(). The lock is dropped around
 * each wait_on_extent_buffer_writeback() call and re-taken afterwards, with
 * the iterator resumed beforehand so the walk can continue once the lock is
 * held again.
 *
 * NOTE(review): no reference on the extent buffer is taken in this function
 * before the RCU read lock is dropped - confirm that the caller's context
 * guarantees the buffer stays valid for the duration of the wait.
 */
static void wait_eb_writebacks(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
/* Exclusive end offset of the block group. */
const u64 end = block_group->start + block_group->length;
|
||||
struct radix_tree_iter iter;
|
||||
struct extent_buffer *eb;
|
||||
void __rcu **slot;
|
||||
|
||||
rcu_read_lock();
|
||||
/* Start the walk at the index derived from the block group's start offset. */
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
|
||||
block_group->start >> fs_info->sectorsize_bits) {
|
||||
eb = radix_tree_deref_slot(slot);
|
||||
if (!eb)
|
||||
continue;
|
||||
/* The dereference asked for a retry: restart the lookup for this slot. */
if (radix_tree_deref_retry(eb)) {
|
||||
slot = radix_tree_iter_retry(&iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Skip buffers that start before the block group. */
if (eb->start < block_group->start)
|
||||
continue;
|
||||
/* Stop at the first buffer that starts at or past the end of the block group. */
if (eb->start >= end)
|
||||
break;
|
||||
|
||||
/*
 * Resume the iterator before dropping the RCU read lock, then drop the
 * lock around the wait and re-take it before continuing the walk.
 */
slot = radix_tree_iter_resume(slot, &iter);
|
||||
rcu_read_unlock();
|
||||
wait_on_extent_buffer_writeback(eb);
|
||||
rcu_read_lock();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
struct map_lookup *map;
|
||||
const bool is_metadata = (block_group->flags &
|
||||
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
}
|
||||
|
||||
/* Check if we have unwritten allocated space */
|
||||
if ((block_group->flags &
|
||||
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
|
||||
if (is_metadata &&
|
||||
block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return -EAGAIN;
|
||||
@ -1958,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
/* No need to wait for NOCOW writers. Zoned mode does not allow that */
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
|
||||
block_group->length);
|
||||
/* Wait for extent buffers to be written. */
|
||||
if (is_metadata)
|
||||
wait_eb_writebacks(block_group);
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user