block: flush all throttled bios when deleting the cgroup

When a process migrates to another cgroup and the original cgroup is then
deleted, the throttling limits applied to its already-throttled bios cannot
be removed. If those limits are very low, it can take a long time for these
bios to complete.

Follow the same approach used when deleting a disk: remove the limits and
issue the throttled bios when the cgroup is deleted.

This changes the behavior of throttled bios:
Before: the limit on throttled bios could not be changed, and the bios
completed under that limit;
Now: the limit is canceled and the throttled bios are flushed immediately.
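
For illustration only, a minimal userspace sketch of the scenario described
above (not part of the patch). It assumes cgroup v2 is mounted at
/sys/fs/cgroup with the io controller enabled, that the test disk is
major:minor 8:0, and that /mnt/testfile lives on that disk; the cgroup name
"throttle_demo" and the helper write_file() are made up for the example.

/*
 * Sketch: throttle a writer in a cgroup, migrate it away, delete the cgroup.
 * Assumptions (adjust for the test machine): cgroup v2 at /sys/fs/cgroup,
 * io controller enabled, disk is 8:0, /mnt/testfile is on that disk.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define CGRP "/sys/fs/cgroup/throttle_demo"	/* hypothetical cgroup name */

/* Write a string to a cgroup control file, exiting on failure. */
static void write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		exit(1);
	}
	close(fd);
}

int main(void)
{
	char buf[32];
	pid_t pid;

	mkdir(CGRP, 0755);
	/* Throttle writes on device 8:0 (assumed) to 1 MiB/s. */
	write_file(CGRP "/io.max", "8:0 wbps=1048576");

	pid = fork();
	if (pid == 0) {
		/* Child: join the throttled cgroup, then issue 64 MiB of
		 * direct writes that will be throttled to 1 MiB/s. */
		snprintf(buf, sizeof(buf), "%d", getpid());
		write_file(CGRP "/cgroup.procs", buf);
		execlp("dd", "dd", "if=/dev/zero", "of=/mnt/testfile",
		       "bs=1M", "count=64", "oflag=direct", (char *)NULL);
		exit(1);
	}

	sleep(1);	/* let the child queue throttled bios */

	/* Migrate the writer back to the root cgroup ... */
	snprintf(buf, sizeof(buf), "%d", pid);
	write_file("/sys/fs/cgroup/cgroup.procs", buf);
	/* ... and delete the now-empty cgroup that still holds the limit. */
	rmdir(CGRP);

	/* Before the patch this waits roughly 64s; with it the throttled
	 * bios are flushed as soon as the cgroup goes offline. */
	waitpid(pid, NULL, 0);
	return 0;
}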

References:
[1] https://lore.kernel.org/r/20220318130144.1066064-4-ming.lei@redhat.com
[2] https://lore.kernel.org/all/da861d63-58c6-3ca0-2535-9089993e9e28@huaweicloud.com/

Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20240817071108.1919729-1-lilingfeng@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1526,6 +1526,42 @@ static void throtl_shutdown_wq(struct request_queue *q)
 	cancel_work_sync(&td->dispatch_work);
 }
 
+static void tg_flush_bios(struct throtl_grp *tg)
+{
+	struct throtl_service_queue *sq = &tg->service_queue;
+
+	if (tg->flags & THROTL_TG_CANCELING)
+		return;
+
+	/*
+	 * Set the flag to make sure throtl_pending_timer_fn() won't
+	 * stop until all throttled bios are dispatched.
+	 */
+	tg->flags |= THROTL_TG_CANCELING;
+
+	/*
+	 * Do not dispatch cgroup without THROTL_TG_PENDING or cgroup
+	 * will be inserted to service queue without THROTL_TG_PENDING
+	 * set in tg_update_disptime below. Then IO dispatched from
+	 * child in tg_dispatch_one_bio will trigger double insertion
+	 * and corrupt the tree.
+	 */
+	if (!(tg->flags & THROTL_TG_PENDING))
+		return;
+
+	/*
+	 * Update disptime after setting the above flag to make sure
+	 * throtl_select_dispatch() won't exit without dispatching.
+	 */
+	tg_update_disptime(tg);
+	throtl_schedule_pending_timer(sq, jiffies + 1);
+}
+
+static void throtl_pd_offline(struct blkg_policy_data *pd)
+{
+	tg_flush_bios(pd_to_tg(pd));
+}
+
 struct blkcg_policy blkcg_policy_throtl = {
 	.dfl_cftypes		= throtl_files,
 	.legacy_cftypes		= throtl_legacy_files,
@@ -1533,6 +1569,7 @@ struct blkcg_policy blkcg_policy_throtl = {
 	.pd_alloc_fn		= throtl_pd_alloc,
 	.pd_init_fn		= throtl_pd_init,
 	.pd_online_fn		= throtl_pd_online,
+	.pd_offline_fn		= throtl_pd_offline,
 	.pd_free_fn		= throtl_pd_free,
 };
 
@@ -1553,32 +1590,15 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
 	 */
 	rcu_read_lock();
 	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-		struct throtl_grp *tg = blkg_to_tg(blkg);
-		struct throtl_service_queue *sq = &tg->service_queue;
-
 		/*
-		 * Set the flag to make sure throtl_pending_timer_fn() won't
-		 * stop until all throttled bios are dispatched.
+		 * disk_release will call pd_offline_fn to cancel bios.
+		 * However, disk_release can't be called if someone get
+		 * the refcount of device and issued bios which are
+		 * inflight after del_gendisk.
+		 * Cancel bios here to ensure no bios are inflight after
+		 * del_gendisk.
 		 */
-		tg->flags |= THROTL_TG_CANCELING;
-
-		/*
-		 * Do not dispatch cgroup without THROTL_TG_PENDING or cgroup
-		 * will be inserted to service queue without THROTL_TG_PENDING
-		 * set in tg_update_disptime below. Then IO dispatched from
-		 * child in tg_dispatch_one_bio will trigger double insertion
-		 * and corrupt the tree.
-		 */
-		if (!(tg->flags & THROTL_TG_PENDING))
-			continue;
-
-		/*
-		 * Update disptime after setting the above flag to make sure
-		 * throtl_select_dispatch() won't exit without dispatching.
-		 */
-		tg_update_disptime(tg);
-
-		throtl_schedule_pending_timer(sq, jiffies + 1);
+		tg_flush_bios(blkg_to_tg(blkg));
 	}
 	rcu_read_unlock();
 	spin_unlock_irq(&q->queue_lock);