mirror of
https://github.com/torvalds/linux.git
synced 2024-12-11 21:52:04 +00:00
8f9e7b65f8
Throttled bios can't be issued after del_gendisk() is done, thus it's better to cancel them immediately rather than waiting for throttle is done. For example, if user thread is throttled with low bps while it's issuing large io, and the device is deleted. The user thread will wait for a long time for io to return. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20220318130144.1066064-4-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
190 lines
6.0 KiB
C
190 lines
6.0 KiB
C
#ifndef BLK_THROTTLE_H
|
|
#define BLK_THROTTLE_H
|
|
|
|
#include "blk-cgroup-rwstat.h"
|
|
|
|
/*
|
|
* To implement hierarchical throttling, throtl_grps form a tree and bios
|
|
* are dispatched upwards level by level until they reach the top and get
|
|
* issued. When dispatching bios from the children and local group at each
|
|
* level, if the bios are dispatched into a single bio_list, there's a risk
|
|
* of a local or child group which can queue many bios at once filling up
|
|
* the list starving others.
|
|
*
|
|
* To avoid such starvation, dispatched bios are queued separately
|
|
* according to where they came from. When they are again dispatched to
|
|
* the parent, they're popped in round-robin order so that no single source
|
|
* hogs the dispatch window.
|
|
*
|
|
* throtl_qnode is used to keep the queued bios separated by their sources.
|
|
* Bios are queued to throtl_qnode which in turn is queued to
|
|
* throtl_service_queue and then dispatched in round-robin order.
|
|
*
|
|
* It's also used to track the reference counts on blkg's. A qnode always
|
|
* belongs to a throtl_grp and gets queued on itself or the parent, so
|
|
* incrementing the reference of the associated throtl_grp when a qnode is
|
|
* queued and decrementing when dequeued is enough to keep the whole blkg
|
|
* tree pinned while bios are in flight.
|
|
*/
|
|
struct throtl_qnode {
|
|
struct list_head node; /* service_queue->queued[] */
|
|
struct bio_list bios; /* queued bios */
|
|
struct throtl_grp *tg; /* tg this qnode belongs to */
|
|
};
|
|
|
|
struct throtl_service_queue {
|
|
struct throtl_service_queue *parent_sq; /* the parent service_queue */
|
|
|
|
/*
|
|
* Bios queued directly to this service_queue or dispatched from
|
|
* children throtl_grp's.
|
|
*/
|
|
struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */
|
|
unsigned int nr_queued[2]; /* number of queued bios */
|
|
|
|
/*
|
|
* RB tree of active children throtl_grp's, which are sorted by
|
|
* their ->disptime.
|
|
*/
|
|
struct rb_root_cached pending_tree; /* RB tree of active tgs */
|
|
unsigned int nr_pending; /* # queued in the tree */
|
|
unsigned long first_pending_disptime; /* disptime of the first tg */
|
|
struct timer_list pending_timer; /* fires on first_pending_disptime */
|
|
};
|
|
|
|
/*
 * Bit flags for a throttle group's state. Each value occupies its own bit
 * so that several can be OR-ed together and tested with a mask.
 */
enum tg_state_flags {
	THROTL_TG_PENDING		= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY		= 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_HAS_IOPS_LIMIT	= 1 << 2,	/* tg has iops limit */
	THROTL_TG_CANCELING		= 1 << 3,	/* starts to cancel bio */
};
|
|
|
|
/*
 * Indices of the limit sets kept per group. LIMIT_CNT is the number of
 * sets and must stay last so it can be used as an array dimension.
 */
enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};
|
|
|
|
struct throtl_grp {
|
|
/* must be the first member */
|
|
struct blkg_policy_data pd;
|
|
|
|
/* active throtl group service_queue member */
|
|
struct rb_node rb_node;
|
|
|
|
/* throtl_data this group belongs to */
|
|
struct throtl_data *td;
|
|
|
|
/* this group's service queue */
|
|
struct throtl_service_queue service_queue;
|
|
|
|
/*
|
|
* qnode_on_self is used when bios are directly queued to this
|
|
* throtl_grp so that local bios compete fairly with bios
|
|
* dispatched from children. qnode_on_parent is used when bios are
|
|
* dispatched from this throtl_grp into its parent and will compete
|
|
* with the sibling qnode_on_parents and the parent's
|
|
* qnode_on_self.
|
|
*/
|
|
struct throtl_qnode qnode_on_self[2];
|
|
struct throtl_qnode qnode_on_parent[2];
|
|
|
|
/*
|
|
* Dispatch time in jiffies. This is the estimated time when group
|
|
* will unthrottle and is ready to dispatch more bio. It is used as
|
|
* key to sort active groups in service tree.
|
|
*/
|
|
unsigned long disptime;
|
|
|
|
unsigned int flags;
|
|
|
|
/* are there any throtl rules between this group and td? */
|
|
bool has_rules[2];
|
|
|
|
/* internally used bytes per second rate limits */
|
|
uint64_t bps[2][LIMIT_CNT];
|
|
/* user configured bps limits */
|
|
uint64_t bps_conf[2][LIMIT_CNT];
|
|
|
|
/* internally used IOPS limits */
|
|
unsigned int iops[2][LIMIT_CNT];
|
|
/* user configured IOPS limits */
|
|
unsigned int iops_conf[2][LIMIT_CNT];
|
|
|
|
/* Number of bytes dispatched in current slice */
|
|
uint64_t bytes_disp[2];
|
|
/* Number of bio's dispatched in current slice */
|
|
unsigned int io_disp[2];
|
|
|
|
unsigned long last_low_overflow_time[2];
|
|
|
|
uint64_t last_bytes_disp[2];
|
|
unsigned int last_io_disp[2];
|
|
|
|
unsigned long last_check_time;
|
|
|
|
unsigned long latency_target; /* us */
|
|
unsigned long latency_target_conf; /* us */
|
|
/* When did we start a new slice */
|
|
unsigned long slice_start[2];
|
|
unsigned long slice_end[2];
|
|
|
|
unsigned long last_finish_time; /* ns / 1024 */
|
|
unsigned long checked_last_finish_time; /* ns / 1024 */
|
|
unsigned long avg_idletime; /* ns / 1024 */
|
|
unsigned long idletime_threshold; /* us */
|
|
unsigned long idletime_threshold_conf; /* us */
|
|
|
|
unsigned int bio_cnt; /* total bios */
|
|
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
|
|
unsigned long bio_cnt_reset_time;
|
|
|
|
struct blkg_rwstat stat_bytes;
|
|
struct blkg_rwstat stat_ios;
|
|
};
|
|
|
|
extern struct blkcg_policy blkcg_policy_throtl;
|
|
|
|
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
|
|
{
|
|
return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
|
|
}
|
|
|
|
static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
|
|
{
|
|
return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
|
|
}
|
|
|
|
/*
|
|
* Internal throttling interface
|
|
*/
|
|
#ifndef CONFIG_BLK_DEV_THROTTLING
|
|
/*
 * Stubs used when CONFIG_BLK_DEV_THROTTLING is not set: they do nothing,
 * blk_throtl_init() reports success (0) and blk_throtl_bio() always
 * returns false.
 */
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct request_queue *q) { }
|
|
#else /* CONFIG_BLK_DEV_THROTTLING */
|
|
/*
 * Out-of-line entry points, defined outside this header (presumably in
 * blk-throttle.c - confirm). __blk_throtl_bio() is the slow path reached
 * from the inline blk_throtl_bio() wrapper.
 */
int blk_throtl_init(struct request_queue *q);
void blk_throtl_exit(struct request_queue *q);
void blk_throtl_register_queue(struct request_queue *q);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct request_queue *q);
|
|
static inline bool blk_throtl_bio(struct bio *bio)
|
|
{
|
|
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
|
|
|
|
/* no need to throttle bps any more if the bio has been throttled */
|
|
if (bio_flagged(bio, BIO_THROTTLED) &&
|
|
!(tg->flags & THROTL_TG_HAS_IOPS_LIMIT))
|
|
return false;
|
|
|
|
if (!tg->has_rules[bio_data_dir(bio)])
|
|
return false;
|
|
|
|
return __blk_throtl_bio(bio);
|
|
}
|
|
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
|
|
|
#endif
|