blk-mq: make the polling code adaptive
The previous commit introduced the hybrid sleep/poll mode. Take
that one step further, and use the completion latencies to
automatically sleep for half the mean completion time. This is
a good approximation.

This changes the 'io_poll_delay' sysfs file a bit to expose the
various options. Depending on the value, the polling code will
behave differently:

-1	Never enter hybrid sleep mode
 0	Use half of the completion mean for the sleep delay
>0	Use this specific value as the sleep delay

Signed-off-by: Jens Axboe <axboe@fb.com>
Tested-By: Stephen Bates <sbates@raithlin.com>
Reviewed-By: Stephen Bates <sbates@raithlin.com>
parent 06426adf07
commit 64f1c21e86
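As a usage illustration (not part of the commit), the new 'io_poll_delay' semantics can be exercised from user space. The sysfs path and the nvme0n1 device name below are assumptions, and the value written follows the -1 / 0 / >0 rules from the message above.

/* Hypothetical helper: pick a polling mode for one block device.
 * Path and device name are assumptions; the value semantics follow
 * the commit message: -1 = never hybrid sleep, 0 = adaptive
 * (half of the completion mean), >0 = fixed delay in usec. */
#include <stdio.h>

static int set_io_poll_delay(const char *dev, int usec)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/io_poll_delay", dev);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", usec);
	return fclose(f);
}

int main(void)
{
	/* 0 selects the adaptive mode introduced by this commit */
	return set_io_poll_delay("nvme0n1", 0) ? 1 : 0;
}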
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2132,6 +2132,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	 */
 	q->nr_requests = set->queue_depth;
 
+	/*
+	 * Default to classic polling
+	 */
+	q->poll_nsec = -1;
+
 	if (set->ops->complete)
 		blk_queue_softirq_done(q, set->ops->complete);
 
@@ -2469,14 +2474,70 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
+				       struct blk_mq_hw_ctx *hctx,
+				       struct request *rq)
+{
+	struct blk_rq_stat stat[2];
+	unsigned long ret = 0;
+
+	/*
+	 * If stats collection isn't on, don't sleep but turn it on for
+	 * future users
+	 */
+	if (!blk_stat_enable(q))
+		return 0;
+
+	/*
+	 * We don't have to do this once per IO, should optimize this
+	 * to just use the current window of stats until it changes
+	 */
+	memset(&stat, 0, sizeof(stat));
+	blk_hctx_stat_get(hctx, stat);
+
+	/*
+	 * As an optimistic guess, use half of the mean service time
+	 * for this type of request. We can (and should) make this smarter.
+	 * For instance, if the completion latencies are tight, we can
+	 * get closer than just half the mean. This is especially
+	 * important on devices where the completion latencies are longer
+	 * than ~10 usec.
+	 */
+	if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples)
+		ret = (stat[BLK_STAT_READ].mean + 1) / 2;
+	else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples)
+		ret = (stat[BLK_STAT_WRITE].mean + 1) / 2;
+
+	return ret;
+}
+
 static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+				     struct blk_mq_hw_ctx *hctx,
 				     struct request *rq)
 {
 	struct hrtimer_sleeper hs;
 	enum hrtimer_mode mode;
+	unsigned int nsecs;
 	ktime_t kt;
 
-	if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+	if (test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+		return false;
+
+	/*
+	 * poll_nsec can be:
+	 *
+	 * -1:	don't ever hybrid sleep
+	 *  0:	use half of prev avg
+	 * >0:	use this specific value
+	 */
+	if (q->poll_nsec == -1)
+		return false;
+	else if (q->poll_nsec > 0)
+		nsecs = q->poll_nsec;
+	else
+		nsecs = blk_mq_poll_nsecs(q, hctx, rq);
+
+	if (!nsecs)
 		return false;
 
 	set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
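The adaptive case reduces to "half the observed mean completion time, rounded up". A small user-space model of that selection, with hypothetical names (poll_nsec, mean_ns) standing in for the queue setting and the blk_rq_stat mean, is sketched below; it only mirrors the arithmetic, it is not kernel code.

/* User-space model of the sleep-delay selection above (assumed names). */
#include <stdio.h>

/* poll_nsec: -1 never sleep, 0 adaptive, >0 fixed; mean_ns: observed mean. */
static unsigned long hybrid_sleep_ns(long poll_nsec, unsigned long mean_ns)
{
	if (poll_nsec == -1)
		return 0;		/* classic polling, no pre-sleep */
	if (poll_nsec > 0)
		return poll_nsec;	/* fixed, user-provided delay */
	if (!mean_ns)
		return 0;		/* no samples yet: fall back to busy poll */
	return (mean_ns + 1) / 2;	/* adaptive: half the mean, rounded up */
}

int main(void)
{
	/* e.g. a device completing reads in ~8 usec sleeps for ~4 usec */
	printf("%lu\n", hybrid_sleep_ns(0, 8000));	/* prints 4000 */
	return 0;
}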
@@ -2485,7 +2546,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	 * This will be replaced with the stats tracking code, using
 	 * 'avg_completion_time / 2' as the pre-sleep target.
 	 */
-	kt = ktime_set(0, q->poll_nsec);
+	kt = ktime_set(0, nsecs);
 
 	mode = HRTIMER_MODE_REL;
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
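For context only: the remainder of blk_mq_poll_hybrid_sleep(), unchanged here and introduced by the parent commit, arms the timer and sleeps until the request completes or the timer fires. A rough, paraphrased sketch (details approximate, not part of this diff):

	/* Sketch of the unchanged tail of blk_mq_poll_hybrid_sleep(),
	 * paraphrased from the parent commit; treat details as approximate. */
	hrtimer_set_expires(&hs.timer, kt);
	hrtimer_init_sleeper(&hs, current);
	do {
		if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
			break;				/* IO already done */
		set_current_state(TASK_UNINTERRUPTIBLE);
		hrtimer_start_expires(&hs.timer, mode);
		if (hs.task)
			io_schedule();			/* sleep until wakeup or timer */
		hrtimer_cancel(&hs.timer);
		mode = HRTIMER_MODE_ABS;
	} while (hs.task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&hs.timer);
	return true;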
@@ -2520,7 +2581,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	 * the IO isn't complete, we'll get called again and will go
 	 * straight to the busy poll loop.
 	 */
-	if (blk_mq_poll_hybrid_sleep(q, rq))
+	if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
 		return true;
 
 	hctx->poll_considered++;
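__blk_mq_poll() therefore runs in two phases: try the hybrid sleep once, then fall into the busy-poll loop driven by q->mq_ops->poll(). A self-contained user-space caricature of that two-phase pattern, with all names hypothetical, is sketched below.

/* Two-phase "hybrid poll" caricature: sleep roughly half the expected
 * completion latency, then busy-poll for the remainder. Purely
 * illustrative; the stubbed functions are hypothetical. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static int polls_left = 3;

static bool io_done(void)		/* stands in for q->mq_ops->poll() */
{
	return --polls_left <= 0;
}

static unsigned long expected_ns(void)	/* stands in for blk_mq_poll_nsecs() */
{
	return 8000;			/* pretend the mean completion is 8 usec */
}

static void hybrid_poll(void)
{
	struct timespec ts = { 0, (long)(expected_ns() / 2) };

	nanosleep(&ts, NULL);		/* phase 1: sleep instead of spinning */
	while (!io_done())		/* phase 2: busy-poll the last stretch */
		;
}

int main(void)
{
	hybrid_poll();
	printf("request completed\n");
	return 0;
}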
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -352,24 +352,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->poll_nsec / 1000, page);
+	int val;
+
+	if (q->poll_nsec == -1)
+		val = -1;
+	else
+		val = q->poll_nsec / 1000;
+
+	return sprintf(page, "%d\n", val);
 }
 
 static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
 				      size_t count)
 {
-	unsigned long poll_usec;
-	ssize_t ret;
+	int err, val;
 
 	if (!q->mq_ops || !q->mq_ops->poll)
 		return -EINVAL;
 
-	ret = queue_var_store(&poll_usec, page, count);
-	if (ret < 0)
-		return ret;
+	err = kstrtoint(page, 10, &val);
+	if (err < 0)
+		return err;
 
-	q->poll_nsec = poll_usec * 1000;
-	return ret;
+	if (val == -1)
+		q->poll_nsec = -1;
+	else
+		q->poll_nsec = val * 1000;
+
+	return count;
 }
 
 static ssize_t queue_poll_show(struct request_queue *q, char *page)
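With this change the attribute reports -1 verbatim and any other value in microseconds. A minimal read-back example, again assuming the device name and sysfs path:

/* Read 'io_poll_delay' back and interpret it; path/device are assumptions. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/nvme0n1/queue/io_poll_delay", "r");
	int val;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &val) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	if (val == -1)
		printf("hybrid sleep disabled (classic polling)\n");
	else if (val == 0)
		printf("adaptive: sleep for half the mean completion time\n");
	else
		printf("fixed sleep of %d usec before polling\n", val);
	return 0;
}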
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -509,7 +509,7 @@ struct request_queue {
 	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
-	unsigned int		poll_nsec;
+	int			poll_nsec;
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 	struct list_head	timeout_list;