blk-mq: make sure elevator callbacks aren't called for passthrough request

When q->elevator is set, passthrough requests can still be marked as
RQF_ELV, so some elevator callbacks will be called for them.

Fix this by splitting RQF_ELV into RQF_SCHED_TAGS, which is set for all
requests that are issued on a queue that uses an I/O scheduler, and
RQF_USE_SCHED, which is set only for non-flush, non-passthrough requests
on such a queue.

Roughly based on two different patches from
Ming Lei <ming.lei@redhat.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230518053101.760632-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2023-05-18 07:31:01 +02:00 committed by Jens Axboe
parent fdcab6cdde
commit dd6216bb16
5 changed files with 44 additions and 36 deletions

View File

@ -249,6 +249,8 @@ static const char *const rqf_name[] = {
RQF_NAME(MIXED_MERGE), RQF_NAME(MIXED_MERGE),
RQF_NAME(MQ_INFLIGHT), RQF_NAME(MQ_INFLIGHT),
RQF_NAME(DONTPREP), RQF_NAME(DONTPREP),
RQF_NAME(SCHED_TAGS),
RQF_NAME(USE_SCHED),
RQF_NAME(FAILED), RQF_NAME(FAILED),
RQF_NAME(QUIET), RQF_NAME(QUIET),
RQF_NAME(IO_STAT), RQF_NAME(IO_STAT),
@ -258,7 +260,6 @@ static const char *const rqf_name[] = {
RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED), RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(TIMED_OUT), RQF_NAME(TIMED_OUT),
RQF_NAME(ELV),
RQF_NAME(RESV), RQF_NAME(RESV),
}; };
#undef RQF_NAME #undef RQF_NAME

View File

@ -37,7 +37,7 @@ static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio) struct bio *bio)
{ {
if (rq->rq_flags & RQF_ELV) { if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
if (e->type->ops.allow_merge) if (e->type->ops.allow_merge)
@ -48,7 +48,7 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
{ {
if (rq->rq_flags & RQF_ELV) { if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = rq->q->elevator; struct elevator_queue *e = rq->q->elevator;
if (e->type->ops.completed_request) if (e->type->ops.completed_request)
@ -58,7 +58,7 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
static inline void blk_mq_sched_requeue_request(struct request *rq) static inline void blk_mq_sched_requeue_request(struct request *rq)
{ {
if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags)) { if (rq->rq_flags & RQF_USE_SCHED) {
struct request_queue *q = rq->q; struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;

View File

@ -354,12 +354,12 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
data->rq_flags |= RQF_IO_STAT; data->rq_flags |= RQF_IO_STAT;
rq->rq_flags = data->rq_flags; rq->rq_flags = data->rq_flags;
if (!(data->rq_flags & RQF_ELV)) { if (data->rq_flags & RQF_SCHED_TAGS) {
rq->tag = tag;
rq->internal_tag = BLK_MQ_NO_TAG;
} else {
rq->tag = BLK_MQ_NO_TAG; rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = tag; rq->internal_tag = tag;
} else {
rq->tag = tag;
rq->internal_tag = BLK_MQ_NO_TAG;
} }
rq->timeout = 0; rq->timeout = 0;
@ -386,14 +386,13 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
WRITE_ONCE(rq->deadline, 0); WRITE_ONCE(rq->deadline, 0);
req_ref_set(rq, 1); req_ref_set(rq, 1);
if (rq->rq_flags & RQF_ELV) { if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = data->q->elevator; struct elevator_queue *e = data->q->elevator;
INIT_HLIST_NODE(&rq->hash); INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node); RB_CLEAR_NODE(&rq->rb_node);
if (!op_is_flush(data->cmd_flags) && if (e->type->ops.prepare_request)
e->type->ops.prepare_request)
e->type->ops.prepare_request(rq); e->type->ops.prepare_request(rq);
} }
@ -447,26 +446,32 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
data->flags |= BLK_MQ_REQ_NOWAIT; data->flags |= BLK_MQ_REQ_NOWAIT;
if (q->elevator) { if (q->elevator) {
struct elevator_queue *e = q->elevator; /*
* All requests use scheduler tags when an I/O scheduler is
data->rq_flags |= RQF_ELV; * enabled for the queue.
*/
data->rq_flags |= RQF_SCHED_TAGS;
/* /*
* Flush/passthrough requests are special and go directly to the * Flush/passthrough requests are special and go directly to the
* dispatch list. Don't include reserved tags in the * dispatch list.
* limiting, as it isn't useful.
*/ */
if (!op_is_flush(data->cmd_flags) && if (!op_is_flush(data->cmd_flags) &&
!blk_op_is_passthrough(data->cmd_flags) && !blk_op_is_passthrough(data->cmd_flags)) {
e->type->ops.limit_depth && struct elevator_mq_ops *ops = &q->elevator->type->ops;
!(data->flags & BLK_MQ_REQ_RESERVED))
e->type->ops.limit_depth(data->cmd_flags, data); WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED);
data->rq_flags |= RQF_USE_SCHED;
if (ops->limit_depth)
ops->limit_depth(data->cmd_flags, data);
}
} }
retry: retry:
data->ctx = blk_mq_get_ctx(q); data->ctx = blk_mq_get_ctx(q);
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
if (!(data->rq_flags & RQF_ELV)) if (!(data->rq_flags & RQF_SCHED_TAGS))
blk_mq_tag_busy(data->hctx); blk_mq_tag_busy(data->hctx);
if (data->flags & BLK_MQ_REQ_RESERVED) if (data->flags & BLK_MQ_REQ_RESERVED)
@ -646,10 +651,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
goto out_queue_exit; goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu); data.ctx = __blk_mq_get_ctx(q, cpu);
if (!q->elevator) if (q->elevator)
blk_mq_tag_busy(data.hctx); data.rq_flags |= RQF_SCHED_TAGS;
else else
data.rq_flags |= RQF_ELV; blk_mq_tag_busy(data.hctx);
if (flags & BLK_MQ_REQ_RESERVED) if (flags & BLK_MQ_REQ_RESERVED)
data.rq_flags |= RQF_RESV; data.rq_flags |= RQF_RESV;
@ -694,7 +699,7 @@ void blk_mq_free_request(struct request *rq)
struct request_queue *q = rq->q; struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags) && if ((rq->rq_flags & RQF_USE_SCHED) &&
q->elevator->type->ops.finish_request) q->elevator->type->ops.finish_request)
q->elevator->type->ops.finish_request(rq); q->elevator->type->ops.finish_request(rq);
@ -1268,7 +1273,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
if (!plug->multiple_queues && last && last->q != rq->q) if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true; plug->multiple_queues = true;
if (!plug->has_elevator && (rq->rq_flags & RQF_ELV)) if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
plug->has_elevator = true; plug->has_elevator = true;
rq->rq_next = NULL; rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq); rq_list_add(&plug->mq_list, rq);
@ -2620,7 +2625,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
return; return;
} }
if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) { if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
blk_mq_insert_request(rq, 0); blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, false); blk_mq_run_hw_queue(hctx, false);
return; return;
@ -2983,7 +2988,7 @@ void blk_mq_submit_bio(struct bio *bio)
} }
hctx = rq->mq_hctx; hctx = rq->mq_hctx;
if ((rq->rq_flags & RQF_ELV) || if ((rq->rq_flags & RQF_USE_SCHED) ||
(hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
blk_mq_insert_request(rq, 0); blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, true); blk_mq_run_hw_queue(hctx, true);

View File

@ -226,9 +226,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags)
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{ {
if (!(data->rq_flags & RQF_ELV)) if (data->rq_flags & RQF_SCHED_TAGS)
return data->hctx->tags;
return data->hctx->sched_tags; return data->hctx->sched_tags;
return data->hctx->tags;
} }
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)

View File

@ -38,6 +38,10 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
/* don't call prep for this one */ /* don't call prep for this one */
#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) #define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
/* use hctx->sched_tags */
#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8))
/* use an I/O scheduler for this request */
#define RQF_USE_SCHED ((__force req_flags_t)(1 << 9))
/* vaguely specified driver internal error. Ignored by the block layer */ /* vaguely specified driver internal error. Ignored by the block layer */
#define RQF_FAILED ((__force req_flags_t)(1 << 10)) #define RQF_FAILED ((__force req_flags_t)(1 << 10))
/* don't warn about errors */ /* don't warn about errors */
@ -57,8 +61,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* ->timeout has been called, don't expire again */ /* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
/* queue has elevator attached */
#define RQF_ELV ((__force req_flags_t)(1 << 22))
#define RQF_RESV ((__force req_flags_t)(1 << 23)) #define RQF_RESV ((__force req_flags_t)(1 << 23))
/* flags that prevent us from merging requests: */ /* flags that prevent us from merging requests: */
@ -842,7 +844,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
*/ */
static inline bool blk_mq_need_time_stamp(struct request *rq) static inline bool blk_mq_need_time_stamp(struct request *rq)
{ {
return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
} }
static inline bool blk_mq_is_reserved_rq(struct request *rq) static inline bool blk_mq_is_reserved_rq(struct request *rq)
@ -858,7 +860,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror, struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *)) void (*complete)(struct io_comp_batch *))
{ {
if (!iob || (req->rq_flags & RQF_ELV) || ioerror || if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror ||
(req->end_io && !blk_rq_is_passthrough(req))) (req->end_io && !blk_rq_is_passthrough(req)))
return false; return false;