blk-mq: Introduce blk_mq_quiesce_queue()
blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations have finished. This function does *not* wait until all outstanding requests have finished (this means invocation of request.end_io()). The algorithm used by blk_mq_quiesce_queue() is as follows: * Hold either an RCU read lock or an SRCU read lock around .queue_rq() calls. The former is used if .queue_rq() does not block and the latter if .queue_rq() may block. * blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues() followed by synchronize_srcu() or synchronize_rcu(). The latter call waits for .queue_rq() invocations that started before blk_mq_quiesce_queue() was called. * The blk_mq_hctx_stopped() calls that control whether or not .queue_rq() will be called are called with the (S)RCU read lock held. This is necessary to avoid race conditions against blk_mq_quiesce_queue(). Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> Cc: Hannes Reinecke <hare@suse.com> Cc: Johannes Thumshirn <jthumshirn@suse.de> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Ming Lei <tom.leiming@gmail.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
9b7dd572cc
commit
6a83e74d21
@ -5,6 +5,7 @@ menuconfig BLOCK
|
||||
bool "Enable the block layer" if EXPERT
|
||||
default y
|
||||
select SBITMAP
|
||||
select SRCU
|
||||
help
|
||||
Provide block layer support for the kernel.
|
||||
|
||||
|
@ -115,6 +115,33 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
|
||||
|
||||
/**
|
||||
* blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
|
||||
* @q: request queue.
|
||||
*
|
||||
* Note: this function does not prevent that the struct request end_io()
|
||||
* callback function is invoked. Additionally, it is not prevented that
|
||||
* new queue_rq() calls occur unless the queue has been stopped first.
|
||||
*/
|
||||
void blk_mq_quiesce_queue(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned int i;
|
||||
bool rcu = false;
|
||||
|
||||
blk_mq_stop_hw_queues(q);
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
||||
synchronize_srcu(&hctx->queue_rq_srcu);
|
||||
else
|
||||
rcu = true;
|
||||
}
|
||||
if (rcu)
|
||||
synchronize_rcu();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
|
||||
|
||||
void blk_mq_wake_waiters(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
@ -766,7 +793,7 @@ static inline unsigned int queued_to_index(unsigned int queued)
|
||||
* of IO. In particular, we'd like FIFO behaviour on handling existing
|
||||
* items on the hctx->dispatch list. Ignore that for now.
|
||||
*/
|
||||
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct request *rq;
|
||||
@ -778,9 +805,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx)))
|
||||
return;
|
||||
|
||||
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
|
||||
cpu_online(hctx->next_cpu));
|
||||
|
||||
hctx->run++;
|
||||
|
||||
/*
|
||||
@ -871,6 +895,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
}
|
||||
}
|
||||
|
||||
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
int srcu_idx;
|
||||
|
||||
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
|
||||
cpu_online(hctx->next_cpu));
|
||||
|
||||
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
|
||||
rcu_read_lock();
|
||||
blk_mq_process_rq_list(hctx);
|
||||
rcu_read_unlock();
|
||||
} else {
|
||||
srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
|
||||
blk_mq_process_rq_list(hctx);
|
||||
srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* It'd be great if the workqueue API had a way to pass
|
||||
* in a mask and had some smarts for more clever placement.
|
||||
@ -1268,7 +1310,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
|
||||
struct blk_mq_alloc_data data;
|
||||
struct request *rq;
|
||||
unsigned int request_count = 0;
|
||||
unsigned int request_count = 0, srcu_idx;
|
||||
struct blk_plug *plug;
|
||||
struct request *same_queue_rq = NULL;
|
||||
blk_qc_t cookie;
|
||||
@ -1311,7 +1353,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
|
||||
/*
|
||||
* We do limited pluging. If the bio can be merged, do that.
|
||||
* We do limited plugging. If the bio can be merged, do that.
|
||||
* Otherwise the existing request in the plug list will be
|
||||
* issued. So the plug list will have one request at most
|
||||
*/
|
||||
@ -1331,7 +1373,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_mq_put_ctx(data.ctx);
|
||||
if (!old_rq)
|
||||
goto done;
|
||||
blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
|
||||
|
||||
if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
|
||||
rcu_read_lock();
|
||||
blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
|
||||
rcu_read_unlock();
|
||||
} else {
|
||||
srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
|
||||
blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
|
||||
srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
|
||||
}
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -1610,6 +1661,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
|
||||
if (set->ops->exit_hctx)
|
||||
set->ops->exit_hctx(hctx, hctx_idx);
|
||||
|
||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
||||
cleanup_srcu_struct(&hctx->queue_rq_srcu);
|
||||
|
||||
blk_mq_remove_cpuhp(hctx);
|
||||
blk_free_flush_queue(hctx->fq);
|
||||
sbitmap_free(&hctx->ctx_map);
|
||||
@ -1690,6 +1744,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
||||
flush_start_tag + hctx_idx, node))
|
||||
goto free_fq;
|
||||
|
||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
||||
init_srcu_struct(&hctx->queue_rq_srcu);
|
||||
|
||||
return 0;
|
||||
|
||||
free_fq:
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/sbitmap.h>
|
||||
#include <linux/srcu.h>
|
||||
|
||||
struct blk_mq_tags;
|
||||
struct blk_flush_queue;
|
||||
@ -35,6 +36,8 @@ struct blk_mq_hw_ctx {
|
||||
|
||||
struct blk_mq_tags *tags;
|
||||
|
||||
struct srcu_struct queue_rq_srcu;
|
||||
|
||||
unsigned long queued;
|
||||
unsigned long run;
|
||||
#define BLK_MQ_MAX_DISPATCH_ORDER 7
|
||||
|
@ -918,6 +918,7 @@ extern void __blk_run_queue(struct request_queue *q);
|
||||
extern void __blk_run_queue_uncond(struct request_queue *q);
|
||||
extern void blk_run_queue(struct request_queue *);
|
||||
extern void blk_run_queue_async(struct request_queue *q);
|
||||
extern void blk_mq_quiesce_queue(struct request_queue *q);
|
||||
extern int blk_rq_map_user(struct request_queue *, struct request *,
|
||||
struct rq_map_data *, void __user *, unsigned long,
|
||||
gfp_t);
|
||||
|
Loading…
Reference in New Issue
Block a user