mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
block/mq-deadline: Add cgroup support
Maintain statistics per cgroup and export these to user space. These statistics are essential for verifying whether the proper I/O priorities have been assigned to requests. An example of the statistics data with this patch applied: $ cat /sys/fs/cgroup/io.stat 11:2 rbytes=0 wbytes=0 rios=3 wios=0 dbytes=0 dios=0 [NONE] dispatched=0 inserted=0 merged=171 [RT] dispatched=0 inserted=0 merged=0 [BE] dispatched=0 inserted=0 merged=0 [IDLE] dispatched=0 inserted=0 merged=0 8:32 rbytes=2142720 wbytes=0 rios=105 wios=0 dbytes=0 dios=0 [NONE] dispatched=0 inserted=0 merged=171 [RT] dispatched=0 inserted=0 merged=0 [BE] dispatched=0 inserted=0 merged=0 [IDLE] dispatched=0 inserted=0 merged=0 Cc: Damien Le Moal <damien.lemoal@wdc.com> Cc: Hannes Reinecke <hare@suse.de> Cc: Christoph Hellwig <hch@lst.de> Cc: Ming Lei <ming.lei@redhat.com> Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com> Cc: Himanshu Madhani <himanshu.madhani@oracle.com> Signed-off-by: Bart Van Assche <bvanassche@acm.org> Link: https://lore.kernel.org/r/20210618004456.7280-16-bvanassche@acm.org Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
38ba64d12d
commit
08a9ad8bf6
@ -9,6 +9,12 @@ config MQ_IOSCHED_DEADLINE
|
||||
help
|
||||
MQ version of the deadline IO scheduler.
|
||||
|
||||
config MQ_IOSCHED_DEADLINE_CGROUP
|
||||
tristate
|
||||
default y
|
||||
depends on MQ_IOSCHED_DEADLINE
|
||||
depends on BLK_CGROUP
|
||||
|
||||
config MQ_IOSCHED_KYBER
|
||||
tristate "Kyber I/O scheduler"
|
||||
default y
|
||||
|
@ -21,6 +21,8 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
|
||||
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
|
||||
mq-deadline-y += mq-deadline-main.o
|
||||
mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
|
||||
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
|
||||
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
|
||||
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
|
||||
|
126
block/mq-deadline-cgroup.c
Normal file
126
block/mq-deadline-cgroup.c
Normal file
@ -0,0 +1,126 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/ioprio.h>
|
||||
|
||||
#include "mq-deadline-cgroup.h"
|
||||
|
||||
static struct blkcg_policy dd_blkcg_policy;
|
||||
|
||||
static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp)
|
||||
{
|
||||
struct dd_blkcg *pd;
|
||||
|
||||
pd = kzalloc(sizeof(*pd), gfp);
|
||||
if (!pd)
|
||||
return NULL;
|
||||
pd->stats = alloc_percpu_gfp(typeof(*pd->stats),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!pd->stats) {
|
||||
kfree(pd);
|
||||
return NULL;
|
||||
}
|
||||
return &pd->cpd;
|
||||
}
|
||||
|
||||
static void dd_cpd_free(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd);
|
||||
|
||||
free_percpu(dd_blkcg->stats);
|
||||
kfree(dd_blkcg);
|
||||
}
|
||||
|
||||
static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd)
|
||||
{
|
||||
return container_of(blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy),
|
||||
struct dd_blkcg, cpd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert an association between a block cgroup and a request queue into a
|
||||
* pointer to the mq-deadline information associated with a (blkcg, queue) pair.
|
||||
*/
|
||||
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
|
||||
{
|
||||
struct blkg_policy_data *pd;
|
||||
|
||||
pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy);
|
||||
if (!pd)
|
||||
return NULL;
|
||||
|
||||
return dd_blkcg_from_pd(pd);
|
||||
}
|
||||
|
||||
static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
|
||||
{
|
||||
static const char *const prio_class_name[] = {
|
||||
[IOPRIO_CLASS_NONE] = "NONE",
|
||||
[IOPRIO_CLASS_RT] = "RT",
|
||||
[IOPRIO_CLASS_BE] = "BE",
|
||||
[IOPRIO_CLASS_IDLE] = "IDLE",
|
||||
};
|
||||
struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd);
|
||||
int res = 0;
|
||||
u8 prio;
|
||||
|
||||
for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++)
|
||||
res += scnprintf(buf + res, size - res,
|
||||
" [%s] dispatched=%u inserted=%u merged=%u",
|
||||
prio_class_name[prio],
|
||||
ddcg_sum(blkcg, dispatched, prio) +
|
||||
ddcg_sum(blkcg, merged, prio) -
|
||||
ddcg_sum(blkcg, completed, prio),
|
||||
ddcg_sum(blkcg, inserted, prio) -
|
||||
ddcg_sum(blkcg, completed, prio),
|
||||
ddcg_sum(blkcg, merged, prio));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q,
|
||||
struct blkcg *blkcg)
|
||||
{
|
||||
struct dd_blkg *pd;
|
||||
|
||||
pd = kzalloc(sizeof(*pd), gfp);
|
||||
if (!pd)
|
||||
return NULL;
|
||||
return &pd->pd;
|
||||
}
|
||||
|
||||
static void dd_pd_free(struct blkg_policy_data *pd)
|
||||
{
|
||||
struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd);
|
||||
|
||||
kfree(dd_blkg);
|
||||
}
|
||||
|
||||
static struct blkcg_policy dd_blkcg_policy = {
|
||||
.cpd_alloc_fn = dd_cpd_alloc,
|
||||
.cpd_free_fn = dd_cpd_free,
|
||||
|
||||
.pd_alloc_fn = dd_pd_alloc,
|
||||
.pd_free_fn = dd_pd_free,
|
||||
.pd_stat_fn = dd_pd_stat,
|
||||
};
|
||||
|
||||
int dd_activate_policy(struct request_queue *q)
|
||||
{
|
||||
return blkcg_activate_policy(q, &dd_blkcg_policy);
|
||||
}
|
||||
|
||||
void dd_deactivate_policy(struct request_queue *q)
|
||||
{
|
||||
blkcg_deactivate_policy(q, &dd_blkcg_policy);
|
||||
}
|
||||
|
||||
int __init dd_blkcg_init(void)
|
||||
{
|
||||
return blkcg_policy_register(&dd_blkcg_policy);
|
||||
}
|
||||
|
||||
void __exit dd_blkcg_exit(void)
|
||||
{
|
||||
blkcg_policy_unregister(&dd_blkcg_policy);
|
||||
}
|
114
block/mq-deadline-cgroup.h
Normal file
114
block/mq-deadline-cgroup.h
Normal file
@ -0,0 +1,114 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#if !defined(_MQ_DEADLINE_CGROUP_H_)
|
||||
#define _MQ_DEADLINE_CGROUP_H_
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
struct request_queue;
|
||||
|
||||
/**
|
||||
* struct io_stats_per_prio - I/O statistics per I/O priority class.
|
||||
* @inserted: Number of inserted requests.
|
||||
* @merged: Number of merged requests.
|
||||
* @dispatched: Number of dispatched requests.
|
||||
* @completed: Number of I/O completions.
|
||||
*/
|
||||
struct io_stats_per_prio {
|
||||
local_t inserted;
|
||||
local_t merged;
|
||||
local_t dispatched;
|
||||
local_t completed;
|
||||
};
|
||||
|
||||
/* I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*). */
|
||||
struct blkcg_io_stats {
|
||||
struct io_stats_per_prio stats[4];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct dd_blkcg - Per cgroup data.
|
||||
* @cpd: blkcg_policy_data structure.
|
||||
* @stats: I/O statistics.
|
||||
*/
|
||||
struct dd_blkcg {
|
||||
struct blkcg_policy_data cpd; /* must be the first member */
|
||||
struct blkcg_io_stats __percpu *stats;
|
||||
};
|
||||
|
||||
/*
|
||||
* Count one event of type 'event_type' and with I/O priority class
|
||||
* 'prio_class'.
|
||||
*/
|
||||
#define ddcg_count(ddcg, event_type, prio_class) do { \
|
||||
if (ddcg) { \
|
||||
struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats); \
|
||||
\
|
||||
BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
|
||||
BUILD_BUG_ON(!__same_type((prio_class), u8)); \
|
||||
local_inc(&io_stats->stats[(prio_class)].event_type); \
|
||||
put_cpu_ptr(io_stats); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls
|
||||
* across all CPUs. No locking or barriers since it is fine if the returned
|
||||
* sum is slightly outdated.
|
||||
*/
|
||||
#define ddcg_sum(ddcg, event_type, prio) ({ \
|
||||
unsigned int cpu; \
|
||||
u32 sum = 0; \
|
||||
\
|
||||
BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
|
||||
BUILD_BUG_ON(!__same_type((prio), u8)); \
|
||||
for_each_present_cpu(cpu) \
|
||||
sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)-> \
|
||||
stats[(prio)].event_type); \
|
||||
sum; \
|
||||
})
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
/**
|
||||
* struct dd_blkg - Per (cgroup, request queue) data.
|
||||
* @pd: blkg_policy_data structure.
|
||||
*/
|
||||
struct dd_blkg {
|
||||
struct blkg_policy_data pd; /* must be the first member */
|
||||
};
|
||||
|
||||
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio);
|
||||
int dd_activate_policy(struct request_queue *q);
|
||||
void dd_deactivate_policy(struct request_queue *q);
|
||||
int __init dd_blkcg_init(void);
|
||||
void __exit dd_blkcg_exit(void);
|
||||
|
||||
#else /* CONFIG_BLK_CGROUP */
|
||||
|
||||
static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int dd_activate_policy(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void dd_deactivate_policy(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int dd_blkcg_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void dd_blkcg_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
#endif /* _MQ_DEADLINE_CGROUP_H_ */
|
@ -25,6 +25,7 @@
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "mq-deadline-cgroup.h"
|
||||
|
||||
/*
|
||||
* See Documentation/block/deadline-iosched.rst
|
||||
@ -51,14 +52,6 @@ enum dd_prio {
|
||||
|
||||
enum { DD_PRIO_COUNT = 3 };
|
||||
|
||||
/* I/O statistics per I/O priority. */
|
||||
struct io_stats_per_prio {
|
||||
local_t inserted;
|
||||
local_t merged;
|
||||
local_t dispatched;
|
||||
local_t completed;
|
||||
};
|
||||
|
||||
/* I/O statistics for all I/O priorities (enum dd_prio). */
|
||||
struct io_stats {
|
||||
struct io_stats_per_prio stats[DD_PRIO_COUNT];
|
||||
@ -81,6 +74,9 @@ struct deadline_data {
|
||||
* run time data
|
||||
*/
|
||||
|
||||
/* Request queue that owns this data structure. */
|
||||
struct request_queue *queue;
|
||||
|
||||
struct dd_per_prio per_prio[DD_PRIO_COUNT];
|
||||
|
||||
/* Data direction of latest dispatched request. */
|
||||
@ -232,8 +228,10 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
const u8 ioprio_class = dd_rq_ioclass(next);
|
||||
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
|
||||
struct dd_blkcg *blkcg = next->elv.priv[0];
|
||||
|
||||
dd_count(dd, merged, prio);
|
||||
ddcg_count(blkcg, merged, ioprio_class);
|
||||
|
||||
/*
|
||||
* if next expires before rq, assign its expire time to rq
|
||||
@ -370,6 +368,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
|
||||
{
|
||||
struct request *rq, *next_rq;
|
||||
enum dd_data_dir data_dir;
|
||||
struct dd_blkcg *blkcg;
|
||||
enum dd_prio prio;
|
||||
u8 ioprio_class;
|
||||
|
||||
@ -464,6 +463,8 @@ done:
|
||||
ioprio_class = dd_rq_ioclass(rq);
|
||||
prio = ioprio_class_to_prio[ioprio_class];
|
||||
dd_count(dd, dispatched, prio);
|
||||
blkcg = rq->elv.priv[0];
|
||||
ddcg_count(blkcg, dispatched, ioprio_class);
|
||||
/*
|
||||
* If the request needs its target zone locked, do it.
|
||||
*/
|
||||
@ -540,6 +541,8 @@ static void dd_exit_sched(struct elevator_queue *e)
|
||||
struct deadline_data *dd = e->elevator_data;
|
||||
enum dd_prio prio;
|
||||
|
||||
dd_deactivate_policy(dd->queue);
|
||||
|
||||
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
|
||||
struct dd_per_prio *per_prio = &dd->per_prio[prio];
|
||||
|
||||
@ -553,7 +556,7 @@ static void dd_exit_sched(struct elevator_queue *e)
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize elevator private data (deadline_data).
|
||||
* Initialize elevator private data (deadline_data) and associate with blkcg.
|
||||
*/
|
||||
static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
{
|
||||
@ -562,6 +565,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
enum dd_prio prio;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
/*
|
||||
* Initialization would be very tricky if the queue is not frozen,
|
||||
* hence the warning statement below.
|
||||
*/
|
||||
WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter));
|
||||
|
||||
eq = elevator_alloc(q, e);
|
||||
if (!eq)
|
||||
return ret;
|
||||
@ -577,6 +586,8 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
if (!dd->stats)
|
||||
goto free_dd;
|
||||
|
||||
dd->queue = q;
|
||||
|
||||
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
|
||||
struct dd_per_prio *per_prio = &dd->per_prio[prio];
|
||||
|
||||
@ -595,9 +606,17 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
spin_lock_init(&dd->lock);
|
||||
spin_lock_init(&dd->zone_lock);
|
||||
|
||||
ret = dd_activate_policy(q);
|
||||
if (ret)
|
||||
goto free_stats;
|
||||
|
||||
ret = 0;
|
||||
q->elevator = eq;
|
||||
return 0;
|
||||
|
||||
free_stats:
|
||||
free_percpu(dd->stats);
|
||||
|
||||
free_dd:
|
||||
kfree(dd);
|
||||
|
||||
@ -670,6 +689,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
|
||||
struct dd_per_prio *per_prio;
|
||||
enum dd_prio prio;
|
||||
struct dd_blkcg *blkcg;
|
||||
|
||||
lockdep_assert_held(&dd->lock);
|
||||
|
||||
@ -679,8 +699,19 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
*/
|
||||
blk_req_zone_write_unlock(rq);
|
||||
|
||||
/*
|
||||
* If a block cgroup has been associated with the submitter and if an
|
||||
* I/O priority has been set in the associated block cgroup, use the
|
||||
* lowest of the cgroup priority and the request priority for the
|
||||
* request. If no priority has been set in the request, use the cgroup
|
||||
* priority.
|
||||
*/
|
||||
prio = ioprio_class_to_prio[ioprio_class];
|
||||
dd_count(dd, inserted, prio);
|
||||
blkcg = dd_blkcg_from_bio(rq->bio);
|
||||
ddcg_count(blkcg, inserted, ioprio_class);
|
||||
WARN_ON_ONCE(rq->elv.priv[0]);
|
||||
rq->elv.priv[0] = blkcg;
|
||||
|
||||
if (blk_mq_sched_try_insert_merge(q, rq))
|
||||
return;
|
||||
@ -727,12 +758,10 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
spin_unlock(&dd->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Nothing to do here. This is defined only to ensure that .finish_request
|
||||
* method is called upon request completion.
|
||||
*/
|
||||
/* Callback from inside blk_mq_rq_ctx_init(). */
|
||||
static void dd_prepare_request(struct request *rq)
|
||||
{
|
||||
rq->elv.priv[0] = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -755,11 +784,13 @@ static void dd_finish_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
struct dd_blkcg *blkcg = rq->elv.priv[0];
|
||||
const u8 ioprio_class = dd_rq_ioclass(rq);
|
||||
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
|
||||
struct dd_per_prio *per_prio = &dd->per_prio[prio];
|
||||
|
||||
dd_count(dd, completed, prio);
|
||||
ddcg_count(blkcg, completed, ioprio_class);
|
||||
|
||||
if (blk_queue_is_zoned(q)) {
|
||||
unsigned long flags;
|
||||
@ -1079,11 +1110,26 @@ MODULE_ALIAS("mq-deadline-iosched");
|
||||
|
||||
static int __init deadline_init(void)
|
||||
{
|
||||
return elv_register(&mq_deadline);
|
||||
int ret;
|
||||
|
||||
ret = elv_register(&mq_deadline);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = dd_blkcg_init();
|
||||
if (ret)
|
||||
goto unreg;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
|
||||
unreg:
|
||||
elv_unregister(&mq_deadline);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void __exit deadline_exit(void)
|
||||
{
|
||||
dd_blkcg_exit();
|
||||
elv_unregister(&mq_deadline);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user