From 74170118b26e55b611de5210f47657118a03a0e1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 29 Jan 2015 20:17:26 +0800 Subject: [PATCH 1/2] Revert "blk-mq: fix hctx/ctx kobject use-after-free" This reverts commit 76d697d10769048e5721510100bf3a9413a56385. The commit 76d697d10769048 causes general protection fault reported from Bart Van Assche: https://lkml.org/lkml/2015/1/28/334 Reported-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 25 ++----------------------- block/blk-mq.c | 6 +++++- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 6774a0e69867..1630a20d5dcf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -15,26 +15,6 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { - struct request_queue *q; - - q = container_of(kobj, struct request_queue, mq_kobj); - free_percpu(q->queue_ctx); -} - -static void blk_mq_ctx_release(struct kobject *kobj) -{ - struct blk_mq_ctx *ctx; - - ctx = container_of(kobj, struct blk_mq_ctx, kobj); - kobject_put(&ctx->queue->mq_kobj); -} - -static void blk_mq_hctx_release(struct kobject *kobj) -{ - struct blk_mq_hw_ctx *hctx; - - hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); - kfree(hctx); } struct blk_mq_ctx_sysfs_entry { @@ -338,13 +318,13 @@ static struct kobj_type blk_mq_ktype = { static struct kobj_type blk_mq_ctx_ktype = { .sysfs_ops = &blk_mq_sysfs_ops, .default_attrs = default_ctx_attrs, - .release = blk_mq_ctx_release, + .release = blk_mq_sysfs_release, }; static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_hctx_release, + .release = blk_mq_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -375,7 +355,6 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) return ret; hctx_for_each_ctx(hctx, ctx, i) { - kobject_get(&q->mq_kobj); ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); if (ret) break; diff --git a/block/blk-mq.c b/block/blk-mq.c index 9ee3b87c4498..2f95747c287e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1641,8 +1641,10 @@ static void blk_mq_free_hw_queues(struct request_queue *q, struct blk_mq_hw_ctx *hctx; unsigned int i; - queue_for_each_hw_ctx(q, hctx, i) + queue_for_each_hw_ctx(q, hctx, i) { free_cpumask_var(hctx->cpumask); + kfree(hctx); + } } static int blk_mq_init_hctx(struct request_queue *q, @@ -2000,9 +2002,11 @@ void blk_mq_free_queue(struct request_queue *q) percpu_ref_exit(&q->mq_usage_counter); + free_percpu(q->queue_ctx); kfree(q->queue_hw_ctx); kfree(q->mq_map); + q->queue_ctx = NULL; q->queue_hw_ctx = NULL; q->mq_map = NULL; From e09aae7edec1d20824c60a6f0ca4589f99ada17b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 29 Jan 2015 20:17:27 +0800 Subject: [PATCH 2/2] blk-mq: release mq's kobjects in blk_release_queue() The kobject memory inside blk-mq hctx/ctx shouldn't have been freed before the kobject is released because driver core can access it freely before its release. We can't do that in all ctx/hctx/mq_kobj's release handler because it can be run before blk_cleanup_queue(). Given mq_kobj shouldn't have been introduced, this patch simply moves mq's release into blk_release_queue(). Reported-by: Sasha Levin Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 29 ++++++++++++++++++++++------- block/blk-mq.h | 2 ++ block/blk-sysfs.c | 2 ++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2f95747c287e..2390c5541e71 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1641,10 +1641,8 @@ static void blk_mq_free_hw_queues(struct request_queue *q, struct blk_mq_hw_ctx *hctx; unsigned int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) free_cpumask_var(hctx->cpumask); - kfree(hctx); - } } static int blk_mq_init_hctx(struct request_queue *q, @@ -1869,6 +1867,27 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, mutex_unlock(&set->tag_list_lock); } +/* + * It is the actual release handler for mq, but we do it from + * request queue's release handler for avoiding use-after-free + * and headache because q->mq_kobj shouldn't have been introduced, + * but we can't group ctx/kctx kobj without it. + */ +void blk_mq_release(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + /* hctx kobj stays in hctx */ + queue_for_each_hw_ctx(q, hctx, i) + kfree(hctx); + + kfree(q->queue_hw_ctx); + + /* ctx kobj stays in queue_ctx */ + free_percpu(q->queue_ctx); +} + struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct blk_mq_hw_ctx **hctxs; @@ -2002,12 +2021,8 @@ void blk_mq_free_queue(struct request_queue *q) percpu_ref_exit(&q->mq_usage_counter); - free_percpu(q->queue_ctx); - kfree(q->queue_hw_ctx); kfree(q->mq_map); - q->queue_ctx = NULL; - q->queue_hw_ctx = NULL; q->mq_map = NULL; mutex_lock(&all_q_mutex); diff --git a/block/blk-mq.h b/block/blk-mq.h index 4f4f943c22c3..6a48c4c0d8a2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -62,6 +62,8 @@ extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_rq_timed_out(struct request *req, bool reserved); +void blk_mq_release(struct request_queue *q); + /* * Basic implementation of sparser bitmap, allowing the user to spread * the bits over more cachelines. diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 935ea2aa0730..faaf36ade7eb 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -517,6 +517,8 @@ static void blk_release_queue(struct kobject *kobj) if (!q->mq_ops) blk_free_flush_queue(q->fq); + else + blk_mq_release(q); blk_trace_shutdown(q);