blk-mq: not embed .mq_kobj and ctx->kobj into queue instance
Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime from block layer's view, actually they don't because userspace may grab one kobject anytime via sysfs. This patch fixes the issue by the following approach: 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing all ctxs 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release handler of .mq_kobj 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that .mq_kobj is always released after all ctxs are freed. This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE is enabled. Reported-by: Guenter Roeck <linux@roeck-us.net> Cc: "jianchao.wang" <jianchao.w.wang@oracle.com> Tested-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
0c62bff1fd
commit
1db4909e76
@ -15,6 +15,18 @@
|
||||
|
||||
static void blk_mq_sysfs_release(struct kobject *kobj)
|
||||
{
|
||||
struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);
|
||||
|
||||
free_percpu(ctxs->queue_ctx);
|
||||
kfree(ctxs);
|
||||
}
|
||||
|
||||
static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
|
||||
|
||||
/* ctx->ctxs won't be released until all ctx are freed */
|
||||
kobject_put(&ctx->ctxs->kobj);
|
||||
}
|
||||
|
||||
static void blk_mq_hw_sysfs_release(struct kobject *kobj)
|
||||
@ -213,7 +225,7 @@ static struct kobj_type blk_mq_ktype = {
|
||||
static struct kobj_type blk_mq_ctx_ktype = {
|
||||
.sysfs_ops = &blk_mq_sysfs_ops,
|
||||
.default_attrs = default_ctx_attrs,
|
||||
.release = blk_mq_sysfs_release,
|
||||
.release = blk_mq_ctx_sysfs_release,
|
||||
};
|
||||
|
||||
static struct kobj_type blk_mq_hw_ktype = {
|
||||
@ -245,7 +257,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
|
||||
if (!hctx->nr_ctx)
|
||||
return 0;
|
||||
|
||||
ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
|
||||
ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -268,8 +280,8 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
|
||||
queue_for_each_hw_ctx(q, hctx, i)
|
||||
blk_mq_unregister_hctx(hctx);
|
||||
|
||||
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
|
||||
kobject_del(&q->mq_kobj);
|
||||
kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
|
||||
kobject_del(q->mq_kobj);
|
||||
kobject_put(&dev->kobj);
|
||||
|
||||
q->mq_sysfs_init_done = false;
|
||||
@ -289,7 +301,7 @@ void blk_mq_sysfs_deinit(struct request_queue *q)
|
||||
ctx = per_cpu_ptr(q->queue_ctx, cpu);
|
||||
kobject_put(&ctx->kobj);
|
||||
}
|
||||
kobject_put(&q->mq_kobj);
|
||||
kobject_put(q->mq_kobj);
|
||||
}
|
||||
|
||||
void blk_mq_sysfs_init(struct request_queue *q)
|
||||
@ -297,10 +309,12 @@ void blk_mq_sysfs_init(struct request_queue *q)
|
||||
struct blk_mq_ctx *ctx;
|
||||
int cpu;
|
||||
|
||||
kobject_init(&q->mq_kobj, &blk_mq_ktype);
|
||||
kobject_init(q->mq_kobj, &blk_mq_ktype);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
ctx = per_cpu_ptr(q->queue_ctx, cpu);
|
||||
|
||||
kobject_get(q->mq_kobj);
|
||||
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
|
||||
}
|
||||
}
|
||||
@ -313,11 +327,11 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
|
||||
WARN_ON_ONCE(!q->kobj.parent);
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
|
||||
ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
|
||||
ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
kobject_uevent(&q->mq_kobj, KOBJ_ADD);
|
||||
kobject_uevent(q->mq_kobj, KOBJ_ADD);
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
ret = blk_mq_register_hctx(hctx);
|
||||
@ -334,8 +348,8 @@ unreg:
|
||||
while (--i >= 0)
|
||||
blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
|
||||
|
||||
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
|
||||
kobject_del(&q->mq_kobj);
|
||||
kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
|
||||
kobject_del(q->mq_kobj);
|
||||
kobject_put(&dev->kobj);
|
||||
return ret;
|
||||
}
|
||||
|
@ -2515,6 +2515,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
|
||||
mutex_unlock(&set->tag_list_lock);
|
||||
}
|
||||
|
||||
/* All allocations will be freed in release handler of q->mq_kobj */
|
||||
static int blk_mq_alloc_ctxs(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_ctxs *ctxs;
|
||||
int cpu;
|
||||
|
||||
ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
|
||||
if (!ctxs)
|
||||
return -ENOMEM;
|
||||
|
||||
ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
|
||||
if (!ctxs->queue_ctx)
|
||||
goto fail;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
|
||||
ctx->ctxs = ctxs;
|
||||
}
|
||||
|
||||
q->mq_kobj = &ctxs->kobj;
|
||||
q->queue_ctx = ctxs->queue_ctx;
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
kfree(ctxs);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is the actual release handler for mq, but we do it from
|
||||
* request queue's release handler for avoiding use-after-free
|
||||
@ -2540,8 +2568,6 @@ void blk_mq_release(struct request_queue *q)
|
||||
* both share lifetime with request queue.
|
||||
*/
|
||||
blk_mq_sysfs_deinit(q);
|
||||
|
||||
free_percpu(q->queue_ctx);
|
||||
}
|
||||
|
||||
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
||||
@ -2731,8 +2757,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
if (!q->poll_cb)
|
||||
goto err_exit;
|
||||
|
||||
q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
|
||||
if (!q->queue_ctx)
|
||||
if (blk_mq_alloc_ctxs(q))
|
||||
goto err_exit;
|
||||
|
||||
/* init q->mq_kobj and sw queues' kobjects */
|
||||
@ -2742,7 +2767,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
|
||||
GFP_KERNEL, set->numa_node);
|
||||
if (!q->queue_hw_ctx)
|
||||
goto err_percpu;
|
||||
goto err_sys_init;
|
||||
|
||||
blk_mq_realloc_hw_ctxs(set, q);
|
||||
if (!q->nr_hw_queues)
|
||||
@ -2794,8 +2819,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
|
||||
err_hctxs:
|
||||
kfree(q->queue_hw_ctx);
|
||||
err_percpu:
|
||||
free_percpu(q->queue_ctx);
|
||||
err_sys_init:
|
||||
blk_mq_sysfs_deinit(q);
|
||||
err_exit:
|
||||
q->mq_ops = NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -7,6 +7,11 @@
|
||||
|
||||
struct blk_mq_tag_set;
|
||||
|
||||
struct blk_mq_ctxs {
|
||||
struct kobject kobj;
|
||||
struct blk_mq_ctx __percpu *queue_ctx;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_ctx - State for a software queue facing the submitting CPUs
|
||||
*/
|
||||
@ -27,6 +32,7 @@ struct blk_mq_ctx {
|
||||
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
|
||||
|
||||
struct request_queue *queue;
|
||||
struct blk_mq_ctxs *ctxs;
|
||||
struct kobject kobj;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
|
@ -456,7 +456,7 @@ struct request_queue {
|
||||
/*
|
||||
* mq queue kobject
|
||||
*/
|
||||
struct kobject mq_kobj;
|
||||
struct kobject *mq_kobj;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
struct blk_integrity integrity;
|
||||
|
Loading…
Reference in New Issue
Block a user