linux/block/blk-mq-sysfs.c
Dan Williams 3ef28e83ab block: generic request_queue reference counting
Allow pmem, and other synchronous/bio-based block drivers, to fallback
on a per-cpu reference count managed by the core for tracking queue
live/dead state.

The existing per-cpu reference count for the blk_mq case is promoted to
be used in all block i/o scenarios.  This involves initializing it by
default, waiting for it to drop to zero at exit, and holding a live
reference over the invocation of q->make_request_fn() in
generic_make_request().  The blk_mq code continues to take its own
reference per blk_mq request and retains the ability to freeze the
queue, but the check that the queue is frozen is moved to
generic_make_request().

This fixes crash signatures like the following:

 BUG: unable to handle kernel paging request at ffff880140000000
 [..]
 Call Trace:
  [<ffffffff8145e8bf>] ? copy_user_handle_tail+0x5f/0x70
  [<ffffffffa004e1e0>] pmem_do_bvec.isra.11+0x70/0xf0 [nd_pmem]
  [<ffffffffa004e331>] pmem_make_request+0xd1/0x200 [nd_pmem]
  [<ffffffff811c3162>] ? mempool_alloc+0x72/0x1a0
  [<ffffffff8141f8b6>] generic_make_request+0xd6/0x110
  [<ffffffff8141f966>] submit_bio+0x76/0x170
  [<ffffffff81286dff>] submit_bh_wbc+0x12f/0x160
  [<ffffffff81286e62>] submit_bh+0x12/0x20
  [<ffffffff813395bd>] jbd2_write_superblock+0x8d/0x170
  [<ffffffff8133974d>] jbd2_mark_journal_empty+0x5d/0x90
  [<ffffffff813399cb>] jbd2_journal_destroy+0x24b/0x270
  [<ffffffff810bc4ca>] ? put_pwq_unlocked+0x2a/0x30
  [<ffffffff810bc6f5>] ? destroy_workqueue+0x225/0x250
  [<ffffffff81303494>] ext4_put_super+0x64/0x360
  [<ffffffff8124ab1a>] generic_shutdown_super+0x6a/0xf0

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2015-10-21 14:43:41 -06:00

478 lines
11 KiB
C

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/blk-mq.h>
#include "blk-mq.h"
#include "blk-mq-tag.h"
static void blk_mq_sysfs_release(struct kobject *kobj)
{
}
struct blk_mq_ctx_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct blk_mq_ctx *, char *);
ssize_t (*store)(struct blk_mq_ctx *, const char *, size_t);
};
struct blk_mq_hw_ctx_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
};
static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
char *page)
{
struct blk_mq_ctx_sysfs_entry *entry;
struct blk_mq_ctx *ctx;
struct request_queue *q;
ssize_t res;
entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
ctx = container_of(kobj, struct blk_mq_ctx, kobj);
q = ctx->queue;
if (!entry->show)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->show(ctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
}
static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
struct blk_mq_ctx_sysfs_entry *entry;
struct blk_mq_ctx *ctx;
struct request_queue *q;
ssize_t res;
entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
ctx = container_of(kobj, struct blk_mq_ctx, kobj);
q = ctx->queue;
if (!entry->store)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->store(ctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
}
static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
struct attribute *attr, char *page)
{
struct blk_mq_hw_ctx_sysfs_entry *entry;
struct blk_mq_hw_ctx *hctx;
struct request_queue *q;
ssize_t res;
entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
q = hctx->queue;
if (!entry->show)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->show(hctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
}
static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
struct attribute *attr, const char *page,
size_t length)
{
struct blk_mq_hw_ctx_sysfs_entry *entry;
struct blk_mq_hw_ctx *hctx;
struct request_queue *q;
ssize_t res;
entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
q = hctx->queue;
if (!entry->store)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->store(hctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
}
static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
ctx->rq_dispatched[0]);
}
static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu\n", ctx->rq_merged);
}
static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
ctx->rq_completed[0]);
}
static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
{
struct request *rq;
int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg);
list_for_each_entry(rq, list, queuelist) {
const int rq_len = 2 * sizeof(rq) + 2;
/* if the output will be truncated */
if (PAGE_SIZE - 1 < len + rq_len) {
/* backspacing if it can't hold '\t...\n' */
if (PAGE_SIZE - 1 < len + 5)
len -= rq_len;
len += snprintf(page + len, PAGE_SIZE - 1 - len,
"\t...\n");
break;
}
len += snprintf(page + len, PAGE_SIZE - 1 - len,
"\t%p\n", rq);
}
return len;
}
static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
{
ssize_t ret;
spin_lock(&ctx->lock);
ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
spin_unlock(&ctx->lock);
return ret;
}
static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
return sprintf(page, "%lu\n", hctx->queued);
}
static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "%lu\n", hctx->run);
}
static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
char *start_page = page;
int i;
page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) {
unsigned long d = 1U << (i - 1);
page += sprintf(page, "%8lu\t%lu\n", d, hctx->dispatched[i]);
}
return page - start_page;
}
static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
ssize_t ret;
spin_lock(&hctx->lock);
ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
spin_unlock(&hctx->lock);
return ret;
}
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return blk_mq_tag_sysfs_show(hctx->tags, page);
}
static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
}
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
unsigned int i, first = 1;
ssize_t ret = 0;
for_each_cpu(i, hctx->cpumask) {
if (first)
ret += sprintf(ret + page, "%u", i);
else
ret += sprintf(ret + page, ", %u", i);
first = 0;
}
ret += sprintf(ret + page, "\n");
return ret;
}
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
.attr = {.name = "merged", .mode = S_IRUGO },
.show = blk_mq_sysfs_merged_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
.attr = {.name = "completed", .mode = S_IRUGO },
.show = blk_mq_sysfs_completed_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
.attr = {.name = "rq_list", .mode = S_IRUGO },
.show = blk_mq_sysfs_rq_list_show,
};
static struct attribute *default_ctx_attrs[] = {
&blk_mq_sysfs_dispatched.attr,
&blk_mq_sysfs_merged.attr,
&blk_mq_sysfs_completed.attr,
&blk_mq_sysfs_rq_list.attr,
NULL,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = {
.attr = {.name = "queued", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_queued_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = {
.attr = {.name = "run", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_run_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_dispatched_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
.attr = {.name = "active", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_active_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
.attr = {.name = "pending", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_rq_list_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
.attr = {.name = "tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
.attr = {.name = "cpu_list", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_cpus_show,
};
static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
&blk_mq_hw_sysfs_run.attr,
&blk_mq_hw_sysfs_dispatched.attr,
&blk_mq_hw_sysfs_pending.attr,
&blk_mq_hw_sysfs_tags.attr,
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
NULL,
};
static const struct sysfs_ops blk_mq_sysfs_ops = {
.show = blk_mq_sysfs_show,
.store = blk_mq_sysfs_store,
};
static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
.show = blk_mq_hw_sysfs_show,
.store = blk_mq_hw_sysfs_store,
};
static struct kobj_type blk_mq_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
.release = blk_mq_sysfs_release,
};
static struct kobj_type blk_mq_ctx_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
.default_attrs = default_ctx_attrs,
.release = blk_mq_sysfs_release,
};
static struct kobj_type blk_mq_hw_ktype = {
.sysfs_ops = &blk_mq_hw_sysfs_ops,
.default_attrs = default_hw_ctx_attrs,
.release = blk_mq_sysfs_release,
};
static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_ctx *ctx;
int i;
if (!hctx->nr_ctx)
return;
hctx_for_each_ctx(hctx, ctx, i)
kobject_del(&ctx->kobj);
kobject_del(&hctx->kobj);
}
static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct blk_mq_ctx *ctx;
int i, ret;
if (!hctx->nr_ctx)
return 0;
ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
if (ret)
return ret;
hctx_for_each_ctx(hctx, ctx, i) {
ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
if (ret)
break;
}
return ret;
}
void blk_mq_unregister_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int i, j;
blk_mq_disable_hotplug();
queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_unregister_hctx(hctx);
hctx_for_each_ctx(hctx, ctx, j)
kobject_put(&ctx->kobj);
kobject_put(&hctx->kobj);
}
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
kobject_del(&q->mq_kobj);
kobject_put(&q->mq_kobj);
kobject_put(&disk_to_dev(disk)->kobj);
q->mq_sysfs_init_done = false;
blk_mq_enable_hotplug();
}
static void blk_mq_sysfs_init(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int i;
kobject_init(&q->mq_kobj, &blk_mq_ktype);
queue_for_each_hw_ctx(q, hctx, i)
kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
queue_for_each_ctx(q, ctx, i)
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}
int blk_mq_register_disk(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk);
struct request_queue *q = disk->queue;
struct blk_mq_hw_ctx *hctx;
int ret, i;
blk_mq_disable_hotplug();
blk_mq_sysfs_init(q);
ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
if (ret < 0)
goto out;
kobject_uevent(&q->mq_kobj, KOBJ_ADD);
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
if (ret)
break;
}
if (ret)
blk_mq_unregister_disk(disk);
else
q->mq_sysfs_init_done = true;
out:
blk_mq_enable_hotplug();
return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_register_disk);
void blk_mq_sysfs_unregister(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
if (!q->mq_sysfs_init_done)
return;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx);
}
int blk_mq_sysfs_register(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i, ret = 0;
if (!q->mq_sysfs_init_done)
return ret;
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
if (ret)
break;
}
return ret;
}