linux/drivers/md/bcache/debug.c
Kent Overstreet 5794351146 bcache: Refactor btree io
The most significant change is that btree reads are now done
synchronously, instead of asynchronously and doing the post read stuff
from a workqueue.

This was originally done because we can't block on IO under
generic_make_request(). But - we already have a mechanism to punt cache
lookups to workqueue if needed, so if we just use that we don't have to
deal with the complexity of doing things asynchronously.

The main benefit is this makes the locking situation saner; we can hold
our write lock on the btree node until we're finished reading it, and we
don't need that btree_node_read_done() flag anymore.

Also, for writes, btree_write() was broken out into btree_node_write()
and btree_leaf_dirty() - the old code with the boolean argument was dumb
and confusing.

The prio_blocked mechanism was improved a bit too, now the only counter
is in struct btree_write, we don't mess with transfering a count from
struct btree anymore.

This required changing garbage collection to block prios at the start
and unblock when it finishes, which is cleaner than what it was doing
anyways (the old code had mostly the same effect, but was doing it in a
convoluted way)

And the btree iter btree_node_read_done() uses was converted to a real
mempool.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
2013-06-26 17:09:14 -07:00

566 lines
11 KiB
C

/*
* Assorted bcache debug code
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
*/
#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"
#include <linux/console.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>
static struct dentry *debug;
const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
struct cache *ca = PTR_CACHE(c, k, i);
size_t bucket = PTR_BUCKET_NR(c, k, i);
size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
if (KEY_SIZE(k) + r > c->sb.bucket_size)
return "bad, length too big";
if (bucket < ca->sb.first_bucket)
return "bad, short offset";
if (bucket >= ca->sb.nbuckets)
return "bad, offset past end of device";
if (ptr_stale(c, k, i))
return "stale";
}
if (!bkey_cmp(k, &ZERO_KEY))
return "bad, null key";
if (!KEY_PTRS(k))
return "bad, no pointers";
if (!KEY_SIZE(k))
return "zeroed key";
return "";
}
struct keyprint_hack bch_pkey(const struct bkey *k)
{
unsigned i = 0;
struct keyprint_hack r;
char *out = r.s, *end = r.s + KEYHACK_SIZE;
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));
if (KEY_PTRS(k))
while (1) {
p("%llu:%llu gen %llu",
PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));
if (++i == KEY_PTRS(k))
break;
p(", ");
}
p("]");
if (KEY_DIRTY(k))
p(" dirty");
if (KEY_CSUM(k))
p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
return r;
}
struct keyprint_hack bch_pbtree(const struct btree *b)
{
struct keyprint_hack r;
snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
b->level, b->c->root ? b->c->root->level : -1);
return r;
}
#if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
static bool skipped_backwards(struct btree *b, struct bkey *k)
{
return bkey_cmp(k, (!b->level)
? &START_KEY(bkey_next(k))
: bkey_next(k)) > 0;
}
static void dump_bset(struct btree *b, struct bset *i)
{
struct bkey *k;
unsigned j;
for (k = i->start; k < end(i); k = bkey_next(k)) {
printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
(uint64_t *) k - i->d, i->keys, pkey(k));
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
printk(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
printk(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
}
printk(" %s\n", bch_ptr_status(b->c, k));
if (bkey_next(k) < end(i) &&
skipped_backwards(b, k))
printk(KERN_ERR "Key skipped backwards\n");
}
}
#endif
#ifdef CONFIG_BCACHE_DEBUG
void bch_btree_verify(struct btree *b, struct bset *new)
{
struct btree *v = b->c->verify_data;
struct closure cl;
closure_init_stack(&cl);
if (!b->c->verify)
return;
closure_wait_event(&b->io.wait, &cl,
atomic_read(&b->io.cl.remaining) == -1);
mutex_lock(&b->c->verify_lock);
bkey_copy(&v->key, &b->key);
v->written = 0;
v->level = b->level;
bch_btree_node_read(v);
closure_wait_event(&v->io.wait, &cl,
atomic_read(&b->io.cl.remaining) == -1);
if (new->keys != v->sets[0].data->keys ||
memcmp(new->start,
v->sets[0].data->start,
(void *) end(new) - (void *) new->start)) {
unsigned i, j;
console_lock();
printk(KERN_ERR "*** original memory node:\n");
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
printk(KERN_ERR "*** sorted memory node:\n");
dump_bset(b, new);
printk(KERN_ERR "*** on disk node:\n");
dump_bset(v, v->sets[0].data);
for (j = 0; j < new->keys; j++)
if (new->d[j] != v->sets[0].data->d[j])
break;
console_unlock();
panic("verify failed at %u\n", j);
}
mutex_unlock(&b->c->verify_lock);
}
static void data_verify_endio(struct bio *bio, int error)
{
struct closure *cl = bio->bi_private;
closure_put(cl);
}
void bch_data_verify(struct search *s)
{
char name[BDEVNAME_SIZE];
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct closure *cl = &s->cl;
struct bio *check;
struct bio_vec *bv;
int i;
if (!s->unaligned_bvec)
bio_for_each_segment(bv, s->orig_bio, i)
bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
check = bio_clone(s->orig_bio, GFP_NOIO);
if (!check)
return;
if (bch_bio_alloc_pages(check, GFP_NOIO))
goto out_put;
check->bi_rw = READ_SYNC;
check->bi_private = cl;
check->bi_end_io = data_verify_endio;
closure_bio_submit(check, cl, &dc->disk);
closure_sync(cl);
bio_for_each_segment(bv, s->orig_bio, i) {
void *p1 = kmap(bv->bv_page);
void *p2 = kmap(check->bi_io_vec[i].bv_page);
if (memcmp(p1 + bv->bv_offset,
p2 + bv->bv_offset,
bv->bv_len))
printk(KERN_ERR
"bcache (%s): verify failed at sector %llu\n",
bdevname(dc->bdev, name),
(uint64_t) s->orig_bio->bi_sector);
kunmap(bv->bv_page);
kunmap(check->bi_io_vec[i].bv_page);
}
__bio_for_each_segment(bv, check, i, 0)
__free_page(bv->bv_page);
out_put:
bio_put(check);
}
#endif
#ifdef CONFIG_BCACHE_EDEBUG
unsigned bch_count_data(struct btree *b)
{
unsigned ret = 0;
struct btree_iter iter;
struct bkey *k;
if (!b->level)
for_each_key(b, k, &iter)
ret += KEY_SIZE(k);
return ret;
}
static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
va_list args)
{
unsigned i;
console_lock();
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
vprintk(fmt, args);
console_unlock();
panic("at %s\n", pbtree(b));
}
void bch_check_key_order_msg(struct btree *b, struct bset *i,
const char *fmt, ...)
{
struct bkey *k;
if (!i->keys)
return;
for (k = i->start; bkey_next(k) < end(i); k = bkey_next(k))
if (skipped_backwards(b, k)) {
va_list args;
va_start(args, fmt);
vdump_bucket_and_panic(b, fmt, args);
va_end(args);
}
}
void bch_check_keys(struct btree *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
struct btree_iter iter;
if (b->level)
return;
for_each_key(b, k, &iter) {
if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) {
printk(KERN_ERR "Keys out of order:\n");
goto bug;
}
if (bch_ptr_invalid(b, k))
continue;
if (p && bkey_cmp(p, &START_KEY(k)) > 0) {
printk(KERN_ERR "Overlapping keys:\n");
goto bug;
}
p = k;
}
return;
bug:
va_start(args, fmt);
vdump_bucket_and_panic(b, fmt, args);
va_end(args);
}
#endif
#ifdef CONFIG_DEBUG_FS
/* XXX: cache set refcounting */
struct dump_iterator {
char buf[PAGE_SIZE];
size_t bytes;
struct cache_set *c;
struct keybuf keys;
};
static bool dump_pred(struct keybuf *buf, struct bkey *k)
{
return true;
}
static ssize_t bch_dump_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iterator *i = file->private_data;
ssize_t ret = 0;
while (size) {
struct keybuf_key *w;
unsigned bytes = min(i->bytes, size);
int err = copy_to_user(buf, i->buf, bytes);
if (err)
return err;
ret += bytes;
buf += bytes;
size -= bytes;
i->bytes -= bytes;
memmove(i->buf, i->buf + bytes, i->bytes);
if (i->bytes)
break;
w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
if (!w)
break;
i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key));
bch_keybuf_del(&i->keys, w);
}
return ret;
}
static int bch_dump_open(struct inode *inode, struct file *file)
{
struct cache_set *c = inode->i_private;
struct dump_iterator *i;
i = kzalloc(sizeof(struct dump_iterator), GFP_KERNEL);
if (!i)
return -ENOMEM;
file->private_data = i;
i->c = c;
bch_keybuf_init(&i->keys, dump_pred);
i->keys.last_scanned = KEY(0, 0, 0);
return 0;
}
static int bch_dump_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static const struct file_operations cache_set_debug_ops = {
.owner = THIS_MODULE,
.open = bch_dump_open,
.read = bch_dump_read,
.release = bch_dump_release
};
void bch_debug_init_cache_set(struct cache_set *c)
{
if (!IS_ERR_OR_NULL(debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
c->debug = debugfs_create_file(name, 0400, debug, c,
&cache_set_debug_ops);
}
}
#endif
/* Fuzz tester has rotted: */
#if 0
static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
const char *buffer, size_t size)
{
void dump(struct btree *b)
{
struct bset *i;
for (i = b->sets[0].data;
index(i, b) < btree_blocks(b) &&
i->seq == b->sets[0].data->seq;
i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c))
dump_bset(b, i);
}
struct cache_sb *sb;
struct cache_set *c;
struct btree *all[3], *b, *fill, *orig;
int j;
struct btree_op op;
bch_btree_op_init_stack(&op);
sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL);
if (!sb)
return -ENOMEM;
sb->bucket_size = 128;
sb->block_size = 4;
c = bch_cache_set_alloc(sb);
if (!c)
return -ENOMEM;
for (j = 0; j < 3; j++) {
BUG_ON(list_empty(&c->btree_cache));
all[j] = list_first_entry(&c->btree_cache, struct btree, list);
list_del_init(&all[j]->list);
all[j]->key = KEY(0, 0, c->sb.bucket_size);
bkey_copy_key(&all[j]->key, &MAX_KEY);
}
b = all[0];
fill = all[1];
orig = all[2];
while (1) {
for (j = 0; j < 3; j++)
all[j]->written = all[j]->nsets = 0;
bch_bset_init_next(b);
while (1) {
struct bset *i = write_block(b);
struct bkey *k = op.keys.top;
unsigned rand;
bkey_init(k);
rand = get_random_int();
op.type = rand & 1
? BTREE_INSERT
: BTREE_REPLACE;
rand >>= 1;
SET_KEY_SIZE(k, bucket_remainder(c, rand));
rand >>= c->bucket_bits;
rand &= 1024 * 512 - 1;
rand += c->sb.bucket_size;
SET_KEY_OFFSET(k, rand);
#if 0
SET_KEY_PTRS(k, 1);
#endif
bch_keylist_push(&op.keys);
bch_btree_insert_keys(b, &op);
if (should_split(b) ||
set_blocks(i, b->c) !=
__set_blocks(i, i->keys + 15, b->c)) {
i->csum = csum_set(i);
memcpy(write_block(fill),
i, set_bytes(i));
b->written += set_blocks(i, b->c);
fill->written = b->written;
if (b->written == btree_blocks(b))
break;
bch_btree_sort_lazy(b);
bch_bset_init_next(b);
}
}
memcpy(orig->sets[0].data,
fill->sets[0].data,
btree_bytes(c));
bch_btree_sort(b);
fill->written = 0;
bch_btree_node_read_done(fill);
if (b->sets[0].data->keys != fill->sets[0].data->keys ||
memcmp(b->sets[0].data->start,
fill->sets[0].data->start,
b->sets[0].data->keys * sizeof(uint64_t))) {
struct bset *i = b->sets[0].data;
struct bkey *k, *l;
for (k = i->start,
l = fill->sets[0].data->start;
k < end(i);
k = bkey_next(k), l = bkey_next(l))
if (bkey_cmp(k, l) ||
KEY_SIZE(k) != KEY_SIZE(l))
pr_err("key %zi differs: %s != %s",
(uint64_t *) k - i->d,
pkey(k), pkey(l));
for (j = 0; j < 3; j++) {
pr_err("**** Set %i ****", j);
dump(all[j]);
}
panic("\n");
}
pr_info("fuzz complete: %i keys", b->sets[0].data->keys);
}
}
kobj_attribute_write(fuzz, btree_fuzz);
#endif
void bch_debug_exit(void)
{
if (!IS_ERR_OR_NULL(debug))
debugfs_remove_recursive(debug);
}
int __init bch_debug_init(struct kobject *kobj)
{
int ret = 0;
#if 0
ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr);
if (ret)
return ret;
#endif
debug = debugfs_create_dir("bcache", NULL);
return ret;
}