mirror of
https://github.com/torvalds/linux.git
synced 2024-11-12 23:23:03 +00:00
bcachefs: Disk accounting device validation fixes
- Fix failure to validate that accounting replicas entries point to valid devices: this wasn't a real bug since they'd be cleaned up by GC, but is still something we should know about - Fix failure to validate that dev_data_type entries point to valid devices: this does fix a real bug, since bch2_accounting_read() would then try to copy the counters to that device and pop an inconsistent error when the device didn't exist - Remove accounting entries that are zeroed or invalid: if we're not validating them we need to get rid of them: they might not exist in the superblock, so we need the to trigger the superblock mark path when they're readded. This fixes the replication.ktest rereplicate test, which was failing with "superblock not marked for replicas..." Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
9d86178782
commit
19773ec997
@ -242,6 +242,14 @@ void bch2_accounting_swab(struct bkey_s k)
|
||||
*p = swab64(*p);
|
||||
}
|
||||
|
||||
static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r,
|
||||
struct disk_accounting_pos acc)
|
||||
{
|
||||
unsafe_memcpy(r, &acc.replicas,
|
||||
replicas_entry_bytes(&acc.replicas),
|
||||
"variable length struct");
|
||||
}
|
||||
|
||||
static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struct bpos p)
|
||||
{
|
||||
struct disk_accounting_pos acc_k;
|
||||
@ -249,9 +257,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
|
||||
|
||||
switch (acc_k.type) {
|
||||
case BCH_DISK_ACCOUNTING_replicas:
|
||||
unsafe_memcpy(r, &acc_k.replicas,
|
||||
replicas_entry_bytes(&acc_k.replicas),
|
||||
"variable length struct");
|
||||
__accounting_to_replicas(r, acc_k);
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -608,6 +614,81 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
|
||||
struct disk_accounting_pos acc,
|
||||
u64 *v, unsigned nr)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0, invalid_dev = -1;
|
||||
|
||||
switch (acc.type) {
|
||||
case BCH_DISK_ACCOUNTING_replicas: {
|
||||
struct bch_replicas_padded r;
|
||||
__accounting_to_replicas(&r.e, acc);
|
||||
|
||||
for (unsigned i = 0; i < r.e.nr_devs; i++)
|
||||
if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
|
||||
!bch2_dev_exists(c, r.e.devs[i])) {
|
||||
invalid_dev = r.e.devs[i];
|
||||
goto invalid_device;
|
||||
}
|
||||
|
||||
/*
|
||||
* All replicas entry checks except for invalid device are done
|
||||
* in bch2_accounting_validate
|
||||
*/
|
||||
BUG_ON(bch2_replicas_entry_validate(&r.e, c, &buf));
|
||||
|
||||
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
|
||||
trans, accounting_replicas_not_marked,
|
||||
"accounting not marked in superblock replicas\n %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_accounting_key_to_text(&buf, &acc),
|
||||
buf.buf))) {
|
||||
/*
|
||||
* We're not RW yet and still single threaded, dropping
|
||||
* and retaking lock is ok:
|
||||
*/
|
||||
percpu_up_write(&c->mark_lock);
|
||||
ret = bch2_mark_replicas(c, &r.e);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
percpu_down_write(&c->mark_lock);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case BCH_DISK_ACCOUNTING_dev_data_type:
|
||||
if (!bch2_dev_exists(c, acc.dev_data_type.dev)) {
|
||||
invalid_dev = acc.dev_data_type.dev;
|
||||
goto invalid_device;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
invalid_device:
|
||||
if (fsck_err(trans, accounting_to_invalid_device,
|
||||
"accounting entry points to invalid device %i\n %s",
|
||||
invalid_dev,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_accounting_key_to_text(&buf, &acc),
|
||||
buf.buf))) {
|
||||
for (unsigned i = 0; i < nr; i++)
|
||||
v[i] = -v[i];
|
||||
|
||||
ret = commit_do(trans, NULL, NULL, 0,
|
||||
bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?:
|
||||
-BCH_ERR_remove_disk_accounting_entry;
|
||||
} else {
|
||||
ret = -BCH_ERR_remove_disk_accounting_entry;
|
||||
}
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* At startup time, initialize the in memory accounting from the btree (and
|
||||
* journal)
|
||||
@ -666,44 +747,42 @@ int bch2_accounting_read(struct bch_fs *c)
|
||||
}
|
||||
keys->gap = keys->nr = dst - keys->data;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
for (unsigned i = 0; i < acc->k.nr; i++) {
|
||||
percpu_down_write(&c->mark_lock);
|
||||
unsigned i = 0;
|
||||
while (i < acc->k.nr) {
|
||||
unsigned idx = inorder_to_eytzinger0(i, acc->k.nr);
|
||||
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos);
|
||||
|
||||
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
|
||||
bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
|
||||
|
||||
if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
|
||||
continue;
|
||||
|
||||
struct bch_replicas_padded r;
|
||||
if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
|
||||
continue;
|
||||
bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false);
|
||||
|
||||
/*
|
||||
* If the replicas entry is invalid it'll get cleaned up by
|
||||
* check_allocations:
|
||||
* If the entry counters are zeroed, it should be treated as
|
||||
* nonexistent - it might point to an invalid device.
|
||||
*
|
||||
* Remove it, so that if it's re-added it gets re-marked in the
|
||||
* superblock:
|
||||
*/
|
||||
if (bch2_replicas_entry_validate(&r.e, c, &buf))
|
||||
ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters)
|
||||
? -BCH_ERR_remove_disk_accounting_entry
|
||||
: bch2_disk_accounting_validate_late(trans, acc_k,
|
||||
v, acc->k.data[idx].nr_counters);
|
||||
|
||||
if (ret == -BCH_ERR_remove_disk_accounting_entry) {
|
||||
free_percpu(acc->k.data[idx].v[0]);
|
||||
free_percpu(acc->k.data[idx].v[1]);
|
||||
darray_remove_item(&acc->k, &acc->k.data[idx]);
|
||||
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
|
||||
accounting_pos_cmp, NULL);
|
||||
ret = 0;
|
||||
continue;
|
||||
|
||||
struct disk_accounting_pos k;
|
||||
bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
|
||||
|
||||
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
|
||||
trans, accounting_replicas_not_marked,
|
||||
"accounting not marked in superblock replicas\n %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_accounting_key_to_text(&buf, &k),
|
||||
buf.buf))) {
|
||||
/*
|
||||
* We're not RW yet and still single threaded, dropping
|
||||
* and retaking lock is ok:
|
||||
*/
|
||||
percpu_up_read(&c->mark_lock);
|
||||
ret = bch2_mark_replicas(c, &r.e);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
percpu_down_read(&c->mark_lock);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
i++;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
@ -742,7 +821,7 @@ int bch2_accounting_read(struct bch_fs *c)
|
||||
}
|
||||
preempt_enable();
|
||||
fsck_err:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
percpu_up_write(&c->mark_lock);
|
||||
err:
|
||||
printbuf_exit(&buf);
|
||||
bch2_trans_put(trans);
|
||||
|
@ -268,7 +268,8 @@
|
||||
x(BCH_ERR_nopromote, nopromote_no_writes) \
|
||||
x(BCH_ERR_nopromote, nopromote_enomem) \
|
||||
x(0, invalid_snapshot_node) \
|
||||
x(0, option_needs_open_fs)
|
||||
x(0, option_needs_open_fs) \
|
||||
x(0, remove_disk_accounting_entry)
|
||||
|
||||
enum bch_errcode {
|
||||
BCH_ERR_START = 2048,
|
||||
|
@ -291,6 +291,7 @@ enum bch_fsck_flags {
|
||||
x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
|
||||
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
|
||||
x(accounting_replicas_not_marked, 273, 0) \
|
||||
x(accounting_to_invalid_device, 289, 0) \
|
||||
x(invalid_btree_id, 274, 0) \
|
||||
x(alloc_key_io_time_bad, 275, 0) \
|
||||
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
|
||||
@ -300,7 +301,7 @@ enum bch_fsck_flags {
|
||||
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
|
||||
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
|
||||
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
|
||||
x(MAX, 289, 0)
|
||||
x(MAX, 290, 0)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
|
||||
|
Loading…
Reference in New Issue
Block a user