bcachefs: Disk accounting device validation fixes

- Fix failure to validate that accounting replicas entries point to
  valid devices: this wasn't a real bug since they'd be cleaned up by
  GC, but is still something we should know about

- Fix failure to validate that dev_data_type entries point to valid
  devices: this does fix a real bug, since bch2_accounting_read() would
  then try to copy the counters to that device and pop an inconsistent
  error when the device didn't exist

- Remove accounting entries that are zeroed or invalid: if we're not
  validating them we need to get rid of them: they might not exist in
  the superblock, so we need the to trigger the superblock mark path
  when they're readded.

  This fixes the replication.ktest rereplicate test, which was failing
  with "superblock not marked for replicas..."

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-10-07 18:04:21 -04:00
parent 9d86178782
commit 19773ec997
3 changed files with 119 additions and 38 deletions

View File

@ -242,6 +242,14 @@ void bch2_accounting_swab(struct bkey_s k)
*p = swab64(*p);
}
static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r,
struct disk_accounting_pos acc)
{
unsafe_memcpy(r, &acc.replicas,
replicas_entry_bytes(&acc.replicas),
"variable length struct");
}
static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struct bpos p)
{
struct disk_accounting_pos acc_k;
@ -249,9 +257,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
switch (acc_k.type) {
case BCH_DISK_ACCOUNTING_replicas:
unsafe_memcpy(r, &acc_k.replicas,
replicas_entry_bytes(&acc_k.replicas),
"variable length struct");
__accounting_to_replicas(r, acc_k);
return true;
default:
return false;
@ -608,6 +614,81 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
return ret;
}
static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
struct disk_accounting_pos acc,
u64 *v, unsigned nr)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0, invalid_dev = -1;
switch (acc.type) {
case BCH_DISK_ACCOUNTING_replicas: {
struct bch_replicas_padded r;
__accounting_to_replicas(&r.e, acc);
for (unsigned i = 0; i < r.e.nr_devs; i++)
if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
!bch2_dev_exists(c, r.e.devs[i])) {
invalid_dev = r.e.devs[i];
goto invalid_device;
}
/*
* All replicas entry checks except for invalid device are done
* in bch2_accounting_validate
*/
BUG_ON(bch2_replicas_entry_validate(&r.e, c, &buf));
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
trans, accounting_replicas_not_marked,
"accounting not marked in superblock replicas\n %s",
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &acc),
buf.buf))) {
/*
* We're not RW yet and still single threaded, dropping
* and retaking lock is ok:
*/
percpu_up_write(&c->mark_lock);
ret = bch2_mark_replicas(c, &r.e);
if (ret)
goto fsck_err;
percpu_down_write(&c->mark_lock);
}
break;
}
case BCH_DISK_ACCOUNTING_dev_data_type:
if (!bch2_dev_exists(c, acc.dev_data_type.dev)) {
invalid_dev = acc.dev_data_type.dev;
goto invalid_device;
}
break;
}
fsck_err:
printbuf_exit(&buf);
return ret;
invalid_device:
if (fsck_err(trans, accounting_to_invalid_device,
"accounting entry points to invalid device %i\n %s",
invalid_dev,
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &acc),
buf.buf))) {
for (unsigned i = 0; i < nr; i++)
v[i] = -v[i];
ret = commit_do(trans, NULL, NULL, 0,
bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?:
-BCH_ERR_remove_disk_accounting_entry;
} else {
ret = -BCH_ERR_remove_disk_accounting_entry;
}
goto fsck_err;
}
/*
* At startup time, initialize the in memory accounting from the btree (and
* journal)
@ -666,44 +747,42 @@ int bch2_accounting_read(struct bch_fs *c)
}
keys->gap = keys->nr = dst - keys->data;
percpu_down_read(&c->mark_lock);
for (unsigned i = 0; i < acc->k.nr; i++) {
percpu_down_write(&c->mark_lock);
unsigned i = 0;
while (i < acc->k.nr) {
unsigned idx = inorder_to_eytzinger0(i, acc->k.nr);
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos);
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
continue;
struct bch_replicas_padded r;
if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
continue;
bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false);
/*
* If the replicas entry is invalid it'll get cleaned up by
* check_allocations:
* If the entry counters are zeroed, it should be treated as
* nonexistent - it might point to an invalid device.
*
* Remove it, so that if it's re-added it gets re-marked in the
* superblock:
*/
if (bch2_replicas_entry_validate(&r.e, c, &buf))
ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters)
? -BCH_ERR_remove_disk_accounting_entry
: bch2_disk_accounting_validate_late(trans, acc_k,
v, acc->k.data[idx].nr_counters);
if (ret == -BCH_ERR_remove_disk_accounting_entry) {
free_percpu(acc->k.data[idx].v[0]);
free_percpu(acc->k.data[idx].v[1]);
darray_remove_item(&acc->k, &acc->k.data[idx]);
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
ret = 0;
continue;
struct disk_accounting_pos k;
bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
trans, accounting_replicas_not_marked,
"accounting not marked in superblock replicas\n %s",
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &k),
buf.buf))) {
/*
* We're not RW yet and still single threaded, dropping
* and retaking lock is ok:
*/
percpu_up_read(&c->mark_lock);
ret = bch2_mark_replicas(c, &r.e);
if (ret)
goto fsck_err;
percpu_down_read(&c->mark_lock);
}
if (ret)
goto fsck_err;
i++;
}
preempt_disable();
@ -742,7 +821,7 @@ int bch2_accounting_read(struct bch_fs *c)
}
preempt_enable();
fsck_err:
percpu_up_read(&c->mark_lock);
percpu_up_write(&c->mark_lock);
err:
printbuf_exit(&buf);
bch2_trans_put(trans);

View File

@ -268,7 +268,8 @@
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
x(0, invalid_snapshot_node) \
x(0, option_needs_open_fs)
x(0, option_needs_open_fs) \
x(0, remove_disk_accounting_entry)
enum bch_errcode {
BCH_ERR_START = 2048,

View File

@ -291,6 +291,7 @@ enum bch_fsck_flags {
x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(accounting_to_invalid_device, 289, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
@ -300,7 +301,7 @@ enum bch_fsck_flags {
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(MAX, 289, 0)
x(MAX, 290, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,