From fb23d57a6dfc4e521c003dc542799f07d22d269e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 11 Feb 2024 22:48:05 -0500 Subject: [PATCH] bcachefs: Convert gc to new accounting Rewrite fsck/gc for the new accounting scheme. This adds a second set of in-memory accounting counters for gc to use; like with other parts of gc we run all trigger in TRIGGER_GC mode, then compare what we calculated to existing in-memory accounting at the end. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 34 +----- fs/bcachefs/alloc_foreground.c | 16 ++- fs/bcachefs/bcachefs.h | 4 +- fs/bcachefs/btree_gc.c | 133 +++------------------ fs/bcachefs/btree_trans_commit.c | 4 +- fs/bcachefs/buckets.c | 185 +++++------------------------ fs/bcachefs/buckets.h | 16 --- fs/bcachefs/buckets_types.h | 7 -- fs/bcachefs/disk_accounting.c | 193 ++++++++++++++++++++++++++----- fs/bcachefs/disk_accounting.h | 86 ++++++++------ fs/bcachefs/ec.c | 97 +++++++--------- fs/bcachefs/inode.c | 43 +++---- fs/bcachefs/recovery.c | 3 +- fs/bcachefs/replicas.c | 86 +------------- fs/bcachefs/replicas.h | 1 - fs/bcachefs/super.c | 9 +- fs/bcachefs/util.h | 4 +- 17 files changed, 344 insertions(+), 577 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3df1099750af..9bb0dbe134d5 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -774,7 +774,7 @@ static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, s }; s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented }; - return bch2_disk_accounting_mod(trans, &acc, d, 3); + return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc); } int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca, @@ -894,7 +894,8 @@ int bch2_trigger_alloc(struct btree_trans *trans, if ((flags & BTREE_TRIGGER_bucket_invalidate) && old_a->cached_sectors) { ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx, - -((s64) old_a->cached_sectors)); + -((s64) old_a->cached_sectors), + flags & BTREE_TRIGGER_gc); if (ret) goto err; } @@ -973,35 +974,6 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (statechange(a->data_type == BCH_DATA_need_gc_gens)) bch2_gc_gens_async(c); } - - if ((flags & BTREE_TRIGGER_gc) && - (flags & BTREE_TRIGGER_bucket_invalidate)) { - struct bch_alloc_v4 new_a_convert; - const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); - - percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, new.k->p.offset); - if (unlikely(!g)) { - percpu_up_read(&c->mark_lock); - goto invalid_bucket; - } - g->gen_valid = 1; - - bucket_lock(g); - - g->gen_valid = 1; - g->gen = new_a->gen; - g->data_type = new_a->data_type; - g->stripe = new_a->stripe; - g->stripe_redundancy = new_a->stripe_redundancy; - g->dirty_sectors = new_a->dirty_sectors; - g->cached_sectors = new_a->cached_sectors; - - bucket_unlock(g); - percpu_up_read(&c->mark_lock); - - bch2_dev_usage_update(c, ca, old_a, new_a); - } err: printbuf_exit(&buf); bch2_dev_put(ca); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 991e07a79064..cabf866c7956 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1708,15 +1708,13 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) prt_printf(out, "capacity\t%llu\n", c->capacity); prt_printf(out, "reserved\t%llu\n", c->reserved); - percpu_down_read(&c->mark_lock); - prt_printf(out, "hidden\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.hidden)); - prt_printf(out, "btree\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.btree)); - prt_printf(out, "data\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.data)); - prt_printf(out, "cached\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.cached)); - prt_printf(out, "reserved\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.reserved)); - prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved)); - prt_printf(out, "nr_inodes\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes)); - percpu_up_read(&c->mark_lock); + prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden)); + prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree)); + prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->usage->data)); + prt_printf(out, "cached\t%llu\n", percpu_u64_get(&c->usage->cached)); + prt_printf(out, "reserved\t%llu\n", percpu_u64_get(&c->usage->reserved)); + prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved)); + prt_printf(out, "nr_inodes\t%llu\n", percpu_u64_get(&c->usage->nr_inodes)); prt_newline(out); prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty"); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index b9f5327ab033..33605fa8e70f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -546,7 +546,6 @@ struct bch_dev { struct rw_semaphore bucket_lock; struct bch_dev_usage __percpu *usage; - struct bch_dev_usage __percpu *usage_gc; /* Allocator: */ u64 new_fs_bucket_idx; @@ -741,7 +740,7 @@ struct bch_fs { struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; - struct bch_accounting_mem accounting; + struct bch_accounting_mem accounting[2]; struct bch_replicas_cpu replicas; struct bch_replicas_cpu replicas_gc; @@ -890,7 +889,6 @@ struct bch_fs { seqcount_t usage_lock; struct bch_fs_usage_base __percpu *usage; - struct bch_fs_usage __percpu *usage_gc; u64 __percpu *online_reserved; struct io_clock io_clock[2]; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index c79258e3e69c..0fe869cff8be 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -20,6 +20,7 @@ #include "buckets.h" #include "clock.h" #include "debug.h" +#include "disk_accounting.h" #include "ec.h" #include "error.h" #include "extents.h" @@ -735,132 +736,25 @@ static int bch2_mark_superblocks(struct bch_fs *c) static void bch2_gc_free(struct bch_fs *c) { + bch2_accounting_free(&c->accounting[1]); + genradix_free(&c->reflink_gc_table); genradix_free(&c->gc_stripes); for_each_member_device(c, ca) { kvfree(rcu_dereference_protected(ca->buckets_gc, 1)); ca->buckets_gc = NULL; - - free_percpu(ca->usage_gc); - ca->usage_gc = NULL; } - - free_percpu(c->usage_gc); - c->usage_gc = NULL; -} - -static int bch2_gc_done(struct bch_fs *c) -{ - struct bch_dev *ca = NULL; - struct printbuf buf = PRINTBUF; - unsigned i; - int ret = 0; - - percpu_down_write(&c->mark_lock); - -#define copy_field(_err, _f, _msg, ...) \ - if (fsck_err_on(dst->_f != src->_f, c, _err, \ - _msg ": got %llu, should be %llu" , ##__VA_ARGS__, \ - dst->_f, src->_f)) \ - dst->_f = src->_f -#define copy_dev_field(_err, _f, _msg, ...) \ - copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__) -#define copy_fs_field(_err, _f, _msg, ...) \ - copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) - - __for_each_member_device(c, ca) { - /* XXX */ - struct bch_dev_usage *dst = this_cpu_ptr(ca->usage); - struct bch_dev_usage *src = (void *) - bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, - dev_usage_u64s()); - - for (i = 0; i < BCH_DATA_NR; i++) { - copy_dev_field(dev_usage_buckets_wrong, - d[i].buckets, "%s buckets", bch2_data_type_str(i)); - copy_dev_field(dev_usage_sectors_wrong, - d[i].sectors, "%s sectors", bch2_data_type_str(i)); - copy_dev_field(dev_usage_fragmented_wrong, - d[i].fragmented, "%s fragmented", bch2_data_type_str(i)); - } - } - - { -#if 0 - unsigned nr = fs_usage_u64s(c); - /* XX: */ - struct bch_fs_usage *dst = this_cpu_ptr(c->usage); - struct bch_fs_usage *src = (void *) - bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); - - copy_fs_field(fs_usage_hidden_wrong, - b.hidden, "hidden"); - copy_fs_field(fs_usage_btree_wrong, - b.btree, "btree"); - - copy_fs_field(fs_usage_data_wrong, - b.data, "data"); - copy_fs_field(fs_usage_cached_wrong, - b.cached, "cached"); - copy_fs_field(fs_usage_reserved_wrong, - b.reserved, "reserved"); - copy_fs_field(fs_usage_nr_inodes_wrong, - b.nr_inodes,"nr_inodes"); - - for (i = 0; i < BCH_REPLICAS_MAX; i++) - copy_fs_field(fs_usage_persistent_reserved_wrong, - persistent_reserved[i], - "persistent_reserved[%i]", i); - - for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry_v1 *e = - cpu_replicas_entry(&c->replicas, i); - - printbuf_reset(&buf); - bch2_replicas_entry_to_text(&buf, e); - - copy_fs_field(fs_usage_replicas_wrong, - replicas[i], "%s", buf.buf); - } -#endif - } - -#undef copy_fs_field -#undef copy_dev_field -#undef copy_stripe_field -#undef copy_field -fsck_err: - bch2_dev_put(ca); - bch_err_fn(c, ret); - percpu_up_write(&c->mark_lock); - printbuf_exit(&buf); - return ret; } static int bch2_gc_start(struct bch_fs *c) { - BUG_ON(c->usage_gc); - - c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), - sizeof(u64), GFP_KERNEL); - if (!c->usage_gc) { - bch_err(c, "error allocating c->usage_gc"); - return -BCH_ERR_ENOMEM_gc_start; - } - for_each_member_device(c, ca) { - BUG_ON(ca->usage_gc); - - ca->usage_gc = alloc_percpu(struct bch_dev_usage); - if (!ca->usage_gc) { - bch_err(c, "error allocating ca->usage_gc"); + int ret = bch2_dev_usage_init(ca, true); + if (ret) { bch2_dev_put(ca); - return -BCH_ERR_ENOMEM_gc_start; + return ret; } - - this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets, - ca->mi.nbuckets - ca->mi.first_bucket); } return 0; @@ -908,6 +802,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, gc.data_type = old->data_type; gc.dirty_sectors = old->dirty_sectors; } + percpu_up_read(&c->mark_lock); /* * gc.data_type doesn't yet include need_discard & need_gc_gen states - @@ -916,9 +811,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans, alloc_data_type_set(&gc, gc.data_type); if (gc.data_type != old_gc.data_type || - gc.dirty_sectors != old_gc.dirty_sectors) - bch2_dev_usage_update(c, ca, &old_gc, &gc); - percpu_up_read(&c->mark_lock); + gc.dirty_sectors != old_gc.dirty_sectors) { + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc); + if (ret) + return ret; + } gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); @@ -1235,7 +1132,9 @@ int bch2_check_allocations(struct bch_fs *c) gc_pos_set(c, gc_phase(GC_PHASE_start)); ret = bch2_mark_superblocks(c); - BUG_ON(ret); + bch_err_msg(c, ret, "marking superblocks"); + if (ret) + goto out; ret = bch2_gc_btrees(c); if (ret) @@ -1246,7 +1145,7 @@ int bch2_check_allocations(struct bch_fs *c) bch2_journal_block(&c->journal); out: ret = bch2_gc_alloc_done(c) ?: - bch2_gc_done(c) ?: + bch2_accounting_gc_done(c) ?: bch2_gc_stripes_done(c) ?: bch2_gc_reflink_done(c); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 92305c12cb75..30e24725eb12 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -724,7 +724,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod(trans, accounting_i_to_s_c(a)); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); if (ret) goto revert_fs_usage; } @@ -812,7 +812,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod(trans, a.c); + bch2_accounting_mem_mod_locked(trans, a.c, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 12faf2ffda1c..e3bf7ed5c073 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -84,96 +84,6 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) } } -void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, - const struct bch_alloc_v4 *old, - const struct bch_alloc_v4 *new) -{ - struct bch_fs_usage *fs_usage; - struct bch_dev_usage *u; - - preempt_disable(); - fs_usage = this_cpu_ptr(c->usage_gc); - - if (data_type_is_hidden(old->data_type)) - fs_usage->b.hidden -= ca->mi.bucket_size; - if (data_type_is_hidden(new->data_type)) - fs_usage->b.hidden += ca->mi.bucket_size; - - u = this_cpu_ptr(ca->usage_gc); - - u->d[old->data_type].buckets--; - u->d[new->data_type].buckets++; - - u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old); - u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new); - - u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old); - u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new); - - u->d[BCH_DATA_cached].sectors -= old->cached_sectors; - u->d[BCH_DATA_cached].sectors += new->cached_sectors; - - unsigned old_unstriped = bch2_bucket_sectors_unstriped(*old); - u->d[BCH_DATA_unstriped].buckets -= old_unstriped != 0; - u->d[BCH_DATA_unstriped].sectors -= old_unstriped; - - unsigned new_unstriped = bch2_bucket_sectors_unstriped(*new); - u->d[BCH_DATA_unstriped].buckets += new_unstriped != 0; - u->d[BCH_DATA_unstriped].sectors += new_unstriped; - - preempt_enable(); -} - -int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k, - struct bch_replicas_entry_v1 *r, s64 sectors) -{ - struct bch_fs_usage *fs_usage; - int idx, ret = 0; - struct printbuf buf = PRINTBUF; - - percpu_down_read(&c->mark_lock); - - idx = bch2_replicas_entry_idx(c, r); - if (idx < 0 && - fsck_err(c, ptr_to_missing_replicas_entry, - "no replicas entry\n while marking %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - percpu_up_read(&c->mark_lock); - ret = bch2_mark_replicas(c, r); - percpu_down_read(&c->mark_lock); - - if (ret) - goto err; - idx = bch2_replicas_entry_idx(c, r); - } - if (idx < 0) { - ret = -1; - goto err; - } - - preempt_disable(); - fs_usage = this_cpu_ptr(c->usage_gc); - fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors); - fs_usage->replicas[idx] += sectors; - preempt_enable(); -err: -fsck_err: - percpu_up_read(&c->mark_lock); - printbuf_exit(&buf); - return ret; -} - -static inline int update_cached_sectors(struct bch_fs *c, - struct bkey_s_c k, - unsigned dev, s64 sectors) -{ - struct bch_replicas_padded r; - - bch2_replicas_entry_cached(&r.e, dev); - - return bch2_update_replicas(c, k, &r.e, sectors); -} - static int bch2_check_fix_ptr(struct btree_trans *trans, struct bkey_s_c k, struct extent_ptr_decoded p, @@ -574,8 +484,6 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) bool warn = false; percpu_down_read(&c->mark_lock); - preempt_disable(); - struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); struct bch_fs_usage_base *src = &trans->fs_usage_delta; s64 added = src->btree + src->data + src->reserved; @@ -603,13 +511,9 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) this_cpu_sub(*c->online_reserved, added); } - dst->hidden += src->hidden; - dst->btree += src->btree; - dst->data += src->data; - dst->cached += src->cached; - dst->reserved += src->reserved; - dst->nr_inodes += src->nr_inodes; - + preempt_disable(); + struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); + acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); preempt_enable(); percpu_up_read(&c->mark_lock); @@ -691,13 +595,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new); - if (!ret) { - alloc_to_bucket(g, new); - bch2_dev_usage_update(c, ca, &old, &new); - } + alloc_to_bucket(g, new); bucket_unlock(g); err_unlock: percpu_up_read(&c->mark_lock); + + if (!ret) + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); } err: bch2_dev_put(ca); @@ -742,7 +646,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, }; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = data_type; - ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1); + ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -751,8 +655,6 @@ err: if (flags & BTREE_TRIGGER_gc) { struct bch_fs *c = trans->c; - BUG_ON(!(flags & BTREE_TRIGGER_gc)); - struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL); if (!m) { bch_err(c, "error allocating memory for gc_stripes, idx %llu", @@ -775,11 +677,16 @@ err: m->block_sectors[p.ec.block] += sectors; - struct bch_replicas_padded r = m->r; + struct disk_accounting_pos acc = { + .type = BCH_DISK_ACCOUNTING_replicas, + }; + memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); mutex_unlock(&c->ec_stripes_heap_lock); - r.e.data_type = data_type; - bch2_update_replicas(c, k, &r.e, sectors); + acc.replicas.data_type = data_type; + int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, true); + if (ret) + return ret; } return 0; @@ -791,7 +698,6 @@ static int __trigger_extent(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { bool gc = flags & BTREE_TRIGGER_gc; - struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -818,11 +724,7 @@ static int __trigger_extent(struct btree_trans *trans, if (p.ptr.cached) { if (!stale) { - ret = !gc - ? bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors) - : update_cached_sectors(c, k, p.ptr.dev, disk_sectors); - bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors", - bch2_err_str(ret)); + ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc); if (ret) return ret; } @@ -844,16 +746,7 @@ static int __trigger_extent(struct btree_trans *trans, } if (acc.replicas.nr_devs) { - ret = !gc - ? bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1) - : bch2_update_replicas(c, k, &acc.replicas, replicas_sectors); - if (unlikely(ret && gc)) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, k); - bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf); - printbuf_exit(&buf); - } + ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc); if (ret) return ret; } @@ -906,36 +799,18 @@ static int __trigger_reservation(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, enum btree_iter_update_trigger_flags flags) { - struct bch_fs *c = trans->c; - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - s64 sectors = (s64) k.k->size; + if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { + s64 sectors = k.k->size; - if (flags & BTREE_TRIGGER_overwrite) - sectors = -sectors; + if (flags & BTREE_TRIGGER_overwrite) + sectors = -sectors; - if (flags & BTREE_TRIGGER_transactional) { struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_persistent_reserved, - .persistent_reserved.nr_replicas = replicas, + .persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas, }; - return bch2_disk_accounting_mod(trans, &acc, §ors, 1); - } - - if (flags & BTREE_TRIGGER_gc) { - sectors *= replicas; - - percpu_down_read(&c->mark_lock); - preempt_disable(); - - struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc); - - replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved)); - fs_usage->b.reserved += sectors; - fs_usage->persistent_reserved[replicas - 1] += sectors; - - preempt_enable(); - percpu_up_read(&c->mark_lock); + return bch2_disk_accounting_mod(trans, &acc, §ors, 1, flags & BTREE_TRIGGER_gc); } return 0; @@ -989,10 +864,13 @@ err: return ret; } -static int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, +static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, u64 b, enum bch_data_type data_type, unsigned sectors, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; + int ret = 0; + percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, b); if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", @@ -1019,9 +897,10 @@ static int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, g->data_type = data_type; g->dirty_sectors += sectors; struct bch_alloc_v4 new = bucket_m_to_alloc(*g); - bch2_dev_usage_update(c, ca, &old, &new); + bucket_unlock(g); percpu_up_read(&c->mark_lock); - return 0; + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + return ret; err: bucket_unlock(g); err_unlock: @@ -1045,7 +924,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return 0; if (flags & BTREE_TRIGGER_gc) - return bch2_mark_metadata_bucket(trans->c, ca, b, type, sectors, flags); + return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags); else if (flags & BTREE_TRIGGER_transactional) return commit_do(trans, NULL, NULL, 0, __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 42ff3e9df587..fc6359f84e82 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -273,16 +273,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca, /* Filesystem usage: */ -static inline unsigned __fs_usage_u64s(unsigned nr_replicas) -{ - return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas; -} - -static inline unsigned fs_usage_u64s(struct bch_fs *c) -{ - return __fs_usage_u64s(READ_ONCE(c->replicas.nr)); -} - static inline unsigned dev_usage_u64s(void) { return sizeof(struct bch_dev_usage) / sizeof(u64); @@ -291,12 +281,6 @@ static inline unsigned dev_usage_u64s(void) struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); -void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *, - const struct bch_alloc_v4 *, - const struct bch_alloc_v4 *); -int bch2_update_replicas(struct bch_fs *, struct bkey_s_c, - struct bch_replicas_entry_v1 *, s64); - int bch2_bucket_ref_update(struct btree_trans *, struct bch_dev *, struct bkey_s_c, const struct bch_extent_ptr *, s64, enum bch_data_type, u8, u8, u32 *); diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 7ad15f809348..c9698cdf866f 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -57,13 +57,6 @@ struct bch_fs_usage_base { u64 nr_inodes; }; -struct bch_fs_usage { - /* all fields are in units of 512 byte sectors: */ - struct bch_fs_usage_base b; - u64 persistent_reserved[BCH_REPLICAS_MAX]; - u64 replicas[]; -}; - struct bch_fs_usage_short { u64 capacity; u64 used; diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f5b5d896979e..e8dfd67eab8a 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -66,9 +66,20 @@ static const char * const disk_accounting_type_strs[] = { NULL }; +static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_pos *pos, + s64 *d, unsigned nr) +{ + struct bkey_i_accounting *acc = bkey_accounting_init(k); + + acc->k.p = disk_accounting_pos_to_bpos(pos); + set_bkey_val_u64s(&acc->k, sizeof(struct bch_accounting) / sizeof(u64) + nr); + + memcpy_u64s_small(acc->v.d, d, nr); +} + int bch2_disk_accounting_mod(struct btree_trans *trans, struct disk_accounting_pos *k, - s64 *d, unsigned nr) + s64 *d, unsigned nr, bool gc) { /* Normalize: */ switch (k->type) { @@ -79,21 +90,18 @@ int bch2_disk_accounting_mod(struct btree_trans *trans, BUG_ON(nr > BCH_ACCOUNTING_MAX_COUNTERS); - struct { - __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); - } k_i; - struct bkey_i_accounting *acc = bkey_accounting_init(&k_i.k); + struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; - acc->k.p = disk_accounting_pos_to_bpos(k); - set_bkey_val_u64s(&acc->k, sizeof(struct bch_accounting) / sizeof(u64) + nr); + accounting_key_init(&k_i.k, k, d, nr); - memcpy_u64s_small(acc->v.d, d, nr); - - return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &acc->k_i); + return likely(!gc) + ? bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k) + : bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); } int bch2_mod_dev_cached_sectors(struct btree_trans *trans, - unsigned dev, s64 sectors) + unsigned dev, s64 sectors, + bool gc) { struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_replicas, @@ -101,7 +109,7 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, bch2_replicas_entry_cached(&acc.replicas, dev); - return bch2_disk_accounting_mod(trans, &acc, §ors, 1); + return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, @@ -199,7 +207,7 @@ int bch2_accounting_update_sb(struct btree_trans *trans) return 0; } -static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a) +static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) { struct bch_replicas_padded r; @@ -207,7 +215,7 @@ static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_ !bch2_replicas_marked_locked(c, &r.e)) return -BCH_ERR_btree_insert_need_mark_replicas; - struct bch_accounting_mem *acc = &c->accounting; + struct bch_accounting_mem *acc = &c->accounting[gc]; unsigned new_nr_counters = acc->nr_counters + bch2_accounting_counters(a.k); u64 __percpu *new_counters = __alloc_percpu_gfp(new_nr_counters * sizeof(u64), @@ -243,11 +251,11 @@ static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_ return 0; } -int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a) +int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) { percpu_up_read(&c->mark_lock); percpu_down_write(&c->mark_lock); - int ret = __bch2_accounting_mem_mod_slowpath(c, a); + int ret = __bch2_accounting_mem_mod_slowpath(c, a, gc); percpu_up_write(&c->mark_lock); percpu_down_read(&c->mark_lock); return ret; @@ -263,7 +271,7 @@ int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accountin */ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) { - struct bch_accounting_mem *acc = &c->accounting; + struct bch_accounting_mem *acc = &c->accounting[0]; int ret = 0; darray_init(usage); @@ -296,6 +304,129 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) return ret; } +/* Ensures all counters in @src exist in @dst: */ +static int copy_counters(struct bch_accounting_mem *dst, + struct bch_accounting_mem *src) +{ + unsigned orig_dst_k_nr = dst->k.nr; + unsigned dst_counters = dst->nr_counters; + + darray_for_each(src->k, i) + if (eytzinger0_find(dst->k.data, orig_dst_k_nr, sizeof(dst->k.data[0]), + accounting_pos_cmp, &i->pos) >= orig_dst_k_nr) { + if (darray_push(&dst->k, ((struct accounting_pos_offset) { + .pos = i->pos, + .offset = dst_counters, + .nr_counters = i->nr_counters }))) + goto err; + + dst_counters += i->nr_counters; + } + + if (dst->k.nr == orig_dst_k_nr) + return 0; + + u64 __percpu *new_counters = __alloc_percpu_gfp(dst_counters * sizeof(u64), + sizeof(u64), GFP_KERNEL); + if (!new_counters) + goto err; + + preempt_disable(); + memcpy(this_cpu_ptr(new_counters), + bch2_acc_percpu_u64s(dst->v, dst->nr_counters), + dst->nr_counters * sizeof(u64)); + preempt_enable(); + + free_percpu(dst->v); + dst->v = new_counters; + dst->nr_counters = dst_counters; + + eytzinger0_sort(dst->k.data, dst->k.nr, sizeof(dst->k.data[0]), accounting_pos_cmp, NULL); + + return 0; +err: + dst->k.nr = orig_dst_k_nr; + return -BCH_ERR_ENOMEM_disk_accounting; +} + +int bch2_accounting_gc_done(struct bch_fs *c) +{ + struct bch_accounting_mem *dst = &c->accounting[0]; + struct bch_accounting_mem *src = &c->accounting[1]; + struct btree_trans *trans = bch2_trans_get(c); + struct printbuf buf = PRINTBUF; + int ret = 0; + + percpu_down_write(&c->mark_lock); + + ret = copy_counters(dst, src) ?: + copy_counters(src, dst); + if (ret) + goto err; + + BUG_ON(dst->k.nr != src->k.nr); + + for (unsigned i = 0; i < src->k.nr; i++) { + BUG_ON(src->k.data[i].nr_counters != dst->k.data[i].nr_counters); + BUG_ON(!bpos_eq(dst->k.data[i].pos, src->k.data[i].pos)); + + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, src->k.data[i].pos); + + unsigned nr = src->k.data[i].nr_counters; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; + u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; + + bch2_accounting_mem_read_counters(c, i, dst_v, nr, false); + bch2_accounting_mem_read_counters(c, i, src_v, nr, true); + + if (memcmp(dst_v, src_v, nr * sizeof(u64))) { + printbuf_reset(&buf); + prt_str(&buf, "accounting mismatch for "); + bch2_accounting_key_to_text(&buf, &acc_k); + + prt_str(&buf, ": got"); + for (unsigned j = 0; j < nr; j++) + prt_printf(&buf, " %llu", dst_v[j]); + + prt_str(&buf, " should be"); + for (unsigned j = 0; j < nr; j++) + prt_printf(&buf, " %llu", src_v[j]); + + for (unsigned j = 0; j < nr; j++) + src_v[j] -= dst_v[j]; + + if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { + ret = commit_do(trans, NULL, NULL, 0, + bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); + if (ret) + goto err; + + if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { + memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); + struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; + + accounting_key_init(&k_i.k, &acc_k, src_v, nr); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + + preempt_disable(); + struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); + struct bch_fs_usage_base *src = &trans->fs_usage_delta; + acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); + preempt_enable(); + } + } + } + } +err: +fsck_err: + percpu_up_write(&c->mark_lock); + printbuf_exit(&buf); + bch2_trans_put(trans); + bch_err_fn(c, ret); + return ret; +} + static int accounting_read_key(struct bch_fs *c, struct bkey_s_c k) { struct printbuf buf = PRINTBUF; @@ -304,7 +435,7 @@ static int accounting_read_key(struct bch_fs *c, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k)); + int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && @@ -331,7 +462,7 @@ fsck_err: */ int bch2_accounting_read(struct bch_fs *c) { - struct bch_accounting_mem *acc = &c->accounting; + struct bch_accounting_mem *acc = &c->accounting[0]; int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, @@ -391,7 +522,7 @@ int bch2_accounting_read(struct bch_fs *c) bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(c, i, v, ARRAY_SIZE(v)); + bch2_accounting_mem_read_counters(c, i, v, ARRAY_SIZE(v), false); switch (k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: @@ -441,8 +572,9 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) bch2_btree_write_buffer_flush_sync(trans)); } -int bch2_dev_usage_init(struct bch_dev *ca) +int bch2_dev_usage_init(struct bch_dev *ca, bool gc) { + struct bch_fs *c = ca->fs; struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_dev_data_type, .dev_data_type.dev = ca->dev_idx, @@ -450,14 +582,21 @@ int bch2_dev_usage_init(struct bch_dev *ca) }; u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; - return bch2_trans_do(ca->fs, NULL, NULL, 0, - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v))); + int ret = bch2_trans_do(c, NULL, NULL, 0, + bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc)); + bch_err_fn(c, ret); + return ret; +} + +void bch2_accounting_free(struct bch_accounting_mem *acc) +{ + darray_exit(&acc->k); + free_percpu(acc->v); + acc->v = NULL; + acc->nr_counters = 0; } void bch2_fs_accounting_exit(struct bch_fs *c) { - struct bch_accounting_mem *acc = &c->accounting; - - darray_exit(&acc->k); - free_percpu(acc->v); + bch2_accounting_free(&c->accounting[0]); } diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 9f2078c970f3..76445ffd9172 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -78,11 +78,9 @@ static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos return ret; } -int bch2_disk_accounting_mod(struct btree_trans *, - struct disk_accounting_pos *, - s64 *, unsigned); -int bch2_mod_dev_cached_sectors(struct btree_trans *trans, - unsigned dev, s64 sectors); +int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, + s64 *, unsigned, bool); +int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags, struct printbuf *); @@ -106,15 +104,15 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) return bpos_cmp(*l, *r); } -int bch2_accounting_mem_mod_slowpath(struct bch_fs *, struct bkey_s_c_accounting); +int bch2_accounting_mem_mod_slowpath(struct bch_fs *, struct bkey_s_c_accounting, bool); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a) +static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) { - struct bch_accounting_mem *acc = &c->accounting; + struct bch_accounting_mem *acc = &c->accounting[gc]; unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &a.k->p); if (unlikely(idx >= acc->k.nr)) - return bch2_accounting_mem_mod_slowpath(c, a); + return bch2_accounting_mem_mod_slowpath(c, a, gc); unsigned offset = acc->k.data[idx].offset; @@ -129,41 +127,51 @@ static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_ac * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod(struct btree_trans *trans, struct - bkey_s_c_accounting a) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { struct bch_fs *c = trans->c; - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(&trans->fs_usage_delta, acc_k.replicas.data_type, a.v->d[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); - if (ca) { - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]); + if (!gc) { + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(&trans->fs_usage_delta, acc_k.replicas.data_type, a.v->d[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + rcu_read_lock(); + struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); + if (ca) { + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]); + } + rcu_read_unlock(); + break; } - rcu_read_unlock(); - break; } - return __bch2_accounting_mem_mod(c, a); + + return __bch2_accounting_mem_mod(c, a, gc); } -static inline void bch2_accounting_mem_read_counters(struct bch_fs *c, - unsigned idx, - u64 *v, unsigned nr) +static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +{ + percpu_down_read(&trans->c->mark_lock); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + percpu_up_read(&trans->c->mark_lock); + return ret; +} + +static inline void bch2_accounting_mem_read_counters(struct bch_fs *c, unsigned idx, + u64 *v, unsigned nr, bool gc) { memset(v, 0, sizeof(*v) * nr); - struct bch_accounting_mem *acc = &c->accounting; + struct bch_accounting_mem *acc = &c->accounting[0]; if (unlikely(idx >= acc->k.nr)) return; @@ -177,19 +185,23 @@ static inline void bch2_accounting_mem_read_counters(struct bch_fs *c, static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, u64 *v, unsigned nr) { - struct bch_accounting_mem *acc = &c->accounting; + struct bch_accounting_mem *acc = &c->accounting[0]; unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &p); - bch2_accounting_mem_read_counters(c, idx, v, nr); + bch2_accounting_mem_read_counters(c, idx, v, nr, false); } int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); +int bch2_accounting_gc_done(struct bch_fs *); + int bch2_accounting_read(struct bch_fs *); int bch2_dev_usage_remove(struct bch_fs *, unsigned); -int bch2_dev_usage_init(struct bch_dev *); +int bch2_dev_usage_init(struct bch_dev *, bool); + +void bch2_accounting_free(struct bch_accounting_mem *); void bch2_fs_accounting_exit(struct bch_fs *); #endif /* _BCACHEFS_DISK_ACCOUNTING_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 819d61b9ab83..3c3a2a7e8389 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -301,13 +301,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); - if (!ret) { - alloc_to_bucket(g, new); - bch2_dev_usage_update(c, ca, &old, &new); - } + alloc_to_bucket(g, new); bucket_unlock(g); err_unlock: percpu_up_read(&c->mark_lock); + if (!ret) + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); } err: bch2_dev_put(ca); @@ -369,7 +368,12 @@ int bch2_trigger_stripe(struct btree_trans *trans, if (unlikely(flags & BTREE_TRIGGER_check_repair)) return bch2_check_fix_ptrs(trans, btree, level, _new.s_c, flags); - if (flags & BTREE_TRIGGER_transactional) { + BUG_ON(new_s && old_s && + (new_s->nr_blocks != old_s->nr_blocks || + new_s->nr_redundant != old_s->nr_redundant)); + + + if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { /* * If the pointers aren't changing, we don't need to do anything: */ @@ -380,9 +384,34 @@ int bch2_trigger_stripe(struct btree_trans *trans, new_s->nr_blocks * sizeof(struct bch_extent_ptr))) return 0; - BUG_ON(new_s && old_s && - (new_s->nr_blocks != old_s->nr_blocks || - new_s->nr_redundant != old_s->nr_redundant)); + struct gc_stripe *gc = NULL; + if (flags & BTREE_TRIGGER_gc) { + gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); + if (!gc) { + bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx); + return -BCH_ERR_ENOMEM_mark_stripe; + } + + /* + * This will be wrong when we bring back runtime gc: we should + * be unmarking the old key and then marking the new key + * + * Also: when we bring back runtime gc, locking + */ + gc->alive = true; + gc->sectors = le16_to_cpu(new_s->sectors); + gc->nr_blocks = new_s->nr_blocks; + gc->nr_redundant = new_s->nr_redundant; + + for (unsigned i = 0; i < new_s->nr_blocks; i++) + gc->ptrs[i] = new_s->ptrs[i]; + + /* + * gc recalculates this field from stripe ptr + * references: + */ + memset(gc->block_sectors, 0, sizeof(gc->block_sectors)); + } if (new_s) { s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant; @@ -391,9 +420,12 @@ int bch2_trigger_stripe(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_replicas, }; bch2_bkey_to_replicas(&acc.replicas, new); - int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1); + int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); if (ret) return ret; + + if (gc) + memcpy(&gc->r.e, &acc.replicas, replicas_entry_bytes(&acc.replicas)); } if (old_s) { @@ -403,7 +435,7 @@ int bch2_trigger_stripe(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_replicas, }; bch2_bkey_to_replicas(&acc.replicas, old); - int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1); + int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); if (ret) return ret; } @@ -452,51 +484,6 @@ int bch2_trigger_stripe(struct btree_trans *trans, } } - if (flags & BTREE_TRIGGER_gc) { - struct gc_stripe *m = - genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); - - if (!m) { - bch_err(c, "error allocating memory for gc_stripes, idx %llu", - idx); - return -BCH_ERR_ENOMEM_mark_stripe; - } - /* - * This will be wrong when we bring back runtime gc: we should - * be unmarking the old key and then marking the new key - */ - m->alive = true; - m->sectors = le16_to_cpu(new_s->sectors); - m->nr_blocks = new_s->nr_blocks; - m->nr_redundant = new_s->nr_redundant; - - for (unsigned i = 0; i < new_s->nr_blocks; i++) - m->ptrs[i] = new_s->ptrs[i]; - - bch2_bkey_to_replicas(&m->r.e, new); - - /* - * gc recalculates this field from stripe ptr - * references: - */ - memset(m->block_sectors, 0, sizeof(m->block_sectors)); - - int ret = mark_stripe_buckets(trans, old, new, flags); - if (ret) - return ret; - - ret = bch2_update_replicas(c, new, &m->r.e, - ((s64) m->sectors * m->nr_redundant)); - if (ret) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, new); - bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf); - printbuf_exit(&buf); - return ret; - } - } - return 0; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 4ad55ca15775..d556d7f28661 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -600,41 +600,26 @@ int bch2_trigger_inode(struct btree_trans *trans, struct bkey_s new, enum btree_iter_update_trigger_flags flags) { - s64 nr = (s64) bkey_is_inode(new.k) - (s64) bkey_is_inode(old.k); - - if (flags & BTREE_TRIGGER_transactional) { - if (nr) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_nr_inodes - }; - - int ret = bch2_disk_accounting_mod(trans, &acc, &nr, 1); - if (ret) - return ret; - } - - bool old_deleted = bkey_is_deleted_inode(old); - bool new_deleted = bkey_is_deleted_inode(new.s_c); - if (old_deleted != new_deleted) { - int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, - new.k->p, new_deleted); - if (ret) - return ret; - } - } - if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { BUG_ON(!trans->journal_res.seq); - bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); } - if (flags & BTREE_TRIGGER_gc) { - struct bch_fs *c = trans->c; + s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); + if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr) { + struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_nr_inodes }; + int ret = bch2_disk_accounting_mod(trans, &acc, &nr, 1, flags & BTREE_TRIGGER_gc); + if (ret) + return ret; + } - percpu_down_read(&c->mark_lock); - this_cpu_add(c->usage_gc->b.nr_inodes, nr); - percpu_up_read(&c->mark_lock); + int deleted_delta = (int) bkey_is_deleted_inode(new.s_c) - + (int) bkey_is_deleted_inode(old); + if ((flags & BTREE_TRIGGER_transactional) && deleted_delta) { + int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, + new.k->p, deleted_delta > 0); + if (ret) + return ret; } return 0; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 4006b8ec4fe8..514bff68d971 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1031,8 +1031,7 @@ int bch2_fs_initialize(struct bch_fs *c) goto err; for_each_member_device(c, ca) { - ret = bch2_dev_usage_init(ca); - bch_err_msg(c, ret, "initializing device usage"); + ret = bch2_dev_usage_init(ca, false); if (ret) { bch2_dev_put(ca); goto err; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 5252d3ee8a2a..ac68ef5e453f 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -264,73 +264,6 @@ bool bch2_replicas_marked(struct bch_fs *c, return ret; } -static void __replicas_table_update(struct bch_fs_usage *dst, - struct bch_replicas_cpu *dst_r, - struct bch_fs_usage *src, - struct bch_replicas_cpu *src_r) -{ - int src_idx, dst_idx; - - *dst = *src; - - for (src_idx = 0; src_idx < src_r->nr; src_idx++) { - if (!src->replicas[src_idx]) - continue; - - dst_idx = __replicas_entry_idx(dst_r, - cpu_replicas_entry(src_r, src_idx)); - BUG_ON(dst_idx < 0); - - dst->replicas[dst_idx] = src->replicas[src_idx]; - } -} - -static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, - struct bch_replicas_cpu *dst_r, - struct bch_fs_usage __percpu *src_p, - struct bch_replicas_cpu *src_r) -{ - unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; - struct bch_fs_usage *dst, *src = (void *) - bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr); - - preempt_disable(); - dst = this_cpu_ptr(dst_p); - preempt_enable(); - - __replicas_table_update(dst, dst_r, src, src_r); -} - -/* - * Resize filesystem accounting: - */ -static int replicas_table_update(struct bch_fs *c, - struct bch_replicas_cpu *new_r) -{ - struct bch_fs_usage __percpu *new_gc = NULL; - unsigned bytes = sizeof(struct bch_fs_usage) + - sizeof(u64) * new_r->nr; - int ret = 0; - - if ((c->usage_gc && - !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL)))) - goto err; - - if (c->usage_gc) - __replicas_table_update_pcpu(new_gc, new_r, - c->usage_gc, &c->replicas); - - swap(c->usage_gc, new_gc); - swap(c->replicas, *new_r); -out: - free_percpu(new_gc); - return ret; -err: - bch_err(c, "error updating replicas table: memory allocation failure"); - ret = -BCH_ERR_ENOMEM_replicas_table; - goto out; -} - noinline static int bch2_mark_replicas_slowpath(struct bch_fs *c, struct bch_replicas_entry_v1 *new_entry) @@ -378,7 +311,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, /* don't update in memory replicas until changes are persistent */ percpu_down_write(&c->mark_lock); if (new_r.entries) - ret = replicas_table_update(c, &new_r); + swap(c->replicas, new_r); if (new_gc.entries) swap(new_gc, c->replicas_gc); percpu_up_write(&c->mark_lock); @@ -413,8 +346,9 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) percpu_down_write(&c->mark_lock); ret = ret ?: - bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?: - replicas_table_update(c, &c->replicas_gc); + bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); + if (!ret) + swap(c->replicas, c->replicas_gc); kfree(c->replicas_gc.entries); c->replicas_gc.entries = NULL; @@ -628,8 +562,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) bch2_cpu_replicas_sort(&new_r); percpu_down_write(&c->mark_lock); - - ret = replicas_table_update(c, &new_r); + swap(c->replicas, new_r); percpu_up_write(&c->mark_lock); kfree(new_r.entries); @@ -931,10 +864,8 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) { - unsigned ret; - mutex_lock(&c->sb_lock); - ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); + unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); mutex_unlock(&c->sb_lock); return ret; @@ -945,8 +876,3 @@ void bch2_fs_replicas_exit(struct bch_fs *c) kfree(c->replicas.entries); kfree(c->replicas_gc.entries); } - -int bch2_fs_replicas_init(struct bch_fs *c) -{ - return replicas_table_update(c, &c->replicas); -} diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index eade75ed4839..622482559c3d 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -79,6 +79,5 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; void bch2_fs_replicas_exit(struct bch_fs *); -int bch2_fs_replicas_init(struct bch_fs *); #endif /* _BCACHEFS_REPLICAS_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index baee01d7856f..af947f19e388 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -899,7 +899,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_io_clock_init(&c->io_clock[READ]) ?: bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_journal_init(&c->journal) ?: - bch2_fs_replicas_init(c) ?: bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_cache_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: @@ -1830,7 +1829,7 @@ have_slot: bch2_write_super(c); mutex_unlock(&c->sb_lock); - ret = bch2_dev_usage_init(ca); + ret = bch2_dev_usage_init(ca, false); if (ret) goto err_late; @@ -2011,9 +2010,9 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) }; u64 v[3] = { nbuckets - old_nbuckets, 0, 0 }; - ret = bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets) ?: - bch2_trans_do(ca->fs, NULL, NULL, 0, - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v))); + ret = bch2_trans_do(ca->fs, NULL, NULL, 0, + bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), false)) ?: + bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); if (ret) goto err; } diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 5b0533ec4c7e..cd09edd12d8a 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -719,9 +719,7 @@ static inline void percpu_u64_set(u64 __percpu *dst, u64 src) static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) { - unsigned i; - - for (i = 0; i < nr; i++) + for (unsigned i = 0; i < nr; i++) acc[i] += src[i]; }