bcachefs: Fold bucket_state in to BCH_DATA_TYPES()

Previously, we were missing accounting for buckets in need_gc_gens and
need_discard states. This matters because buckets in those states need
other btree operations done before they can be used, so they can't be
counted when checking the current number of free buckets against the
allocation watermark.
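
As a condensed sketch (lifted from the bucket allocation hunk further down
in this patch), the allocator now checks these per-state counts against
what's actually available and kicks the corresponding background work:

	usage = bch2_dev_usage_read(ca);
	avail = __dev_buckets_available(ca, usage, reserve);

	/* these buckets can't satisfy the watermark until processed: */
	if (usage.d[BCH_DATA_need_discard].buckets > avail)
		bch2_do_discards(c);
	if (usage.d[BCH_DATA_need_gc_gens].buckets > avail)
		bch2_do_gc_gens(c);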

Also, we weren't directly counting free buckets at all. Now, data type 0
== BCH_DATA_free, and free buckets are counted; this means we can get
rid of the separate (poorly defined) count of unavailable buckets.
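
For illustration, device bucket availability (condensed from one of the
hunks below) now reads the free count directly, minus open and reserved
buckets, instead of subtracting buckets_unavailable from nbuckets:

	static inline u64 __dev_buckets_available(struct bch_dev *ca,
						  struct bch_dev_usage usage,
						  enum alloc_reserve reserve)
	{
		/* free buckets, less those held open or held in reserve: */
		return max_t(s64, 0,
			     usage.d[BCH_DATA_free].buckets -
			     ca->nr_open_buckets -
			     bch2_dev_buckets_reserved(ca, reserve));
	}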

This is a new on-disk format version, with an upgrade and fsck required for
the accounting changes.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Author: Kent Overstreet
Date: 2022-04-01 01:29:59 -04:00
Committer: Kent Overstreet
Parent: 8058ea64c3
Commit: 822835ffea
19 files changed, 298 insertions(+), 202 deletions(-)


@@ -35,15 +35,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#undef x
};
const char * const bch2_bucket_states[] = {
"free",
"need gc gens",
"need discard",
"cached",
"dirty",
NULL
};
struct bkey_alloc_unpacked {
u64 journal_seq;
u64 bucket;
@@ -355,19 +346,54 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
}
if (rw == WRITE) {
if (a.v->cached_sectors &&
!a.v->dirty_sectors &&
!a.v->io_time[READ]) {
pr_buf(err, "cached bucket with read_time == 0");
if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) {
pr_buf(err, "invalid data type (got %u should be %u)",
a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
return -EINVAL;
}
if (!a.v->dirty_sectors &&
!a.v->cached_sectors &&
!a.v->stripe &&
a.v->data_type) {
pr_buf(err, "empty, but data_type nonzero");
return -EINVAL;
switch (a.v->data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
if (a.v->dirty_sectors ||
a.v->cached_sectors ||
a.v->stripe) {
pr_buf(err, "empty data type free but have data");
return -EINVAL;
}
break;
case BCH_DATA_sb:
case BCH_DATA_journal:
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
if (!a.v->dirty_sectors) {
pr_buf(err, "data_type %s but dirty_sectors==0",
bch2_data_types[a.v->data_type]);
return -EINVAL;
}
break;
case BCH_DATA_cached:
if (!a.v->cached_sectors ||
a.v->dirty_sectors ||
a.v->stripe) {
pr_buf(err, "data type inconsistency");
return -EINVAL;
}
if (!a.v->io_time[READ]) {
pr_buf(err, "cached bucket with read_time == 0");
return -EINVAL;
}
break;
case BCH_DATA_stripe:
if (!a.v->stripe) {
pr_buf(err, "data_type %s but stripe==0",
bch2_data_types[a.v->data_type]);
return -EINVAL;
}
break;
}
}
@@ -394,9 +420,11 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
bch2_alloc_to_v4(k, &a);
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu",
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu",
a.gen, a.oldest_gen, bch2_data_types[a.data_type],
a.journal_seq, BCH_ALLOC_V4_NEED_DISCARD(&a));
a.journal_seq,
BCH_ALLOC_V4_NEED_DISCARD(&a),
BCH_ALLOC_V4_NEED_INC_GEN(&a));
pr_buf(out, " dirty_sectors %u", a.dirty_sectors);
pr_buf(out, " cached_sectors %u", a.cached_sectors);
pr_buf(out, " stripe %u", a.stripe);
@@ -437,7 +465,7 @@ int bch2_alloc_read(struct bch_fs *c)
static int bch2_bucket_do_index(struct btree_trans *trans,
struct bkey_s_c alloc_k,
struct bch_alloc_v4 a,
const struct bch_alloc_v4 *a,
bool set)
{
struct bch_fs *c = trans->c;
@@ -445,15 +473,14 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
struct btree_iter iter;
struct bkey_s_c old;
struct bkey_i *k;
enum bucket_state state = bucket_state(a);
enum btree_id btree;
enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted;
enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
struct printbuf buf = PRINTBUF;
int ret;
if (state != BUCKET_free &&
state != BUCKET_need_discard)
if (a->data_type != BCH_DATA_free &&
a->data_type != BCH_DATA_need_discard)
return 0;
k = bch2_trans_kmalloc(trans, sizeof(*k));
@@ -463,13 +490,13 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
bkey_init(&k->k);
k->k.type = new_type;
switch (state) {
case BUCKET_free:
switch (a->data_type) {
case BCH_DATA_free:
btree = BTREE_ID_freespace;
k->k.p = alloc_freespace_pos(alloc_k.k->p, a);
k->k.p = alloc_freespace_pos(alloc_k.k->p, *a);
bch2_key_resize(&k->k, 1);
break;
case BUCKET_need_discard:
case BCH_DATA_need_discard:
btree = BTREE_ID_need_discard;
k->k.p = alloc_k.k->p;
break;
@@ -523,6 +550,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
bch2_alloc_to_v4(old, &old_a);
new_a = &bkey_i_to_alloc_v4(new)->v;
new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
if (new_a->dirty_sectors > old_a.dirty_sectors ||
new_a->cached_sectors > old_a.cached_sectors) {
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
@@ -531,18 +560,18 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
}
if (old_a.data_type && !new_a->data_type &&
old_a.gen == new_a->gen &&
if (data_type_is_empty(new_a->data_type) &&
BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
!bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) {
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
}
if (bucket_state(old_a) != bucket_state(*new_a) ||
(bucket_state(*new_a) == BUCKET_free &&
if (old_a.data_type != new_a->data_type ||
(new_a->data_type == BCH_DATA_free &&
alloc_freespace_genbits(old_a) != alloc_freespace_genbits(*new_a))) {
ret = bch2_bucket_do_index(trans, old, old_a, false) ?:
bch2_bucket_do_index(trans, bkey_i_to_s_c(new), *new_a, true);
ret = bch2_bucket_do_index(trans, old, &old_a, false) ?:
bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true);
if (ret)
return ret;
}
@@ -594,9 +623,9 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
bch2_alloc_to_v4(alloc_k, &a);
discard_key_type = bucket_state(a) == BUCKET_need_discard
discard_key_type = a.data_type == BCH_DATA_need_discard
? KEY_TYPE_set : 0;
freespace_key_type = bucket_state(a) == BUCKET_free
freespace_key_type = a.data_type == BCH_DATA_free
? KEY_TYPE_set : 0;
bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard,
@@ -678,9 +707,9 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
u64 genbits;
struct bpos pos;
struct bkey_i *update;
enum bucket_state state = iter->btree_id == BTREE_ID_need_discard
? BUCKET_need_discard
: BUCKET_free;
enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
? BCH_DATA_need_discard
: BCH_DATA_free;
struct printbuf buf = PRINTBUF;
int ret;
@@ -711,13 +740,13 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
bch2_alloc_to_v4(k, &a);
if (fsck_err_on(bucket_state(a) != state ||
(state == BUCKET_free &&
if (fsck_err_on(a.data_type != state ||
(state == BCH_DATA_free &&
genbits != alloc_freespace_genbits(a)), c,
"%s\n incorrectly set in %s index (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
bch2_btree_ids[iter->btree_id],
bucket_state(a) == state,
a.data_type == state,
genbits >> 56, alloc_freespace_genbits(a) >> 56))
goto delete;
out:
@@ -818,7 +847,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
bch2_alloc_to_v4(alloc_k, &a);
if (bucket_state(a) != BUCKET_cached)
if (a.data_type != BCH_DATA_cached)
return 0;
bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
@@ -928,10 +957,19 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
goto write;
}
BUG_ON(a->v.journal_seq > c->journal.flushed_seq_ondisk);
if (bch2_fs_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, c,
"clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
"%s",
a->v.journal_seq,
c->journal.flushed_seq_ondisk,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = -EIO;
goto out;
}
if (bch2_fs_inconsistent_on(!BCH_ALLOC_V4_NEED_DISCARD(&a->v), c,
"%s\n incorrectly set in need_discard btree",
if (bch2_fs_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, c,
"bucket incorrectly set in need_discard btree\n"
"%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = -EIO;
goto out;
@@ -955,6 +993,7 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
write:
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
out:
@@ -1101,12 +1140,16 @@ static void bch2_do_invalidates_work(struct work_struct *work)
bch2_trans_init(&trans, c, 0, 0);
for_each_member_device(ca, c, i)
while (!ret && should_invalidate_buckets(ca))
for_each_member_device(ca, c, i) {
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
while (!ret && nr_to_invalidate-- >= 0)
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOFAIL,
invalidate_one_bucket(&trans, ca));
}
bch2_trans_exit(&trans);
percpu_ref_put(&c->writes);
@@ -1139,7 +1182,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
bch2_alloc_to_v4(k, &a);
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_LAZY_RW,
bch2_bucket_do_index(&trans, k, a, true));
bch2_bucket_do_index(&trans, k, &a, true));
if (ret)
break;
}


@@ -28,32 +28,35 @@ static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
return a.gen - a.oldest_gen;
}
enum bucket_state {
BUCKET_free,
BUCKET_need_gc_gens,
BUCKET_need_discard,
BUCKET_cached,
BUCKET_dirty,
};
extern const char * const bch2_bucket_states[];
static inline enum bucket_state bucket_state(struct bch_alloc_v4 a)
static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors,
u32 cached_sectors,
u32 stripe,
struct bch_alloc_v4 a,
enum bch_data_type data_type)
{
if (a.dirty_sectors || a.stripe)
return BUCKET_dirty;
if (a.cached_sectors)
return BUCKET_cached;
if (dirty_sectors)
return data_type;
if (stripe)
return BCH_DATA_stripe;
if (cached_sectors)
return BCH_DATA_cached;
if (BCH_ALLOC_V4_NEED_DISCARD(&a))
return BUCKET_need_discard;
return BCH_DATA_need_discard;
if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX)
return BUCKET_need_gc_gens;
return BUCKET_free;
return BCH_DATA_need_gc_gens;
return BCH_DATA_free;
}
static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
enum bch_data_type data_type)
{
return __alloc_data_type(a.dirty_sectors, a.cached_sectors,
a.stripe, a, data_type);
}
static inline u64 alloc_lru_idx(struct bch_alloc_v4 a)
{
return bucket_state(a) == BUCKET_cached ? a.io_time[READ] : 0;
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
}
static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
@@ -128,13 +131,14 @@ int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_do_discards(struct bch_fs *);
static inline bool should_invalidate_buckets(struct bch_dev *ca)
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
struct bch_dev_usage u)
{
struct bch_dev_usage u = bch2_dev_usage_read(ca);
u64 free = u.d[BCH_DATA_free].buckets +
u.d[BCH_DATA_need_discard].buckets;
return u.d[BCH_DATA_cached].buckets &&
u.buckets_unavailable + u.d[BCH_DATA_cached].buckets <
ca->mi.nbuckets >> 7;
return clamp_t(s64, (ca->mi.nbuckets >> 7) - free,
0, u.d[BCH_DATA_cached].buckets);
}
void bch2_do_invalidates(struct bch_fs *);


@@ -331,7 +331,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
}
if (a.data_type != BUCKET_free) {
if (a.data_type != BCH_DATA_free) {
pr_buf(&buf, "non free bucket in freespace btree\n"
" freespace key ");
bch2_bkey_val_to_text(&buf, c, freespace_k);
@@ -417,7 +417,7 @@ again:
bch2_alloc_to_v4(k, &a);
if (bucket_state(a) != BUCKET_free)
if (a.data_type != BCH_DATA_free)
continue;
(*buckets_seen)++;
@@ -517,27 +517,31 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL;
u64 avail = dev_buckets_available(ca, reserve);
struct bch_dev_usage usage;
u64 avail;
u64 buckets_seen = 0;
u64 skipped_open = 0;
u64 skipped_need_journal_commit = 0;
u64 skipped_nouse = 0;
if (may_alloc_partial) {
ob = try_alloc_partial_bucket(c, ca, reserve);
if (ob)
return ob;
}
bool waiting = false;
again:
usage = bch2_dev_usage_read(ca);
avail = __dev_buckets_available(ca, usage, reserve);
if (usage.d[BCH_DATA_need_discard].buckets > avail)
bch2_do_discards(c);
if (usage.d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_do_gc_gens(c);
if (should_invalidate_buckets(ca, usage))
bch2_do_invalidates(c);
if (!avail) {
if (cl) {
if (cl && !waiting) {
closure_wait(&c->freelist_wait, cl);
/* recheck after putting ourself on waitlist */
avail = dev_buckets_available(ca, reserve);
if (avail) {
closure_wake_up(&c->freelist_wait);
goto again;
}
waiting = true;
goto again;
}
if (!c->blocked_allocate)
@@ -547,6 +551,15 @@ again:
goto err;
}
if (waiting)
closure_wake_up(&c->freelist_wait);
if (may_alloc_partial) {
ob = try_alloc_partial_bucket(c, ca, reserve);
if (ob)
return ob;
}
ob = likely(ca->mi.freespace_initialized)
? bch2_bucket_alloc_freelist(trans, ca, reserve,
&buckets_seen,


@@ -43,14 +43,14 @@ struct open_bucket {
* the block in the stripe this open_bucket corresponds to:
*/
u8 ec_idx;
enum bch_data_type data_type:3;
enum bch_data_type data_type:8;
unsigned valid:1;
unsigned on_partial_list:1;
int alloc_reserve:3;
unsigned alloc_reserve:3;
unsigned sectors_free;
u8 dev;
u8 gen;
u32 sectors_free;
u64 bucket;
struct ec_stripe_new *ec;
};


@@ -1222,13 +1222,16 @@ LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
/* BCH_SB_FIELD_replicas: */
#define BCH_DATA_TYPES() \
x(none, 0) \
x(free, 0) \
x(sb, 1) \
x(journal, 2) \
x(btree, 3) \
x(user, 4) \
x(cached, 5) \
x(parity, 6)
x(parity, 6) \
x(stripe, 7) \
x(need_gc_gens, 8) \
x(need_discard, 9)
enum bch_data_type {
#define x(t, n) BCH_DATA_##t,
@@ -1237,6 +1240,29 @@ enum bch_data_type {
BCH_DATA_NR
};
static inline bool data_type_is_empty(enum bch_data_type type)
{
switch (type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
return true;
default:
return false;
}
}
static inline bool data_type_is_hidden(enum bch_data_type type)
{
switch (type) {
case BCH_DATA_sb:
case BCH_DATA_journal:
return true;
default:
return false;
}
}
struct bch_replicas_entry_v0 {
__u8 data_type;
__u8 nr_devs;
@@ -1364,7 +1390,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(subvol_dirent, 17) \
x(inode_v2, 18) \
x(freespace, 19) \
x(alloc_v4, 20)
x(alloc_v4, 20) \
x(new_data_types, 21)
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1822,7 +1849,7 @@ struct jset_entry_dev_usage {
__u32 pad;
__le64 buckets_ec;
__le64 buckets_unavailable;
__le64 _buckets_unavailable; /* No longer used */
struct jset_entry_dev_usage_type d[];
} __attribute__((packed));


@@ -285,13 +285,14 @@ struct bch_ioctl_dev_usage {
__u32 bucket_size;
__u64 nr_buckets;
__u64 available_buckets;
__u64 buckets[BCH_DATA_NR];
__u64 sectors[BCH_DATA_NR];
__u64 buckets_ec;
__u64 ec_buckets;
__u64 ec_sectors;
struct bch_ioctl_dev_usage_type {
__u64 buckets;
__u64 sectors;
__u64 fragmented;
} d[BCH_DATA_NR];
};
/*


@@ -1216,7 +1216,6 @@ static int bch2_gc_done(struct bch_fs *c,
dev_usage_u64s());
copy_dev_field(buckets_ec, "buckets_ec");
copy_dev_field(buckets_unavailable, "buckets_unavailable");
for (i = 0; i < BCH_DATA_NR; i++) {
copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]);
@@ -1301,6 +1300,9 @@ static int bch2_gc_start(struct bch_fs *c,
percpu_ref_put(&ca->ref);
return -ENOMEM;
}
this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
ca->mi.nbuckets - ca->mi.first_bucket);
}
return 0;
@@ -1325,10 +1327,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
struct bucket gc;
struct bucket gc, *b;
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
struct bch_alloc_v4 old, new;
enum bch_data_type type;
int ret;
k = bch2_btree_iter_peek_slot(iter);
@@ -1340,7 +1343,29 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
new = old;
percpu_down_read(&c->mark_lock);
gc = *gc_bucket(ca, iter->pos.offset);
b = gc_bucket(ca, iter->pos.offset);
/*
* b->data_type doesn't yet include need_discard & need_gc_gen states -
* fix that here:
*/
type = __alloc_data_type(b->dirty_sectors,
b->cached_sectors,
b->stripe,
old,
b->data_type);
if (b->data_type != type) {
struct bch_dev_usage *u;
preempt_disable();
u = this_cpu_ptr(ca->usage_gc);
u->d[b->data_type].buckets--;
b->data_type = type;
u->d[b->data_type].buckets++;
preempt_enable();
}
gc = *b;
percpu_up_read(&c->mark_lock);
if (metadata_only &&
@@ -1926,6 +1951,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
return ret;
a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type);
return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
}


@@ -102,4 +102,10 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
return ret;
}
static inline void bch2_do_gc_gens(struct bch_fs *c)
{
atomic_inc(&c->kick_gc);
wake_up_process(c->gc_thread);
}
#endif /* _BCACHEFS_BTREE_GC_H */


@@ -283,9 +283,9 @@ bch2_fs_usage_read_short(struct bch_fs *c)
return ret;
}
static inline int is_unavailable_bucket(struct bch_alloc_v4 a)
void bch2_dev_usage_init(struct bch_dev *ca)
{
return a.dirty_sectors || a.stripe;
ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
}
static inline int bucket_sectors_fragmented(struct bch_dev *ca,
@@ -296,24 +296,6 @@ static inline int bucket_sectors_fragmented(struct bch_dev *ca,
: 0;
}
static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a)
{
return a.cached_sectors && !a.dirty_sectors
? BCH_DATA_cached
: a.data_type;
}
static inline void account_bucket(struct bch_fs_usage *fs_usage,
struct bch_dev_usage *dev_usage,
enum bch_data_type type,
int nr, s64 size)
{
if (type == BCH_DATA_sb || type == BCH_DATA_journal)
fs_usage->hidden += size;
dev_usage->d[type].buckets += nr;
}
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
struct bch_alloc_v4 old,
struct bch_alloc_v4 new,
@@ -324,23 +306,25 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, gc);
if (data_type_is_hidden(old.data_type))
fs_usage->hidden -= ca->mi.bucket_size;
if (data_type_is_hidden(new.data_type))
fs_usage->hidden += ca->mi.bucket_size;
u = dev_usage_ptr(ca, journal_seq, gc);
if (bucket_type(old))
account_bucket(fs_usage, u, bucket_type(old),
-1, -ca->mi.bucket_size);
u->d[old.data_type].buckets--;
u->d[new.data_type].buckets++;
if (bucket_type(new))
account_bucket(fs_usage, u, bucket_type(new),
1, ca->mi.bucket_size);
u->buckets_unavailable +=
is_unavailable_bucket(new) - is_unavailable_bucket(old);
u->buckets_ec -= (int) !!old.stripe;
u->buckets_ec += (int) !!new.stripe;
u->d[old.data_type].sectors -= old.dirty_sectors;
u->d[new.data_type].sectors += new.dirty_sectors;
u->d[BCH_DATA_cached].sectors +=
(int) new.cached_sectors - (int) old.cached_sectors;
u->d[BCH_DATA_cached].sectors += new.cached_sectors;
u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
@@ -531,7 +515,8 @@ int bch2_mark_alloc(struct btree_trans *trans,
bch2_alloc_to_v4(new, &new_a);
if ((flags & BTREE_TRIGGER_INSERT) &&
!old_a.data_type != !new_a.data_type &&
data_type_is_empty(old_a.data_type) !=
data_type_is_empty(new_a.data_type) &&
new.k->type == KEY_TYPE_alloc_v4) {
struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
@@ -542,14 +527,16 @@ int bch2_mark_alloc(struct btree_trans *trans,
* before the bucket became empty again, then we don't have
* to wait on a journal flush before we can reuse the bucket:
*/
new_a.journal_seq = !new_a.data_type &&
new_a.journal_seq = data_type_is_empty(new_a.data_type) &&
(journal_seq == v->journal_seq ||
bch2_journal_noflush_seq(&c->journal, v->journal_seq))
? 0 : journal_seq;
v->journal_seq = new_a.journal_seq;
}
if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
if (!data_type_is_empty(old_a.data_type) &&
data_type_is_empty(new_a.data_type) &&
new_a.journal_seq) {
ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk,
new.k->p.inode, new.k->p.offset,
@@ -561,24 +548,21 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
}
if (!new_a.data_type &&
if (new_a.data_type == BCH_DATA_free &&
(!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
closure_wake_up(&c->freelist_wait);
if ((flags & BTREE_TRIGGER_INSERT) &&
BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
!new_a.journal_seq)
if (new_a.data_type == BCH_DATA_need_discard &&
(!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
bch2_do_discards(c);
if (!old_a.data_type &&
new_a.data_type &&
should_invalidate_buckets(ca))
if (old_a.data_type != BCH_DATA_cached &&
new_a.data_type == BCH_DATA_cached &&
should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
bch2_do_invalidates(c);
if (bucket_state(new_a) == BUCKET_need_gc_gens) {
atomic_inc(&c->kick_gc);
wake_up_process(c->gc_thread);
}
if (new_a.data_type == BCH_DATA_need_gc_gens)
bch2_do_gc_gens(c);
percpu_down_read(&c->mark_lock);
if (!gc && new_a.gen != old_a.gen)
@@ -704,6 +688,9 @@ static int check_bucket_ref(struct bch_fs *c,
struct printbuf buf = PRINTBUF;
int ret = 0;
if (bucket_data_type == BCH_DATA_cached)
bucket_data_type = BCH_DATA_user;
if (gen_after(ptr->gen, b_gen)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
@@ -748,7 +735,8 @@ static int check_bucket_ref(struct bch_fs *c,
goto err;
}
if (bucket_data_type && ptr_data_type &&
if (!data_type_is_empty(bucket_data_type) &&
ptr_data_type &&
bucket_data_type != ptr_data_type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
@@ -1401,14 +1389,8 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
a->v.gen, &a->v.data_type,
&a->v.dirty_sectors, &a->v.cached_sectors);
if (ret)
goto out;
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
if (ret)
goto out;
out:
&a->v.dirty_sectors, &a->v.cached_sectors) ?:
bch2_trans_update(trans, &iter, &a->k_i, 0);
bch2_trans_iter_exit(trans, &iter);
return ret;
}


@@ -121,12 +121,10 @@ static inline u8 ptr_stale(struct bch_dev *ca,
/* Device usage: */
struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
void bch2_dev_usage_init(struct bch_dev *);
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats,
enum alloc_reserve reserve)
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve)
{
s64 total = ca->mi.nbuckets - ca->mi.first_bucket;
s64 reserved = 0;
switch (reserve) {
@@ -141,20 +139,19 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca,
fallthrough;
case RESERVE_btree_movinggc:
break;
default:
BUG();
}
if (WARN_ONCE(stats.buckets_unavailable > total,
"buckets_unavailable overflow (%llu > %llu)\n",
stats.buckets_unavailable, total))
return 0;
return reserved;
}
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage usage,
enum alloc_reserve reserve)
{
return max_t(s64, 0,
total -
stats.buckets_unavailable -
usage.d[BCH_DATA_free].buckets -
ca->nr_open_buckets -
reserved);
bch2_dev_buckets_reserved(ca, reserve));
}
static inline u64 dev_buckets_available(struct bch_dev *ca,


@@ -34,7 +34,6 @@ struct bucket_gens {
struct bch_dev_usage {
u64 buckets_ec;
u64 buckets_unavailable;
struct {
u64 buckets;


@@ -501,13 +501,12 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
arg.state = ca->mi.state;
arg.bucket_size = ca->mi.bucket_size;
arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
arg.available_buckets = arg.nr_buckets - src.buckets_unavailable;
arg.ec_buckets = src.buckets_ec;
arg.ec_sectors = 0;
arg.buckets_ec = src.buckets_ec;
for (i = 0; i < BCH_DATA_NR; i++) {
arg.buckets[i] = src.d[i].buckets;
arg.sectors[i] = src.d[i].sectors;
arg.d[i].buckets = src.d[i].buckets;
arg.d[i].sectors = src.d[i].sectors;
arg.d[i].fragmented = src.d[i].fragmented;
}
percpu_ref_put(&ca->ref);


@@ -585,9 +585,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
le64_to_cpu(u->d[i].fragmented));
}
pr_buf(out, " buckets_ec: %llu buckets_unavailable: %llu",
le64_to_cpu(u->buckets_ec),
le64_to_cpu(u->buckets_unavailable));
pr_buf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec));
}
static int journal_entry_log_validate(struct bch_fs *c,


@@ -155,7 +155,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
bch2_alloc_to_v4(k, &a);
if (fsck_err_on(bucket_state(a) != BUCKET_cached ||
if (fsck_err_on(a.data_type != BCH_DATA_cached ||
a.io_time[READ] != lru_k.k->p.offset, c,
"incorrect lru entry %s\n"
" for %s",


@@ -235,8 +235,15 @@ static int bch2_copygc(struct bch_fs *c)
}
for_each_rw_member(ca, c, dev_idx) {
s64 avail = min(dev_buckets_available(ca, RESERVE_movinggc),
ca->mi.nbuckets >> 6);
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
u64 avail = max_t(s64, 0,
usage.d[BCH_DATA_free].buckets +
usage.d[BCH_DATA_need_discard].buckets -
ca->nr_open_buckets -
bch2_dev_buckets_reserved(ca, RESERVE_movinggc));
avail = min(avail, ca->mi.nbuckets >> 6);
sectors_reserved += avail * ca->mi.bucket_size;
}


@@ -713,7 +713,6 @@ static int journal_replay_entry_early(struct bch_fs *c,
unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec);
ca->usage_base->buckets_unavailable = le64_to_cpu(u->buckets_unavailable);
for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
@@ -1080,18 +1079,11 @@ int bch2_fs_recovery(struct bch_fs *c)
}
if (!c->opts.nochanges) {
if (c->sb.version < bcachefs_metadata_version_inode_backpointers) {
bch_info(c, "version prior to inode backpointers, upgrade and fsck required");
if (c->sb.version < bcachefs_metadata_version_new_data_types) {
bch_info(c, "version prior to new_data_types, upgrade and fsck required");
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
} else if (c->sb.version < bcachefs_metadata_version_subvol_dirent) {
bch_info(c, "filesystem version is prior to subvol_dirent - upgrading");
c->opts.version_upgrade = true;
c->opts.fsck = true;
} else if (c->sb.version < bcachefs_metadata_version_alloc_v4) {
bch_info(c, "filesystem version is prior to alloc_v4 - upgrading");
c->opts.version_upgrade = true;
}
}
@@ -1436,6 +1428,9 @@ int bch2_fs_initialize(struct bch_fs *c)
for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
for_each_online_member(ca, c, i)
bch2_dev_usage_init(ca);
err = "unable to allocate journal buckets";
for_each_online_member(ca, c, i) {
ret = bch2_dev_journal_alloc(ca);


@@ -1275,7 +1275,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
u->entry.type = BCH_JSET_ENTRY_dev_usage;
u->dev = cpu_to_le32(dev);
u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec);
u->buckets_unavailable = cpu_to_le64(ca->usage_base->buckets_unavailable);
for (i = 0; i < BCH_DATA_NR; i++) {
u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);


@@ -1566,6 +1566,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err;
}
bch2_dev_usage_init(ca);
ret = __bch2_dev_attach_bdev(ca, &sb);
if (ret) {
bch2_dev_free(ca);


@@ -724,18 +724,17 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
nr[c->open_buckets[i].data_type]++;
pr_buf(out,
"\t\t buckets\t sectors fragmented\n"
"capacity%16llu\n",
"\t\t\t buckets\t sectors fragmented\n"
"capacity\t%16llu\n",
ca->mi.nbuckets - ca->mi.first_bucket);
for (i = 1; i < BCH_DATA_NR; i++)
pr_buf(out, "%-8s%16llu%16llu%16llu\n",
for (i = 0; i < BCH_DATA_NR; i++)
pr_buf(out, "%-16s%16llu%16llu%16llu\n",
bch2_data_types[i], stats.d[i].buckets,
stats.d[i].sectors, stats.d[i].fragmented);
pr_buf(out,
"ec\t%16llu\n"
"available%15llu\n"
"ec\t\t%16llu\n"
"\n"
"freelist_wait\t\t%s\n"
"open buckets allocated\t%u\n"
@@ -746,7 +745,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
"open_buckets_user\t%u\n"
"btree reserve cache\t%u\n",
stats.buckets_ec,
__dev_buckets_available(ca, stats, RESERVE_none),
c->freelist_wait.list.first ? "waiting" : "empty",
OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
ca->nr_open_buckets,