mirror of
https://github.com/torvalds/linux.git
synced 2024-11-16 00:52:01 +00:00
bcachefs: Go RW before bch2_check_lrus()
Btree updates made before going RW are expensive if they arrive in random order, because they are inserted into the list of keys for journal replay, which is just a gap buffer.

This patch improves the bucket invalidate path so that, if bch2_check_lrus() hasn't finished, it only prints warnings instead of doing an emergency shutdown, which means we can now set BCH_FS_MAY_GO_RW before bch2_check_lrus().

Also, the filesystem state bits are reorganized a bit.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
104c69745f
commit
1cab5a82cc
@ -382,7 +382,8 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!a.v->io_time[READ]) {
|
||||
if (!a.v->io_time[READ] &&
|
||||
test_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags)) {
|
||||
pr_buf(err, "cached bucket with read_time == 0");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -588,7 +589,6 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
|
||||
!new_a->io_time[READ])
|
||||
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
|
||||
|
||||
|
||||
old_lru = alloc_lru_idx(old_a);
|
||||
new_lru = alloc_lru_idx(*new_a);
|
||||
|
||||
@ -1088,6 +1088,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
|
||||
|
||||
bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
|
||||
POS(ca->dev_idx, 0), 0);
|
||||
next_lru:
|
||||
k = bch2_btree_iter_peek(&lru_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
@ -1096,9 +1097,20 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
|
||||
if (!k.k || k.k->p.inode != ca->dev_idx)
|
||||
goto out;
|
||||
|
||||
if (bch2_trans_inconsistent_on(k.k->type != KEY_TYPE_lru, trans,
|
||||
"non lru key in lru btree"))
|
||||
goto out;
|
||||
if (k.k->type != KEY_TYPE_lru) {
|
||||
pr_buf(&buf, "non lru key in lru btree:\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
|
||||
if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
|
||||
bch_err(c, "%s", buf.buf);
|
||||
bch2_btree_iter_advance(&lru_iter);
|
||||
goto next_lru;
|
||||
} else {
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
idx = k.k->p.offset;
|
||||
bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
|
||||
@ -1111,13 +1123,19 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
|
||||
|
||||
if (idx != alloc_lru_idx(a->v)) {
|
||||
pr_buf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n ");
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
|
||||
pr_buf(&buf, "\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
|
||||
if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
|
||||
bch_err(c, "%s", buf.buf);
|
||||
bch2_btree_iter_advance(&lru_iter);
|
||||
goto next_lru;
|
||||
} else {
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
|
||||
|
@ -494,11 +494,6 @@ struct bch_dev {
|
||||
|
||||
enum {
|
||||
/* startup: */
|
||||
BCH_FS_CLEAN_SHUTDOWN,
|
||||
BCH_FS_INITIAL_GC_DONE,
|
||||
BCH_FS_INITIAL_GC_UNFIXED,
|
||||
BCH_FS_TOPOLOGY_REPAIR_DONE,
|
||||
BCH_FS_FSCK_DONE,
|
||||
BCH_FS_STARTED,
|
||||
BCH_FS_MAY_GO_RW,
|
||||
BCH_FS_RW,
|
||||
@ -508,16 +503,22 @@ enum {
|
||||
BCH_FS_STOPPING,
|
||||
BCH_FS_EMERGENCY_RO,
|
||||
BCH_FS_WRITE_DISABLE_COMPLETE,
|
||||
BCH_FS_CLEAN_SHUTDOWN,
|
||||
|
||||
/* fsck passes: */
|
||||
BCH_FS_TOPOLOGY_REPAIR_DONE,
|
||||
BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */
|
||||
BCH_FS_CHECK_LRUS_DONE,
|
||||
BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
|
||||
BCH_FS_FSCK_DONE,
|
||||
BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
|
||||
BCH_FS_NEED_ANOTHER_GC,
|
||||
|
||||
/* errors: */
|
||||
BCH_FS_ERROR,
|
||||
BCH_FS_TOPOLOGY_ERROR,
|
||||
BCH_FS_ERRORS_FIXED,
|
||||
BCH_FS_ERRORS_NOT_FIXED,
|
||||
|
||||
/* misc: */
|
||||
BCH_FS_NEED_ANOTHER_GC,
|
||||
BCH_FS_DELETED_NODES,
|
||||
};
|
||||
|
||||
struct btree_debug {
|
||||
|
@ -204,7 +204,9 @@ int bch2_check_lrus(struct bch_fs *c, bool initial)
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
bch2_check_lru_key(&trans, &iter, initial));
|
||||
if (ret)
|
||||
break;
|
||||
|
@ -994,7 +994,6 @@ static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
||||
bkey_subvolume_init(&root_volume.k_i);
|
||||
root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
|
||||
root_volume.v.flags = 0;
|
||||
@ -1096,6 +1095,12 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
if (c->opts.fsck && c->opts.norecovery) {
|
||||
bch_err(c, "cannot select both norecovery and fsck");
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_blacklist_table_initialize(c);
|
||||
if (ret) {
|
||||
bch_err(c, "error initializing blacklist table");
|
||||
@ -1189,6 +1194,13 @@ use_clean:
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* Skip past versions that might have possibly been used (as nonces),
|
||||
* but hadn't had their pointers written:
|
||||
*/
|
||||
if (c->sb.encryption_type && !c->sb.clean)
|
||||
atomic64_add(1 << 16, &c->key_version);
|
||||
|
||||
ret = read_btree_roots(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1211,12 +1223,7 @@ use_clean:
|
||||
goto err;
|
||||
bch_verbose(c, "stripes_read done");
|
||||
|
||||
/*
|
||||
* If we're not running fsck, this ensures bch2_fsck_err() calls are
|
||||
* instead interpreted as bch2_inconsistent_err() calls:
|
||||
*/
|
||||
if (!c->opts.fsck)
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
bch2_stripes_heap_start(c);
|
||||
|
||||
if (c->opts.fsck) {
|
||||
bool metadata_only = c->opts.norecovery;
|
||||
@ -1228,6 +1235,8 @@ use_clean:
|
||||
goto err;
|
||||
bch_verbose(c, "done checking allocations");
|
||||
|
||||
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
|
||||
|
||||
bch_info(c, "checking need_discard and freespace btrees");
|
||||
err = "error checking need_discard and freespace btrees";
|
||||
ret = bch2_check_alloc_info(c);
|
||||
@ -1235,55 +1244,60 @@ use_clean:
|
||||
goto err;
|
||||
bch_verbose(c, "done checking need_discard and freespace btrees");
|
||||
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
|
||||
bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
|
||||
err = "journal replay failed";
|
||||
ret = bch2_journal_replay(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
if (c->opts.verbose || !c->sb.clean)
|
||||
bch_info(c, "journal replay done");
|
||||
|
||||
bch_info(c, "checking lrus");
|
||||
err = "error checking lrus";
|
||||
ret = bch2_check_lrus(c, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "done checking lrus");
|
||||
}
|
||||
|
||||
bch2_stripes_heap_start(c);
|
||||
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
|
||||
|
||||
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
|
||||
/*
|
||||
* Skip past versions that might have possibly been used (as nonces),
|
||||
* but hadn't had their pointers written:
|
||||
*/
|
||||
if (c->sb.encryption_type && !c->sb.clean)
|
||||
atomic64_add(1 << 16, &c->key_version);
|
||||
|
||||
if (c->opts.norecovery)
|
||||
goto out;
|
||||
|
||||
bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
|
||||
err = "journal replay failed";
|
||||
ret = bch2_journal_replay(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
if (c->opts.verbose || !c->sb.clean)
|
||||
bch_info(c, "journal replay done");
|
||||
|
||||
err = "error initializing freespace";
|
||||
ret = bch2_fs_freespace_init(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->opts.fsck) {
|
||||
bch_info(c, "checking alloc to lru refs");
|
||||
err = "error checking alloc to lru refs";
|
||||
ret = bch2_check_alloc_to_lru_refs(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
|
||||
|
||||
ret = bch2_check_lrus(c, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "done checking alloc to lru refs");
|
||||
} else {
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
|
||||
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
|
||||
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
|
||||
if (c->opts.norecovery)
|
||||
goto out;
|
||||
|
||||
bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
|
||||
err = "journal replay failed";
|
||||
ret = bch2_journal_replay(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
if (c->opts.verbose || !c->sb.clean)
|
||||
bch_info(c, "journal replay done");
|
||||
}
|
||||
|
||||
err = "error initializing freespace";
|
||||
ret = bch2_fs_freespace_init(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
|
||||
bch2_fs_lazy_rw(c);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user