mirror of
https://github.com/torvalds/linux.git
synced 2025-01-01 15:51:46 +00:00
bcachefs fixes for 6.9-rc1
Assorted bugfixes. Most are fixes for simple assertion pops; the most significant fix is for a deadlock in recovery when we have to rewrite large numbers of btree nodes to fix errors. This was incorrectly running out of the same workqueue as the core interior btree update path - we now give it its own single threaded workqueue. This was visible to users as "bch2_btree_update_start(): error: BCH_ERR_journal_reclaim_would_deadlock" - and then recovery hanging. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmX6CoYACgkQE6szbY3K bnZp2hAAwAw8haQKeR0+0aAaqTavvBcjcloeKlQhRl+OxV1rAgxcjKGai5txZ9rI d4FVOOo7MqHq1oN9Ydsy1+0R70eCFzhDxhT1Ph5MhIzc7nd8lC0GQjO0atx23cni 4UZgSxi6quEP401MTVhvVbCPLmvfPJLpIBzptJUDS/eysxSZpS4A10gEzipoNjPv DOdrsvoo8nQX53tERJ/IxtroFL44p4y8OyZK65NILFF9xZosKz1P9ktrWufmRVoY /Hl8SUfhSNJDFW5pIMPOmoG/+RG+hJK4BaiNWPXLaSvO+3PmQskJ2tvHQVNjHQYt dMYWcy4hN47XtYvrHG9xmaQP+lZCDijdBrhmik4brqfZbloH43MVdDFysjfIPhUm qk+zzb0uE0ZhwRvQOjnYEQpHjXmj7Bm80+dhfNuuiKlhz4bOeDz8UZykJOzgD0zH n4cd+nbCxuogkukzLLQMbFv1+MCsCZpStkXP3GQXCK0k+H2briPGALuA74sxfAhH ajHLNr6qMU+uB6Ce0oM7e+9dPLfV/NalEwWW7aR/4TamxPBt575Hpjp0BV//BRfD IxdEKrMNdbKBJDUj1s5aTwcSF6ae6zHtyQXuKr93mWQqNvVXvX5/FPQYr70uA1VP iieBkde7aSTGCbTdTEcY9NcXdT2X/91aobsPvwGeq1z5Y1JJ0nU= =J/hu -----END PGP SIGNATURE----- Merge tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs Pull bcachefs fixes from Kent Overstreet: "Assorted bugfixes. Most are fixes for simple assertion pops; the most significant fix is for a deadlock in recovery when we have to rewrite large numbers of btree nodes to fix errors. This was incorrectly running out of the same workqueue as the core interior btree update path - we now give it its own single threaded workqueue. 
This was visible to users as "bch2_btree_update_start(): error: BCH_ERR_journal_reclaim_would_deadlock" - and then recovery hanging"

* tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix lost wakeup on journal shutdown
  bcachefs; Fix deadlock in bch2_btree_update_start()
  bcachefs: ratelimit errors from async_btree_node_rewrite
  bcachefs: Run check_topology() first
  bcachefs: Improve bch2_fatal_error()
  bcachefs: Fix lost transaction restart error
  bcachefs: Don't corrupt journal keys gap buffer when dropping alloc info
  bcachefs: fix for building in userspace
  bcachefs: bch2_snapshot_is_ancestor() now safe to call in early recovery
  bcachefs: Fix nested transaction restart handling in bch2_bucket_gens_init()
  bcachefs: Improve sysfs internal/btree_updates
  bcachefs: Split out btree_node_rewrite_worker
  bcachefs: Fix locking in bch2_alloc_write_key()
  bcachefs: Avoid extent entry type assertions in .invalid()
  bcachefs: Fix spurious -BCH_ERR_transaction_restart_nested
  bcachefs: Fix check_key_has_snapshot() call
  bcachefs: Change "accounting overran journal reservation" to a warning
This commit is contained in:
commit
a4145ce1e7
@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
|
||||
u8 gen = bch2_alloc_to_v4(k, &a)->gen;
|
||||
unsigned offset;
|
||||
struct bpos pos = alloc_gens_pos(iter.pos, &offset);
|
||||
int ret2 = 0;
|
||||
|
||||
if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
|
||||
if (ret)
|
||||
break;
|
||||
ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
if (ret2)
|
||||
goto iter_err;
|
||||
have_bucket_gens_key = false;
|
||||
}
|
||||
|
||||
@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
|
||||
}
|
||||
|
||||
g.v.gens[offset] = gen;
|
||||
0;
|
||||
iter_err:
|
||||
ret2;
|
||||
}));
|
||||
|
||||
if (have_bucket_gens_key && !ret)
|
||||
@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
|
||||
bucket_journal_seq);
|
||||
if (ret) {
|
||||
bch2_fs_fatal_error(c,
|
||||
"error setting bucket_needs_journal_commit: %i", ret);
|
||||
"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@ -1356,15 +1356,17 @@ retry:
|
||||
|
||||
/* Don't retry from all devices if we're out of open buckets: */
|
||||
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
|
||||
int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
|
||||
int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
|
||||
target, erasure_code,
|
||||
nr_replicas, &nr_effective,
|
||||
&have_cache, watermark,
|
||||
flags, cl);
|
||||
if (!ret ||
|
||||
bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
|
||||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
|
||||
if (!ret2 ||
|
||||
bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
|
||||
bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
|
||||
ret = ret2;
|
||||
goto alloc_done;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -849,6 +849,8 @@ struct bch_fs {
|
||||
struct workqueue_struct *btree_interior_update_worker;
|
||||
struct work_struct btree_interior_update_work;
|
||||
|
||||
struct workqueue_struct *btree_node_rewrite_worker;
|
||||
|
||||
struct list_head pending_node_rewrites;
|
||||
struct mutex pending_node_rewrites_lock;
|
||||
|
||||
|
@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
*old,
|
||||
b->data_type);
|
||||
gc = *b;
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (gc.data_type != old_gc.data_type ||
|
||||
gc.dirty_sectors != old_gc.dirty_sectors)
|
||||
bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (metadata_only &&
|
||||
gc.data_type != BCH_DATA_sb &&
|
||||
|
@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
ret = bset_encrypt(c, i, b->written << 9);
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"error decrypting btree node: %i", ret))
|
||||
"decrypting btree node: %s", bch2_err_str(ret)))
|
||||
goto fsck_err;
|
||||
|
||||
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
|
||||
@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
ret = bset_encrypt(c, i, b->written << 9);
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"error decrypting btree node: %i\n", ret))
|
||||
"decrypting btree node: %s", bch2_err_str(ret)))
|
||||
goto fsck_err;
|
||||
|
||||
sectors = vstruct_sectors(bne, c->block_bits);
|
||||
@ -1338,7 +1338,7 @@ start:
|
||||
if (saw_error && !btree_node_read_error(b)) {
|
||||
printbuf_reset(&buf);
|
||||
bch2_bpos_to_text(&buf, b->key.k.p);
|
||||
bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
|
||||
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
|
||||
__func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
|
||||
|
||||
bch2_btree_node_rewrite_async(c, b);
|
||||
@ -1874,8 +1874,8 @@ out:
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
if (!bch2_err_matches(ret, EROFS))
|
||||
bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
|
||||
bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
|
||||
"writing btree node: %s", bch2_err_str(ret));
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2131,7 +2131,7 @@ do_write:
|
||||
|
||||
ret = bset_encrypt(c, i, b->written << 9);
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"error encrypting btree node: %i\n", ret))
|
||||
"encrypting btree node: %s", bch2_err_str(ret)))
|
||||
goto err;
|
||||
|
||||
nonce = btree_nonce(i, b->written << 9);
|
||||
|
@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
|
||||
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
|
||||
!bch2_journal_error(j), c,
|
||||
"error flushing key cache: %s", bch2_err_str(ret));
|
||||
"flushing key cache: %s", bch2_err_str(ret));
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
|
@ -646,7 +646,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
|
||||
"%s(): error %s", __func__, bch2_err_str(ret));
|
||||
"%s", bch2_err_str(ret));
|
||||
err:
|
||||
if (as->b) {
|
||||
|
||||
@ -1067,13 +1067,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
flags &= ~BCH_WATERMARK_MASK;
|
||||
flags |= watermark;
|
||||
|
||||
if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
|
||||
watermark < c->journal.watermark) {
|
||||
if (watermark < c->journal.watermark) {
|
||||
struct journal_res res = { 0 };
|
||||
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
|
||||
|
||||
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
|
||||
watermark != BCH_WATERMARK_reclaim)
|
||||
journal_flags |= JOURNAL_RES_GET_NONBLOCK;
|
||||
|
||||
ret = drop_locks_do(trans,
|
||||
bch2_journal_res_get(&c->journal, &res, 1,
|
||||
watermark|JOURNAL_RES_GET_CHECK));
|
||||
bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
|
||||
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
|
||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -1117,6 +1122,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
closure_init(&as->cl, NULL);
|
||||
as->c = c;
|
||||
as->start_time = start_time;
|
||||
as->ip_started = _RET_IP_;
|
||||
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
||||
as->took_gc_lock = true;
|
||||
as->btree_id = path->btree_id;
|
||||
@ -1192,7 +1198,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
err:
|
||||
bch2_btree_update_free(as, trans);
|
||||
if (!bch2_err_matches(ret, ENOSPC) &&
|
||||
!bch2_err_matches(ret, EROFS))
|
||||
!bch2_err_matches(ret, EROFS) &&
|
||||
ret != -BCH_ERR_journal_reclaim_would_deadlock)
|
||||
bch_err_fn_ratelimited(c, ret);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -2114,7 +2121,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
async_btree_node_rewrite_trans(trans, a));
|
||||
bch_err_fn(c, ret);
|
||||
bch_err_fn_ratelimited(c, ret);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
|
||||
kfree(a);
|
||||
}
|
||||
@ -2161,7 +2168,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
|
||||
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
|
||||
}
|
||||
|
||||
queue_work(c->btree_interior_update_worker, &a->work);
|
||||
queue_work(c->btree_node_rewrite_worker, &a->work);
|
||||
}
|
||||
|
||||
void bch2_do_pending_node_rewrites(struct bch_fs *c)
|
||||
@ -2173,7 +2180,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
|
||||
list_del(&a->list);
|
||||
|
||||
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
|
||||
queue_work(c->btree_interior_update_worker, &a->work);
|
||||
queue_work(c->btree_node_rewrite_worker, &a->work);
|
||||
}
|
||||
mutex_unlock(&c->pending_node_rewrites_lock);
|
||||
}
|
||||
@ -2441,12 +2448,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
list_for_each_entry(as, &c->btree_interior_update_list, list)
|
||||
prt_printf(out, "%p m %u w %u r %u j %llu\n",
|
||||
as,
|
||||
as->mode,
|
||||
as->nodes_written,
|
||||
closure_nr_remaining(&as->cl),
|
||||
as->journal.seq);
|
||||
prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
|
||||
(void *) as->ip_started,
|
||||
as->mode,
|
||||
as->nodes_written,
|
||||
closure_nr_remaining(&as->cl),
|
||||
as->journal.seq);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
@ -2510,6 +2517,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
|
||||
|
||||
void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
|
||||
{
|
||||
if (c->btree_node_rewrite_worker)
|
||||
destroy_workqueue(c->btree_node_rewrite_worker);
|
||||
if (c->btree_interior_update_worker)
|
||||
destroy_workqueue(c->btree_interior_update_worker);
|
||||
mempool_exit(&c->btree_interior_update_pool);
|
||||
@ -2534,6 +2543,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
|
||||
if (!c->btree_interior_update_worker)
|
||||
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
|
||||
|
||||
c->btree_node_rewrite_worker =
|
||||
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
|
||||
if (!c->btree_node_rewrite_worker)
|
||||
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
|
||||
|
||||
if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
|
||||
sizeof(struct btree_update)))
|
||||
return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
|
||||
|
@ -32,6 +32,7 @@ struct btree_update {
|
||||
struct closure cl;
|
||||
struct bch_fs *c;
|
||||
u64 start_time;
|
||||
unsigned long ip_started;
|
||||
|
||||
struct list_head list;
|
||||
struct list_head unwritten_list;
|
||||
|
@ -378,7 +378,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
}
|
||||
}
|
||||
err:
|
||||
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
|
||||
bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
|
||||
trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
|
||||
bch2_journal_pin_drop(j, &wb->flushing.pin);
|
||||
wb->flushing.keys.nr = 0;
|
||||
|
@ -990,8 +990,8 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
ret = !gc
|
||||
? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
|
||||
: update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
|
||||
bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
|
||||
__func__);
|
||||
bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
|
||||
bch2_err_str(ret));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -1020,7 +1020,7 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
|
||||
bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
if (ret)
|
||||
|
@ -170,7 +170,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf);
|
||||
bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
out:
|
||||
|
@ -448,7 +448,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, new);
|
||||
bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
|
||||
bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
@ -1868,10 +1868,10 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
|
||||
return -BCH_ERR_stripe_alloc_blocked;
|
||||
|
||||
ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
|
||||
bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
|
||||
"reading stripe key: %s", bch2_err_str(ret));
|
||||
if (ret) {
|
||||
bch2_stripe_close(c, h->s);
|
||||
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -191,9 +191,9 @@ do { \
|
||||
|
||||
void bch2_fatal_error(struct bch_fs *);
|
||||
|
||||
#define bch2_fs_fatal_error(c, ...) \
|
||||
#define bch2_fs_fatal_error(c, _msg, ...) \
|
||||
do { \
|
||||
bch_err(c, __VA_ARGS__); \
|
||||
bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__); \
|
||||
bch2_fatal_error(c); \
|
||||
} while (0)
|
||||
|
||||
|
@ -108,17 +108,17 @@ static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *en
|
||||
|
||||
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
|
||||
{
|
||||
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
|
||||
return __extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
|
||||
}
|
||||
|
||||
static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
|
||||
{
|
||||
return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
|
||||
return __extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
|
||||
}
|
||||
|
||||
static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
|
||||
{
|
||||
switch (extent_entry_type(e)) {
|
||||
switch (__extent_entry_type(e)) {
|
||||
case BCH_EXTENT_ENTRY_crc32:
|
||||
case BCH_EXTENT_ENTRY_crc64:
|
||||
case BCH_EXTENT_ENTRY_crc128:
|
||||
|
@ -108,7 +108,8 @@ retry:
|
||||
goto retry;
|
||||
|
||||
bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
|
||||
"inode %u:%llu not found when updating",
|
||||
"%s: inode %u:%llu not found when updating",
|
||||
bch2_err_str(ret),
|
||||
inode_inum(inode).subvol,
|
||||
inode_inum(inode).inum);
|
||||
|
||||
|
@ -1114,10 +1114,9 @@ int bch2_check_inodes(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
|
||||
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
u32 restart_count = trans->restart_count;
|
||||
int ret = 0;
|
||||
s64 count2;
|
||||
|
||||
@ -1149,7 +1148,14 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
|
||||
}
|
||||
fsck_err:
|
||||
bch_err_fn(c, ret);
|
||||
return ret ?: trans_was_restarted(trans, restart_count);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * check_i_sectors() - run the i_sectors check and report transaction restarts.
 *
 * Thin wrapper around check_i_sectors_notnested(): it records
 * trans->restart_count on entry, runs the check, and returns the check's
 * result if that is nonzero. Only when the check returns 0 does it return
 * the result of trans_was_restarted(trans, restart_count).
 *
 * NOTE(review): trans_was_restarted() is defined elsewhere; presumably it
 * yields a restart error when the restart count changed during the call -
 * confirm against its definition.
 */
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
/* Snapshot taken before the check so restarts during it can be detected. */
u32 restart_count = trans->restart_count;
|
||||
/* GNU "?:" - a nonzero result from the check takes precedence. */
return check_i_sectors_notnested(trans, w) ?:
|
||||
trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
struct extent_end {
|
||||
@ -1533,7 +1539,7 @@ int bch2_check_extents(struct bch_fs *c)
|
||||
check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
|
||||
check_extent_overbig(trans, &iter, k);
|
||||
})) ?:
|
||||
check_i_sectors(trans, &w));
|
||||
check_i_sectors_notnested(trans, &w));
|
||||
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
extent_ends_exit(&extent_ends);
|
||||
@ -1563,10 +1569,9 @@ int bch2_check_indirect_extents(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
|
||||
static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
u32 restart_count = trans->restart_count;
|
||||
int ret = 0;
|
||||
s64 count2;
|
||||
|
||||
@ -1598,7 +1603,14 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
|
||||
}
|
||||
fsck_err:
|
||||
bch_err_fn(c, ret);
|
||||
return ret ?: trans_was_restarted(trans, restart_count);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * check_subdir_count() - run the subdirectory count check and report
 * transaction restarts.
 *
 * Thin wrapper around check_subdir_count_notnested(): it records
 * trans->restart_count on entry, runs the check, and returns the check's
 * result if that is nonzero. Only when the check returns 0 does it return
 * the result of trans_was_restarted(trans, restart_count).
 *
 * NOTE(review): trans_was_restarted() is defined elsewhere; presumably it
 * yields a restart error when the restart count changed during the call -
 * confirm against its definition.
 */
static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
/* Snapshot taken before the check so restarts during it can be detected. */
u32 restart_count = trans->restart_count;
|
||||
/* GNU "?:" - a nonzero result from the check takes precedence. */
return check_subdir_count_notnested(trans, w) ?:
|
||||
trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
static int check_dirent_inode_dirent(struct btree_trans *trans,
|
||||
@ -2003,7 +2015,8 @@ int bch2_check_dirents(struct bch_fs *c)
|
||||
k,
|
||||
NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)));
|
||||
check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
|
||||
check_subdir_count_notnested(trans, &dir));
|
||||
|
||||
snapshots_seen_exit(&s);
|
||||
inode_walker_exit(&dir);
|
||||
@ -2022,8 +2035,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
|
||||
int ret;
|
||||
|
||||
ret = check_key_has_snapshot(trans, iter, k);
|
||||
if (ret)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
return 0;
|
||||
|
||||
i = walk_inode(trans, inode, k);
|
||||
ret = PTR_ERR_OR_ZERO(i);
|
||||
|
@ -511,18 +511,18 @@ retry:
|
||||
if (journal_res_get_fast(j, res, flags))
|
||||
return 0;
|
||||
|
||||
if (bch2_journal_error(j))
|
||||
return -BCH_ERR_erofs_journal_err;
|
||||
|
||||
if (j->blocked)
|
||||
return -BCH_ERR_journal_res_get_blocked;
|
||||
|
||||
if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
|
||||
ret = JOURNAL_ERR_journal_full;
|
||||
can_discard = j->can_discard;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (j->blocked)
|
||||
return -BCH_ERR_journal_res_get_blocked;
|
||||
|
||||
if (bch2_journal_error(j))
|
||||
return -BCH_ERR_erofs_journal_err;
|
||||
|
||||
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
|
||||
ret = JOURNAL_ERR_max_in_flight;
|
||||
goto out;
|
||||
|
@ -1082,9 +1082,7 @@ reread:
|
||||
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
|
||||
j->encrypted_start,
|
||||
vstruct_end(j) - (void *) j->encrypted_start);
|
||||
bch2_fs_fatal_err_on(ret, c,
|
||||
"error decrypting journal entry: %s",
|
||||
bch2_err_str(ret));
|
||||
bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret));
|
||||
|
||||
mutex_lock(&jlist->lock);
|
||||
ret = journal_entry_add(c, ca, (struct journal_ptr) {
|
||||
@ -1820,7 +1818,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
jset_entry_for_each_key(i, k) {
|
||||
ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
|
||||
if (ret) {
|
||||
bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
|
||||
bch2_fs_fatal_error(c, "flushing journal keys to btree write buffer: %s",
|
||||
bch2_err_str(ret));
|
||||
bch2_journal_keys_to_write_buffer_end(c, &wb);
|
||||
return ret;
|
||||
}
|
||||
@ -1848,7 +1847,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
|
||||
bch2_journal_super_entries_add_common(c, &end, seq);
|
||||
u64s = (u64 *) end - (u64 *) start;
|
||||
BUG_ON(u64s > j->entry_u64s_reserved);
|
||||
|
||||
WARN_ON(u64s > j->entry_u64s_reserved);
|
||||
|
||||
le32_add_cpu(&jset->u64s, u64s);
|
||||
|
||||
@ -1856,7 +1856,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
bytes = vstruct_bytes(jset);
|
||||
|
||||
if (sectors > w->sectors) {
|
||||
bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
|
||||
bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
|
||||
vstruct_bytes(jset), w->sectors << 9,
|
||||
u64s, w->u64s_reserved, j->entry_u64s_reserved);
|
||||
return -EINVAL;
|
||||
@ -1884,8 +1884,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
||||
jset->encrypted_start,
|
||||
vstruct_end(jset) - (void *) jset->encrypted_start);
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"error decrypting journal entry: %i", ret))
|
||||
if (bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)))
|
||||
return ret;
|
||||
|
||||
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
|
||||
|
@ -101,8 +101,8 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
|
||||
bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
|
||||
__func__, buf.buf, bch2_err_str(ret));
|
||||
bch2_fs_fatal_error(c, "deleting logged operation %s: %s",
|
||||
buf.buf, bch2_err_str(ret));
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
||||
|
@ -155,8 +155,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
|
||||
if (bch2_err_matches(ret, EROFS))
|
||||
return ret;
|
||||
|
||||
if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
|
||||
__func__, bch2_err_str(ret)))
|
||||
if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
|
||||
return ret;
|
||||
|
||||
ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
|
||||
|
@ -90,10 +90,12 @@ static void do_reconstruct_alloc(struct bch_fs *c)
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
size_t src, dst;
|
||||
|
||||
move_gap(keys, keys->nr);
|
||||
|
||||
for (src = 0, dst = 0; src < keys->nr; src++)
|
||||
if (!btree_id_is_alloc(keys->data[src].btree_id))
|
||||
keys->data[dst++] = keys->data[src];
|
||||
keys->nr = dst;
|
||||
keys->nr = keys->gap = dst;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -203,6 +205,8 @@ static int bch2_journal_replay(struct bch_fs *c)
|
||||
|
||||
BUG_ON(!atomic_read(&keys->ref));
|
||||
|
||||
move_gap(keys, keys->nr);
|
||||
|
||||
/*
|
||||
* First, attempt to replay keys in sorted order. This is more
|
||||
* efficient - better locality of btree access - but some might fail if
|
||||
|
@ -13,11 +13,11 @@
|
||||
* must never change:
|
||||
*/
|
||||
#define BCH_RECOVERY_PASSES() \
|
||||
x(check_topology, 4, 0) \
|
||||
x(alloc_read, 0, PASS_ALWAYS) \
|
||||
x(stripes_read, 1, PASS_ALWAYS) \
|
||||
x(initialize_subvolumes, 2, 0) \
|
||||
x(snapshots_read, 3, PASS_ALWAYS) \
|
||||
x(check_topology, 4, 0) \
|
||||
x(check_allocations, 5, PASS_FSCK) \
|
||||
x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
|
||||
x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
|
||||
|
@ -91,18 +91,20 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
|
||||
|
||||
/* Snapshot nodes: */
|
||||
|
||||
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
|
||||
{
|
||||
struct snapshot_table *t;
|
||||
|
||||
rcu_read_lock();
|
||||
t = rcu_dereference(c->snapshots);
|
||||
|
||||
while (id && id < ancestor)
|
||||
id = __snapshot_t(t, id)->parent;
|
||||
return id == ancestor;
|
||||
}
|
||||
|
||||
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
{
|
||||
rcu_read_lock();
|
||||
bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
|
||||
rcu_read_unlock();
|
||||
|
||||
return id == ancestor;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
|
||||
@ -120,13 +122,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
|
||||
|
||||
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
{
|
||||
struct snapshot_table *t;
|
||||
bool ret;
|
||||
|
||||
EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots);
|
||||
|
||||
rcu_read_lock();
|
||||
t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
|
||||
if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
|
||||
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
|
||||
id = get_ancestor_below(t, id, ancestor);
|
||||
@ -134,11 +138,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
if (id && id < ancestor) {
|
||||
ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
|
||||
|
||||
EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
|
||||
EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
|
||||
} else {
|
||||
ret = id == ancestor;
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
@ -547,7 +551,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
|
||||
"snapshot tree points to missing subvolume:\n %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
|
||||
fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
|
||||
fsck_err_on(!bch2_snapshot_is_ancestor(c,
|
||||
le32_to_cpu(subvol.snapshot),
|
||||
root_id),
|
||||
c, snapshot_tree_to_wrong_subvol,
|
||||
|
@ -985,7 +985,7 @@ int bch2_write_super(struct bch_fs *c)
|
||||
prt_str(&buf, " > ");
|
||||
bch2_version_to_text(&buf, bcachefs_metadata_version_current);
|
||||
prt_str(&buf, ")");
|
||||
bch2_fs_fatal_error(c, "%s", buf.buf);
|
||||
bch2_fs_fatal_error(c, ": %s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -BCH_ERR_sb_not_downgraded;
|
||||
}
|
||||
@ -1005,7 +1005,7 @@ int bch2_write_super(struct bch_fs *c)
|
||||
|
||||
if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
|
||||
bch2_fs_fatal_error(c,
|
||||
"Superblock write was silently dropped! (seq %llu expected %llu)",
|
||||
": Superblock write was silently dropped! (seq %llu expected %llu)",
|
||||
le64_to_cpu(ca->sb_read_scratch->seq),
|
||||
ca->disk_sb.seq);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
@ -1015,7 +1015,7 @@ int bch2_write_super(struct bch_fs *c)
|
||||
|
||||
if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
|
||||
bch2_fs_fatal_error(c,
|
||||
"Superblock modified by another process (seq %llu expected %llu)",
|
||||
": Superblock modified by another process (seq %llu expected %llu)",
|
||||
le64_to_cpu(ca->sb_read_scratch->seq),
|
||||
ca->disk_sb.seq);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
@ -1066,7 +1066,7 @@ int bch2_write_super(struct bch_fs *c)
|
||||
!can_mount_with_written ||
|
||||
(can_mount_without_written &&
|
||||
!can_mount_with_written), c,
|
||||
"Unable to write superblock to sufficient devices (from %ps)",
|
||||
": Unable to write superblock to sufficient devices (from %ps)",
|
||||
(void *) _RET_IP_))
|
||||
ret = -1;
|
||||
out:
|
||||
|
@ -87,20 +87,28 @@ const char * const bch2_fs_flag_strs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
 * bch2_print_maybe_redirect() - emit a formatted message, optionally to a
 * stdio redirect instead of the kernel log.
 *
 * @stdio: redirect target; only consulted when built with __KERNEL__ defined
 * @fmt:   printf-style format string (checked via __printf(2, 0))
 * @args:  arguments for @fmt
 *
 * When __KERNEL__ is defined and @stdio is non-NULL, the message goes to
 * bch2_stdio_redirect_vprintf() and the function returns without calling
 * vprintk(). In every other case (no redirect, or __KERNEL__ not defined)
 * the message is passed to vprintk() unchanged.
 */
__printf(2, 0)
|
||||
static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
|
||||
{
|
||||
#ifdef __KERNEL__
|
||||
if (unlikely(stdio)) {
|
||||
/*
 * Skip two characters when fmt begins with KERN_SOH[0].
 * NOTE(review): presumably this strips the printk log-level
 * prefix (SOH byte plus level character) - confirm.
 */
if (fmt[0] == KERN_SOH[0])
|
||||
fmt += 2;
|
||||
|
||||
bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
/* Default path: no redirect in effect. */
vprintk(fmt, args);
|
||||
}
|
||||
|
||||
void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
|
||||
{
|
||||
struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio;
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
if (likely(!stdio)) {
|
||||
vprintk(fmt, args);
|
||||
} else {
|
||||
if (fmt[0] == KERN_SOH[0])
|
||||
fmt += 2;
|
||||
|
||||
bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
|
||||
}
|
||||
bch2_print_maybe_redirect(stdio, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
@ -110,14 +118,7 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
if (likely(!stdio)) {
|
||||
vprintk(fmt, args);
|
||||
} else {
|
||||
if (fmt[0] == KERN_SOH[0])
|
||||
fmt += 2;
|
||||
|
||||
bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
|
||||
}
|
||||
bch2_print_maybe_redirect(stdio, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
@ -683,6 +683,9 @@ static inline void __move_gap(void *array, size_t element_size,
|
||||
/* Move the gap in a gap buffer: */
|
||||
#define move_gap(_d, _new_gap) \
|
||||
do { \
|
||||
BUG_ON(_new_gap > (_d)->nr); \
|
||||
BUG_ON((_d)->gap > (_d)->nr); \
|
||||
\
|
||||
__move_gap((_d)->data, sizeof((_d)->data[0]), \
|
||||
(_d)->nr, (_d)->size, (_d)->gap, _new_gap); \
|
||||
(_d)->gap = _new_gap; \
|
||||
|
Loading…
Reference in New Issue
Block a user