From a8eefbd324cd40fab57ab8eef88347d4f745db93 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 1 Oct 2022 22:15:30 -0400
Subject: [PATCH] bcachefs: Add error path to btree_split()

The next patch in the series is (finally!) going to change btree splits
(and interior updates in general) to not take intent locks all the way
up to the root - instead only locking the nodes they'll need to modify.

However, this will introduce a race: if we're not holding a write lock
on a btree node, it can be written out by another thread, and then we
might not have enough space for a new bset entry.

We can handle this by retrying - we just need to introduce a new error
path.

Signed-off-by: Kent Overstreet
---
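A note on the retry model this patch relies on: btree_trans_restart()
tags the transaction with a -BCH_ERR_transaction_restart_* code, and
the caller's commit loop is expected to catch anything in the
transaction_restart class (via bch2_err_matches()) and retry from the
top. The standalone toy program below sketches that shape; it is an
illustration only, not bcachefs code, and the names try_insert() and
attempts are invented for the example:

    #include <stdio.h>

    /* stands in for -BCH_ERR_transaction_restart_split_race */
    #define ERR_RESTART_SPLIT_RACE 1

    static int attempts;

    /*
     * Models bch2_btree_insert_node(): fail with a restart error the
     * first two calls, as if another thread wrote out the node and
     * there is no longer room for the new bset entry, then succeed.
     */
    static int try_insert(void)
    {
            return ++attempts < 3 ? -ERR_RESTART_SPLIT_RACE : 0;
    }

    int main(void)
    {
            int ret;

            do {
                    /* in bcachefs, bch2_trans_begin() would run here
                     * to drop locks and restart the transaction */
                    ret = try_insert();
            } while (ret == -ERR_RESTART_SPLIT_RACE);

            printf("insert succeeded after %d attempts\n", attempts);
            return 0;
    }

This is also why bch2_btree_insert_node() below injects the new error
on roughly one call in 64 (the local_clock() & 63 check): the retry
path gets exercised constantly in testing, rather than only under the
rare real race.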
 fs/bcachefs/btree_update_interior.c | 105 +++++++++++++++++++++++-----
 fs/bcachefs/errcode.h               |   1 +
 2 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index ac1e6d7286aa..b0a15757aaea 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -23,9 +23,9 @@
 
 #include
 
-static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
-                                   struct btree_path *, struct btree *,
-                                   struct keylist *, unsigned);
+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
+                                  struct btree_path *, struct btree *,
+                                  struct keylist *, unsigned);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
 static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
@@ -194,6 +194,43 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
 	}
 }
 
+static void bch2_btree_node_free_never_used(struct btree_update *as,
+                                            struct btree_trans *trans,
+                                            struct btree *b)
+{
+	struct bch_fs *c = as->c;
+	struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
+	struct btree_path *path;
+	unsigned level = b->c.level;
+
+	BUG_ON(!list_empty(&b->write_blocked));
+	BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
+
+	b->will_make_reachable = 0;
+	closure_put(&as->cl);
+
+	clear_btree_node_will_make_reachable(b);
+	clear_btree_node_accessed(b);
+	clear_btree_node_dirty_acct(c, b);
+	clear_btree_node_need_write(b);
+
+	mutex_lock(&c->btree_cache.lock);
+	list_del_init(&b->list);
+	bch2_btree_node_hash_remove(&c->btree_cache, b);
+	mutex_unlock(&c->btree_cache.lock);
+
+	BUG_ON(p->nr >= ARRAY_SIZE(p->b));
+	p->b[p->nr++] = b;
+
+	six_unlock_intent(&b->c.lock);
+
+	trans_for_each_path(trans, path)
+		if (path->l[level].b == b) {
+			btree_node_unlock(trans, path, level);
+			path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+		}
+}
+
 static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
                                              struct disk_reservation *res,
                                              struct closure *cl,
@@ -1462,15 +1499,16 @@ static void btree_split_insert_keys(struct btree_update *as,
 	btree_node_interior_verify(as->c, b);
 }
 
-static void btree_split(struct btree_update *as, struct btree_trans *trans,
-                        struct btree_path *path, struct btree *b,
-                        struct keylist *keys, unsigned flags)
+static int btree_split(struct btree_update *as, struct btree_trans *trans,
+                       struct btree_path *path, struct btree *b,
+                       struct keylist *keys, unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	struct btree *parent = btree_node_parent(path, b);
 	struct btree *n1, *n2 = NULL, *n3 = NULL;
 	struct btree_path *path1 = NULL, *path2 = NULL;
 	u64 start_time = local_clock();
+	int ret = 0;
 
 	BUG_ON(!parent && (b != btree_node_root(c, b)));
 	BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
@@ -1551,7 +1589,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
 	if (parent) {
 		/* Split a non root node */
-		bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+		ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+		if (ret)
+			goto err;
 	} else if (n3) {
 		bch2_btree_set_root(as, trans, path, n3);
 	} else {
@@ -1589,7 +1629,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 	if (n2)
 		six_unlock_intent(&n2->c.lock);
 	six_unlock_intent(&n1->c.lock);
-
+out:
 	if (path2) {
 		__bch2_btree_path_unlock(trans, path2);
 		bch2_path_put(trans, path2, true);
@@ -1605,6 +1645,14 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 			       ? BCH_TIME_btree_node_split
 			       : BCH_TIME_btree_node_compact],
 			       start_time);
+	return ret;
+err:
+	if (n3)
+		bch2_btree_node_free_never_used(as, trans, n3);
+	if (n2)
+		bch2_btree_node_free_never_used(as, trans, n2);
+	bch2_btree_node_free_never_used(as, trans, n1);
+	goto out;
 }
 
 static void
@@ -1639,9 +1687,9 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
  * If a split occurred, this function will return early. This can only happen
  * for leaf nodes -- inserts into interior nodes have to be atomic.
  */
-static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
-                                   struct btree_path *path, struct btree *b,
-                                   struct keylist *keys, unsigned flags)
+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
+                                  struct btree_path *path, struct btree *b,
+                                  struct keylist *keys, unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
@@ -1654,6 +1702,9 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
 	BUG_ON(!as || as->b);
 	bch2_verify_keylist_sorted(keys);
 
+	if (!(local_clock() & 63))
+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
 	bch2_btree_node_lock_for_insert(trans, path, b);
 
 	if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
@@ -1680,9 +1731,9 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
 	bch2_btree_node_unlock_write(trans, path, b);
 
 	btree_node_interior_verify(c, b);
-	return;
+	return 0;
 split:
-	btree_split(as, trans, path, b, keys, flags);
+	return btree_split(as, trans, path, b, keys, flags);
 }
 
 int bch2_btree_split_leaf(struct btree_trans *trans,
@@ -1699,7 +1750,12 @@
 	if (IS_ERR(as))
 		return PTR_ERR(as);
 
-	btree_split(as, trans, path, b, NULL, flags);
+	ret = btree_split(as, trans, path, b, NULL, flags);
+	if (ret) {
+		bch2_btree_update_free(as, trans);
+		return ret;
+	}
+
 	bch2_btree_update_done(as, trans);
 
 	for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
@@ -1851,7 +1907,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
 	bch2_trans_verify_paths(trans);
 
-	bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+	ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+	if (ret)
+		goto err_free_update;
 
 	bch2_trans_verify_paths(trans);
 
@@ -1877,6 +1935,10 @@ err:
 	bch2_path_put(trans, sib_path, true);
 	bch2_trans_verify_locks(trans);
 	return ret;
+err_free_update:
+	bch2_btree_node_free_never_used(as, trans, n);
+	bch2_btree_update_free(as, trans);
+	goto out;
 }
 
 /**
@@ -1919,8 +1981,10 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
 	if (parent) {
 		bch2_keylist_add(&as->parent_keys, &n->key);
-		bch2_btree_insert_node(as, trans, iter->path, parent,
-		                       &as->parent_keys, flags);
+		ret = bch2_btree_insert_node(as, trans, iter->path, parent,
+		                             &as->parent_keys, flags);
+		if (ret)
+			goto err;
 	} else {
 		bch2_btree_set_root(as, trans, iter->path, n);
 	}
@@ -1934,10 +1998,15 @@
 	six_unlock_intent(&n->c.lock);
 
 	bch2_btree_update_done(as, trans);
-	bch2_path_put(trans, new_path, true);
 out:
+	if (new_path)
+		bch2_path_put(trans, new_path, true);
 	bch2_btree_path_downgrade(trans, iter->path);
 	return ret;
+err:
+	bch2_btree_node_free_never_used(as, trans, n);
+	bch2_btree_update_free(as, trans);
+	goto out;
 }
 
 struct async_btree_rewrite {
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index bf7ae99d9cce..fb1e1cd0f864 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -42,6 +42,7 @@
 	x(BCH_ERR_transaction_restart,	transaction_restart_key_cache_raced)	\
 	x(BCH_ERR_transaction_restart,	transaction_restart_key_cache_realloced)\
 	x(BCH_ERR_transaction_restart,	transaction_restart_journal_preres_get)	\
+	x(BCH_ERR_transaction_restart,	transaction_restart_split_race)		\
 	x(BCH_ERR_transaction_restart,	transaction_restart_nested)		\
 	x(0,				no_btree_node)				\
 	x(BCH_ERR_no_btree_node,	no_btree_node_relock)			\
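
One more note, on the errcode.h hunk: BCH_ERR_transaction_restart_split_race
is declared as a child of BCH_ERR_transaction_restart, so every existing
bch2_err_matches(ret, BCH_ERR_transaction_restart) check catches the new
code with no caller changes. Below is a minimal standalone model of that
parent-class matching - an illustration only, since the real table is
generated from the x() macro list in errcode.h, and the names here are
invented:

    #include <stdbool.h>
    #include <stdio.h>

    enum {
            ERR_transaction_restart = 1,
            ERR_transaction_restart_split_race,
    };

    /* each code records its parent class; 0 terminates the chain */
    static const int err_parent[] = {
            [ERR_transaction_restart]            = 0,
            [ERR_transaction_restart_split_race] = ERR_transaction_restart,
    };

    static bool err_matches(int err, int class)
    {
            for (; err; err = err_parent[err])
                    if (err == class)
                            return true;
            return false;
    }

    int main(void)
    {
            /* prints 1: the new split-race restart is caught by the
             * generic transaction_restart handling */
            printf("%d\n", err_matches(ERR_transaction_restart_split_race,
                                       ERR_transaction_restart));
            return 0;
    }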