bcachefs: Add error path to btree_split()

The next patch in the series is (finally!) going to change btree splits
(and interior updates in general) to not take intent locks all the way
up to the root - instead only locking the nodes they'll need to modify.

However, this will be introducing a race since if we're not holding a
write lock on a btree node it can be written out by another thread, and
then we might not have enough space for a new bset entry.

We can handle this by retrying - we just need to introduce a new error
path.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-10-01 22:15:30 -04:00
parent 8cbb000250
commit a8eefbd324
2 changed files with 88 additions and 18 deletions

View File

@ -23,9 +23,9 @@
#include <linux/random.h>
static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
struct btree_path *, struct btree *,
struct keylist *, unsigned);
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
struct btree_path *, struct btree *,
struct keylist *, unsigned);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
@ -194,6 +194,43 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
}
}
static void bch2_btree_node_free_never_used(struct btree_update *as,
struct btree_trans *trans,
struct btree *b)
{
struct bch_fs *c = as->c;
struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
struct btree_path *path;
unsigned level = b->c.level;
BUG_ON(!list_empty(&b->write_blocked));
BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
b->will_make_reachable = 0;
closure_put(&as->cl);
clear_btree_node_will_make_reachable(b);
clear_btree_node_accessed(b);
clear_btree_node_dirty_acct(c, b);
clear_btree_node_need_write(b);
mutex_lock(&c->btree_cache.lock);
list_del_init(&b->list);
bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_unlock(&c->btree_cache.lock);
BUG_ON(p->nr >= ARRAY_SIZE(p->b));
p->b[p->nr++] = b;
six_unlock_intent(&b->c.lock);
trans_for_each_path(trans, path)
if (path->l[level].b == b) {
btree_node_unlock(trans, path, level);
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
}
}
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
@ -1462,15 +1499,16 @@ static void btree_split_insert_keys(struct btree_update *as,
btree_node_interior_verify(as->c, b);
}
static void btree_split(struct btree_update *as, struct btree_trans *trans,
struct btree_path *path, struct btree *b,
struct keylist *keys, unsigned flags)
static int btree_split(struct btree_update *as, struct btree_trans *trans,
struct btree_path *path, struct btree *b,
struct keylist *keys, unsigned flags)
{
struct bch_fs *c = as->c;
struct btree *parent = btree_node_parent(path, b);
struct btree *n1, *n2 = NULL, *n3 = NULL;
struct btree_path *path1 = NULL, *path2 = NULL;
u64 start_time = local_clock();
int ret = 0;
BUG_ON(!parent && (b != btree_node_root(c, b)));
BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
@ -1551,7 +1589,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (parent) {
/* Split a non root node */
bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
if (ret)
goto err;
} else if (n3) {
bch2_btree_set_root(as, trans, path, n3);
} else {
@ -1589,7 +1629,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (n2)
six_unlock_intent(&n2->c.lock);
six_unlock_intent(&n1->c.lock);
out:
if (path2) {
__bch2_btree_path_unlock(trans, path2);
bch2_path_put(trans, path2, true);
@ -1605,6 +1645,14 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
? BCH_TIME_btree_node_split
: BCH_TIME_btree_node_compact],
start_time);
return ret;
err:
if (n3)
bch2_btree_node_free_never_used(as, trans, n3);
if (n2)
bch2_btree_node_free_never_used(as, trans, n2);
bch2_btree_node_free_never_used(as, trans, n1);
goto out;
}
static void
@ -1639,9 +1687,9 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
* If a split occurred, this function will return early. This can only happen
* for leaf nodes -- inserts into interior nodes have to be atomic.
*/
static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
struct btree_path *path, struct btree *b,
struct keylist *keys, unsigned flags)
static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
struct btree_path *path, struct btree *b,
struct keylist *keys, unsigned flags)
{
struct bch_fs *c = as->c;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
@ -1654,6 +1702,9 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
if (!(local_clock() & 63))
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
bch2_btree_node_lock_for_insert(trans, path, b);
if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
@ -1680,9 +1731,9 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
bch2_btree_node_unlock_write(trans, path, b);
btree_node_interior_verify(c, b);
return;
return 0;
split:
btree_split(as, trans, path, b, keys, flags);
return btree_split(as, trans, path, b, keys, flags);
}
int bch2_btree_split_leaf(struct btree_trans *trans,
@ -1699,7 +1750,12 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
if (IS_ERR(as))
return PTR_ERR(as);
btree_split(as, trans, path, b, NULL, flags);
ret = btree_split(as, trans, path, b, NULL, flags);
if (ret) {
bch2_btree_update_free(as, trans);
return ret;
}
bch2_btree_update_done(as, trans);
for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
@ -1851,7 +1907,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_trans_verify_paths(trans);
bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
if (ret)
goto err_free_update;
bch2_trans_verify_paths(trans);
@ -1877,6 +1935,10 @@ err:
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
return ret;
err_free_update:
bch2_btree_node_free_never_used(as, trans, n);
bch2_btree_update_free(as, trans);
goto out;
}
/**
@ -1919,8 +1981,10 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
bch2_btree_insert_node(as, trans, iter->path, parent,
&as->parent_keys, flags);
ret = bch2_btree_insert_node(as, trans, iter->path, parent,
&as->parent_keys, flags);
if (ret)
goto err;
} else {
bch2_btree_set_root(as, trans, iter->path, n);
}
@ -1934,10 +1998,15 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as, trans);
bch2_path_put(trans, new_path, true);
out:
if (new_path)
bch2_path_put(trans, new_path, true);
bch2_btree_path_downgrade(trans, iter->path);
return ret;
err:
bch2_btree_node_free_never_used(as, trans, n);
bch2_btree_update_free(as, trans);
goto out;
}
struct async_btree_rewrite {

View File

@ -42,6 +42,7 @@
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
x(BCH_ERR_transaction_restart, transaction_restart_nested) \
x(0, no_btree_node) \
x(BCH_ERR_no_btree_node, no_btree_node_relock) \