mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 22:21:40 +00:00
bcachefs: Don't downgrade locks on transaction restart
We should only downgrade locks on success; otherwise, the restarted transaction won't get the correct locks and we'll livelock.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
2e7acdfbca
commit
be9e782df3
@ -1523,6 +1523,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
|
||||
path->ref = 0;
|
||||
path->intent_ref = 0;
|
||||
path->nodes_locked = 0;
|
||||
path->alloc_seq++;
|
||||
|
||||
btree_path_list_add(trans, pos, path);
|
||||
trans->paths_sorted = false;
|
||||
@ -1598,7 +1599,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
|
||||
|
||||
locks_want = min(locks_want, BTREE_MAX_DEPTH);
|
||||
if (locks_want > path->locks_want)
|
||||
bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);
|
||||
bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
|
||||
|
||||
return path;
|
||||
}
|
||||
|
@ -509,7 +509,7 @@ fill:
|
||||
* path->uptodate yet:
|
||||
*/
|
||||
if (!path->locks_want &&
|
||||
!__bch2_btree_path_upgrade(trans, path, 1)) {
|
||||
!__bch2_btree_path_upgrade(trans, path, 1, NULL)) {
|
||||
trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
|
||||
goto err;
|
||||
|
@ -431,7 +431,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
|
||||
static inline bool btree_path_get_locks(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
bool upgrade)
|
||||
bool upgrade,
|
||||
struct get_locks_fail *f)
|
||||
{
|
||||
unsigned l = path->level;
|
||||
int fail_idx = -1;
|
||||
@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
|
||||
|
||||
if (!(upgrade
|
||||
? bch2_btree_node_upgrade(trans, path, l)
|
||||
: bch2_btree_node_relock(trans, path, l)))
|
||||
fail_idx = l;
|
||||
: bch2_btree_node_relock(trans, path, l))) {
|
||||
fail_idx = l;
|
||||
|
||||
if (f) {
|
||||
f->l = l;
|
||||
f->b = path->l[l].b;
|
||||
}
|
||||
}
|
||||
|
||||
l++;
|
||||
} while (l < path->locks_want);
|
||||
@ -584,7 +591,9 @@ __flatten
|
||||
bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned long trace_ip)
|
||||
{
|
||||
return btree_path_get_locks(trans, path, false);
|
||||
struct get_locks_fail f;
|
||||
|
||||
return btree_path_get_locks(trans, path, false, &f);
|
||||
}
|
||||
|
||||
int __bch2_btree_path_relock(struct btree_trans *trans,
|
||||
@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
|
||||
|
||||
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
unsigned new_locks_want,
|
||||
struct get_locks_fail *f)
|
||||
{
|
||||
EBUG_ON(path->locks_want >= new_locks_want);
|
||||
|
||||
path->locks_want = new_locks_want;
|
||||
|
||||
return btree_path_get_locks(trans, path, true);
|
||||
return btree_path_get_locks(trans, path, true, f);
|
||||
}
|
||||
|
||||
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
unsigned new_locks_want,
|
||||
struct get_locks_fail *f)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
|
||||
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
|
||||
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
|
||||
return true;
|
||||
|
||||
/*
|
||||
@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
linked->btree_id == path->btree_id &&
|
||||
linked->locks_want < new_locks_want) {
|
||||
linked->locks_want = new_locks_want;
|
||||
btree_path_get_locks(trans, linked, true);
|
||||
btree_path_get_locks(trans, linked, true, NULL);
|
||||
}
|
||||
|
||||
return false;
|
||||
@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
if (trans->restarted)
|
||||
return;
|
||||
|
||||
EBUG_ON(path->locks_want < new_locks_want);
|
||||
|
||||
path->locks_want = new_locks_want;
|
||||
@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
bch2_btree_path_verify_locks(path);
|
||||
|
||||
path->downgrade_seq++;
|
||||
trace_path_downgrade(trans, _RET_IP_, path);
|
||||
}
|
||||
|
||||
/* Btree transaction locking: */
|
||||
@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
if (trans->restarted)
|
||||
return;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
bch2_btree_path_downgrade(trans, path);
|
||||
}
|
||||
|
@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
|
||||
|
||||
/* upgrade */
|
||||
|
||||
|
||||
struct get_locks_fail {
|
||||
unsigned l;
|
||||
struct btree *b;
|
||||
};
|
||||
|
||||
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
struct btree_path *, unsigned,
|
||||
struct get_locks_fail *);
|
||||
|
||||
bool __bch2_btree_path_upgrade(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
struct btree_path *, unsigned,
|
||||
struct get_locks_fail *);
|
||||
|
||||
static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
struct get_locks_fail f;
|
||||
unsigned old_locks_want = path->locks_want;
|
||||
|
||||
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
|
||||
|
||||
if (path->locks_want < new_locks_want
|
||||
? __bch2_btree_path_upgrade(trans, path, new_locks_want)
|
||||
? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
|
||||
: path->uptodate == BTREE_ITER_UPTODATE)
|
||||
return 0;
|
||||
|
||||
trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
|
||||
old_locks_want, new_locks_want);
|
||||
old_locks_want, new_locks_want, &f);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
}
|
||||
|
||||
|
@ -861,12 +861,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
|
||||
*/
|
||||
bch2_journal_res_put(&c->journal, &trans->journal_res);
|
||||
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
bch2_trans_downgrade(trans);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int journal_reclaim_wait_done(struct bch_fs *c)
|
||||
@ -1135,6 +1130,8 @@ out:
|
||||
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
|
||||
out_reset:
|
||||
if (!ret)
|
||||
bch2_trans_downgrade(trans);
|
||||
bch2_trans_reset_updates(trans);
|
||||
|
||||
return ret;
|
||||
|
@ -228,6 +228,8 @@ struct btree_path {
|
||||
u8 sorted_idx;
|
||||
u8 ref;
|
||||
u8 intent_ref;
|
||||
u32 alloc_seq;
|
||||
u32 downgrade_seq;
|
||||
|
||||
/* btree_iter_copy starts here: */
|
||||
struct bpos pos;
|
||||
|
@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
out:
|
||||
if (new_path)
|
||||
bch2_path_put(trans, new_path, true);
|
||||
bch2_btree_path_downgrade(trans, iter->path);
|
||||
bch2_trans_downgrade(trans);
|
||||
return ret;
|
||||
err:
|
||||
bch2_btree_node_free_never_used(as, trans, n);
|
||||
|
@ -162,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
|
||||
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
|
||||
!ptr->cached) {
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
|
||||
/*
|
||||
* See comment below:
|
||||
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
|
||||
*/
|
||||
rewrites_found |= 1U << i;
|
||||
}
|
||||
i++;
|
||||
@ -212,14 +208,8 @@ restart_drop_extra_replicas:
|
||||
if (!p.ptr.cached &&
|
||||
durability - ptr_durability >= m->op.opts.data_replicas) {
|
||||
durability -= ptr_durability;
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr);
|
||||
/*
|
||||
* Currently, we're dropping unneeded replicas
|
||||
* instead of marking them as cached, since
|
||||
* cached data in stripe buckets prevents them
|
||||
* from being reused:
|
||||
|
||||
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
|
||||
*/
|
||||
goto restart_drop_extra_replicas;
|
||||
}
|
||||
}
|
||||
|
@ -1043,13 +1043,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
|
||||
TP_ARGS(trans, caller_ip, path)
|
||||
);
|
||||
|
||||
struct get_locks_fail;
|
||||
|
||||
TRACE_EVENT(trans_restart_upgrade,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
struct btree_path *path,
|
||||
unsigned old_locks_want,
|
||||
unsigned new_locks_want),
|
||||
TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want),
|
||||
unsigned new_locks_want,
|
||||
struct get_locks_fail *f),
|
||||
TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 32 )
|
||||
@ -1057,6 +1060,11 @@ TRACE_EVENT(trans_restart_upgrade,
|
||||
__field(u8, btree_id )
|
||||
__field(u8, old_locks_want )
|
||||
__field(u8, new_locks_want )
|
||||
__field(u8, level )
|
||||
__field(u32, path_seq )
|
||||
__field(u32, node_seq )
|
||||
__field(u32, path_alloc_seq )
|
||||
__field(u32, downgrade_seq)
|
||||
TRACE_BPOS_entries(pos)
|
||||
),
|
||||
|
||||
@ -1066,10 +1074,15 @@ TRACE_EVENT(trans_restart_upgrade,
|
||||
__entry->btree_id = path->btree_id;
|
||||
__entry->old_locks_want = old_locks_want;
|
||||
__entry->new_locks_want = new_locks_want;
|
||||
__entry->level = f->l;
|
||||
__entry->path_seq = path->l[f->l].lock_seq;
|
||||
__entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq;
|
||||
__entry->path_alloc_seq = path->alloc_seq;
|
||||
__entry->downgrade_seq = path->downgrade_seq;
|
||||
TRACE_BPOS_assign(pos, path->pos)
|
||||
),
|
||||
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u",
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u",
|
||||
__entry->trans_fn,
|
||||
(void *) __entry->caller_ip,
|
||||
bch2_btree_id_str(__entry->btree_id),
|
||||
@ -1077,7 +1090,12 @@ TRACE_EVENT(trans_restart_upgrade,
|
||||
__entry->pos_offset,
|
||||
__entry->pos_snapshot,
|
||||
__entry->old_locks_want,
|
||||
__entry->new_locks_want)
|
||||
__entry->new_locks_want,
|
||||
__entry->level,
|
||||
__entry->path_seq,
|
||||
__entry->node_seq,
|
||||
__entry->path_alloc_seq,
|
||||
__entry->downgrade_seq)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
|
||||
@ -1238,6 +1256,27 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
|
||||
__entry->new_u64s)
|
||||
);
|
||||
|
||||
TRACE_EVENT(path_downgrade,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
struct btree_path *path),
|
||||
TP_ARGS(trans, caller_ip, path),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
|
||||
__entry->caller_ip = caller_ip;
|
||||
),
|
||||
|
||||
TP_printk("%s %pS",
|
||||
__entry->trans_fn,
|
||||
(void *) __entry->caller_ip)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip),
|
||||
|
Loading…
Reference in New Issue
Block a user