bcachefs: Journal seq now incremented at entry open, not close

This patch moves initialization of the new journal entry out of
__journal_entry_close() and into journal_entry_open().

This also means that when there is no open journal entry,
journal_cur_seq() now refers to the sequence number of the last journal
entry, rather than the next one.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2022-02-28 16:35:42 -05:00 committed by Kent Overstreet
parent b66fbf3342
commit f0a3a2ccab
5 changed files with 44 additions and 98 deletions

View File

@ -781,7 +781,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (initial) {
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > journal_cur_seq(&c->journal));
k->k->version.lo > atomic64_read(&c->journal.seq));
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
if (ret)

View File

@ -21,16 +21,12 @@
static u64 last_unwritten_seq(struct journal *j)
{
union journal_res_state s = READ_ONCE(j->reservations);
lockdep_assert_held(&j->lock);
return journal_cur_seq(j) - ((s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK);
return j->seq_ondisk + 1;
}
static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq >= last_unwritten_seq(j);
return seq > j->seq_ondisk;
}
static bool __journal_entry_is_open(union journal_res_state state)
@ -49,8 +45,6 @@ journal_seq_to_buf(struct journal *j, u64 seq)
struct journal_buf *buf = NULL;
EBUG_ON(seq > journal_cur_seq(j));
EBUG_ON(seq == journal_cur_seq(j) &&
j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
if (journal_seq_unwritten(j, seq)) {
buf = j->buf + (seq & JOURNAL_BUF_MASK);
@ -68,31 +62,6 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
p->devs.nr = 0;
}
static void journal_pin_new_entry(struct journal *j)
{
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
}
static void bch2_journal_buf_init(struct journal *j)
{
struct journal_buf *buf = journal_cur_buf(j);
bkey_extent_init(&buf->key);
buf->noflush = false;
buf->must_flush = false;
buf->separate_flush = false;
buf->flush_time = 0;
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
}
void bch2_journal_halt(struct journal *j)
{
union journal_res_state old, new;
@ -200,11 +169,6 @@ static bool __journal_entry_close(struct journal *j)
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
/* Initialize new buffer: */
journal_pin_new_entry(j);
bch2_journal_buf_init(j);
cancel_delayed_work(&j->write_work);
bch2_journal_space_available(j);
@ -274,27 +238,47 @@ static int journal_entry_open(struct journal *j)
if (bch2_journal_error(j))
return cur_entry_insufficient_devices; /* -EROFS */
if (!fifo_free(&j->pin))
return cur_entry_journal_pin_full;
BUG_ON(!j->cur_entry_sectors);
/* We used to add things to the first journal entry before opening it,
* as a way to deal with a chicken-and-the-egg problem, but we shouldn't
* be anymore:
*/
BUG_ON(buf->data->u64s);
buf->expires = jiffies +
buf->expires =
(journal_cur_seq(j) == j->flushed_seq_ondisk
? jiffies
: j->last_flush_write) +
msecs_to_jiffies(c->opts.journal_flush_delay);
buf->u64s_reserved = j->entry_u64s_reserved;
buf->disk_sectors = j->cur_entry_sectors;
buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9);
u64s = (int) (buf->sectors << 9) / sizeof(u64) -
journal_entry_overhead(j);
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= 0)
return cur_entry_journal_full;
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
bkey_extent_init(&buf->key);
buf->noflush = false;
buf->must_flush = false;
buf->separate_flush = false;
buf->flush_time = 0;
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
/*
* Must be set before marking the journal entry as open:
*/
@ -305,8 +289,8 @@ static int journal_entry_open(struct journal *j)
old.v = new.v = v;
BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL);
BUG_ON(journal_state_count(new, new.idx));
EBUG_ON(journal_state_count(new, new.idx));
journal_state_inc(&new);
new.cur_entry_offset = 0;
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
@ -595,9 +579,12 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
seq = max(seq, last_unwritten_seq(j));
recheck_need_open:
if (seq == journal_cur_seq(j) && !journal_entry_is_open(j)) {
if (seq > journal_cur_seq(j)) {
struct journal_res res = { 0 };
if (journal_entry_is_open(j))
__journal_entry_close(j);
spin_unlock(&j->lock);
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
@ -694,42 +681,12 @@ int bch2_journal_meta(struct journal *j)
*/
void bch2_journal_flush_async(struct journal *j, struct closure *parent)
{
u64 seq, journal_seq;
spin_lock(&j->lock);
journal_seq = journal_cur_seq(j);
if (journal_entry_is_open(j)) {
seq = journal_seq;
} else if (journal_seq) {
seq = journal_seq - 1;
} else {
spin_unlock(&j->lock);
return;
}
spin_unlock(&j->lock);
bch2_journal_flush_seq_async(j, seq, parent);
bch2_journal_flush_seq_async(j, atomic64_read(&j->seq), parent);
}
int bch2_journal_flush(struct journal *j)
{
u64 seq, journal_seq;
spin_lock(&j->lock);
journal_seq = journal_cur_seq(j);
if (journal_entry_is_open(j)) {
seq = journal_seq;
} else if (journal_seq) {
seq = journal_seq - 1;
} else {
spin_unlock(&j->lock);
return 0;
}
spin_unlock(&j->lock);
return bch2_journal_flush_seq(j, seq);
return bch2_journal_flush_seq(j, atomic64_read(&j->seq));
}
/*
@ -1022,8 +979,7 @@ void bch2_fs_journal_stop(struct journal *j)
BUG_ON(!bch2_journal_error(j) &&
test_bit(JOURNAL_REPLAY_DONE, &j->flags) &&
(journal_entry_is_open(j) ||
j->last_empty_seq + 1 != journal_cur_seq(j)));
j->last_empty_seq != journal_cur_seq(j));
cancel_delayed_work_sync(&j->write_work);
bch2_journal_reclaim_stop(j);
@ -1093,11 +1049,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
set_bit(JOURNAL_STARTED, &j->flags);
j->last_flush_write = jiffies;
journal_pin_new_entry(j);
j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
bch2_journal_buf_init(j);
j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j) + 1;
c->last_bucket_seq_cleanup = journal_cur_seq(j);

View File

@ -1368,8 +1368,6 @@ static void journal_write_done(struct closure *cl)
journal_seq_pin(j, seq)->devs = w->devs_written;
if (!err) {
j->seq_ondisk = seq;
if (!JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq;
@ -1377,6 +1375,8 @@ static void journal_write_done(struct closure *cl)
} else if (!j->err_seq || seq < j->err_seq)
j->err_seq = seq;
j->seq_ondisk = seq;
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:

View File

@ -226,8 +226,6 @@ void bch2_journal_space_available(struct journal *j)
ret = cur_entry_journal_stuck;
} else if (!j->space[journal_space_discarded].next_entry)
ret = cur_entry_journal_full;
else if (!fifo_free(&j->pin))
ret = cur_entry_journal_pin_full;
if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) &&
@ -369,9 +367,6 @@ static inline void __journal_pin_drop(struct journal *j,
if (atomic_dec_and_test(&pin_list->count) &&
pin_list == &fifo_peek_front(&j->pin))
bch2_journal_reclaim_fast(j);
else if (fifo_used(&j->pin) == 1 &&
atomic_read(&pin_list->count) == 1)
journal_wake(j);
}
void bch2_journal_pin_drop(struct journal *j,
@ -769,8 +764,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
*/
ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
journal_last_seq(j) > seq_to_flush ||
(fifo_used(&j->pin) == 1 &&
atomic_read(&fifo_peek_front(&j->pin).count) == 1);
!fifo_used(&j->pin);
spin_unlock(&j->lock);
mutex_unlock(&j->reclaim_lock);

View File

@ -1344,7 +1344,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
}
sb_clean->flags = 0;
sb_clean->journal_seq = cpu_to_le64(journal_cur_seq(&c->journal) - 1);
sb_clean->journal_seq = cpu_to_le64(atomic64_read(&c->journal.seq));
/* Trying to catch outstanding bug: */
BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);