bcachefs: Journal seq now incremented at entry open, not close

This patch moves initialization of the new journal entry from __journal_entry_close() to journal_entry_open(). As a result, when no journal entry is open, journal_cur_seq() now refers to the sequence number of the last journal entry rather than the next one.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2024-11-13 07:31:45 +00:00 · 2022-02-28 16:35:42 -05:00 · 2022-02-28 16:35:42 -05:00 · f0a3a2ccab
commit f0a3a2ccab
parent b66fbf3342
5 changed files with 44 additions and 98 deletions
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -781,7 +781,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,

 	if (initial) {
 		BUG_ON(bch2_journal_seq_verify &&
-		       k->k->version.lo > journal_cur_seq(&c->journal));
+		       k->k->version.lo > atomic64_read(&c->journal.seq));

 		ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
 		if (ret)
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -21,16 +21,12 @@

 static u64 last_unwritten_seq(struct journal *j)
 {
-	union journal_res_state s = READ_ONCE(j->reservations);
-
-	lockdep_assert_held(&j->lock);
-
-	return journal_cur_seq(j) - ((s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK);
+	return j->seq_ondisk + 1;
 }

 static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
 {
-	return seq >= last_unwritten_seq(j);
+	return seq > j->seq_ondisk;
 }

 static bool __journal_entry_is_open(union journal_res_state state)
@@ -49,8 +45,6 @@ journal_seq_to_buf(struct journal *j, u64 seq)
 	struct journal_buf *buf = NULL;

 	EBUG_ON(seq > journal_cur_seq(j));
-	EBUG_ON(seq == journal_cur_seq(j) &&
-		j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);

 	if (journal_seq_unwritten(j, seq)) {
 		buf = j->buf + (seq & JOURNAL_BUF_MASK);
@@ -68,31 +62,6 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
 	p->devs.nr = 0;
 }

-static void journal_pin_new_entry(struct journal *j)
-{
-	/*
-	 * The fifo_push() needs to happen at the same time as j->seq is
-	 * incremented for journal_last_seq() to be calculated correctly
-	 */
-	atomic64_inc(&j->seq);
-	journal_pin_list_init(fifo_push_ref(&j->pin), 1);
-}
-
-static void bch2_journal_buf_init(struct journal *j)
-{
-	struct journal_buf *buf = journal_cur_buf(j);
-
-	bkey_extent_init(&buf->key);
-	buf->noflush	= false;
-	buf->must_flush	= false;
-	buf->separate_flush = false;
-	buf->flush_time	= 0;
-
-	memset(buf->data, 0, sizeof(*buf->data));
-	buf->data->seq	= cpu_to_le64(journal_cur_seq(j));
-	buf->data->u64s	= 0;
-}
-
 void bch2_journal_halt(struct journal *j)
 {
 	union journal_res_state old, new;
@@ -200,11 +169,6 @@ static bool __journal_entry_close(struct journal *j)

 	__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));

-	/* Initialize new buffer: */
-	journal_pin_new_entry(j);
-
-	bch2_journal_buf_init(j);
-
 	cancel_delayed_work(&j->write_work);

 	bch2_journal_space_available(j);
@@ -274,27 +238,47 @@ static int journal_entry_open(struct journal *j)
 	if (bch2_journal_error(j))
 		return cur_entry_insufficient_devices; /* -EROFS */

+	if (!fifo_free(&j->pin))
+		return cur_entry_journal_pin_full;
+
 	BUG_ON(!j->cur_entry_sectors);

-	/* We used to add things to the first journal entry before opening it,
-	 * as a way to deal with a chicken-and-the-egg problem, but we shouldn't
-	 * be anymore:
-	 */
-	BUG_ON(buf->data->u64s);
-
-	buf->expires		= jiffies +
+	buf->expires		=
+		(journal_cur_seq(j) == j->flushed_seq_ondisk
+		 ? jiffies
+		 : j->last_flush_write) +
 		msecs_to_jiffies(c->opts.journal_flush_delay);
+
 	buf->u64s_reserved	= j->entry_u64s_reserved;
 	buf->disk_sectors	= j->cur_entry_sectors;
 	buf->sectors		= min(buf->disk_sectors, buf->buf_size >> 9);

 	u64s = (int) (buf->sectors << 9) / sizeof(u64) -
 		journal_entry_overhead(j);
-	u64s  = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
+	u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);

 	if (u64s <= 0)
 		return cur_entry_journal_full;

+	/*
+	 * The fifo_push() needs to happen at the same time as j->seq is
+	 * incremented for journal_last_seq() to be calculated correctly
+	 */
+	atomic64_inc(&j->seq);
+	journal_pin_list_init(fifo_push_ref(&j->pin), 1);
+
+	BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
+
+	bkey_extent_init(&buf->key);
+	buf->noflush	= false;
+	buf->must_flush	= false;
+	buf->separate_flush = false;
+	buf->flush_time	= 0;
+
+	memset(buf->data, 0, sizeof(*buf->data));
+	buf->data->seq	= cpu_to_le64(journal_cur_seq(j));
+	buf->data->u64s	= 0;
+
 	/*
 	 * Must be set before marking the journal entry as open:
 	 */
@@ -305,8 +289,8 @@ static int journal_entry_open(struct journal *j)
 		old.v = new.v = v;

 		BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL);
+		BUG_ON(journal_state_count(new, new.idx));

-		EBUG_ON(journal_state_count(new, new.idx));
 		journal_state_inc(&new);
 		new.cur_entry_offset = 0;
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
@@ -595,9 +579,12 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 	seq = max(seq, last_unwritten_seq(j));

 recheck_need_open:
-	if (seq == journal_cur_seq(j) && !journal_entry_is_open(j)) {
+	if (seq > journal_cur_seq(j)) {
 		struct journal_res res = { 0 };

+		if (journal_entry_is_open(j))
+			__journal_entry_close(j);
+
 		spin_unlock(&j->lock);

 		ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
@@ -694,42 +681,12 @@ int bch2_journal_meta(struct journal *j)
 */
 void bch2_journal_flush_async(struct journal *j, struct closure *parent)
 {
-	u64 seq, journal_seq;
-
-	spin_lock(&j->lock);
-	journal_seq = journal_cur_seq(j);
-
-	if (journal_entry_is_open(j)) {
-		seq = journal_seq;
-	} else if (journal_seq) {
-		seq = journal_seq - 1;
-	} else {
-		spin_unlock(&j->lock);
-		return;
-	}
-	spin_unlock(&j->lock);
-
-	bch2_journal_flush_seq_async(j, seq, parent);
+	bch2_journal_flush_seq_async(j, atomic64_read(&j->seq), parent);
 }

 int bch2_journal_flush(struct journal *j)
 {
-	u64 seq, journal_seq;
-
-	spin_lock(&j->lock);
-	journal_seq = journal_cur_seq(j);
-
-	if (journal_entry_is_open(j)) {
-		seq = journal_seq;
-	} else if (journal_seq) {
-		seq = journal_seq - 1;
-	} else {
-		spin_unlock(&j->lock);
-		return 0;
-	}
-	spin_unlock(&j->lock);
-
-	return bch2_journal_flush_seq(j, seq);
+	return bch2_journal_flush_seq(j, atomic64_read(&j->seq));
 }

 /*
@@ -1022,8 +979,7 @@ void bch2_fs_journal_stop(struct journal *j)

 	BUG_ON(!bch2_journal_error(j) &&
 	       test_bit(JOURNAL_REPLAY_DONE, &j->flags) &&
-	       (journal_entry_is_open(j) ||
-		j->last_empty_seq + 1 != journal_cur_seq(j)));
+	       j->last_empty_seq != journal_cur_seq(j));

 	cancel_delayed_work_sync(&j->write_work);
 	bch2_journal_reclaim_stop(j);
@@ -1093,11 +1049,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 	set_bit(JOURNAL_STARTED, &j->flags);
 	j->last_flush_write = jiffies;

-	journal_pin_new_entry(j);
-
-	j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
-
-	bch2_journal_buf_init(j);
+	j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j) + 1;

 	c->last_bucket_seq_cleanup = journal_cur_seq(j);

--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1368,8 +1368,6 @@ static void journal_write_done(struct closure *cl)
 		journal_seq_pin(j, seq)->devs = w->devs_written;

 	if (!err) {
-		j->seq_ondisk		= seq;
-
 		if (!JSET_NO_FLUSH(w->data)) {
 			j->flushed_seq_ondisk = seq;
 			j->last_seq_ondisk = w->last_seq;
@@ -1377,6 +1375,8 @@ static void journal_write_done(struct closure *cl)
 	} else if (!j->err_seq || seq < j->err_seq)
 		j->err_seq	= seq;

+	j->seq_ondisk		= seq;
+
 	/*
 	 * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
 	 * more buckets:
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -226,8 +226,6 @@ void bch2_journal_space_available(struct journal *j)
 		ret = cur_entry_journal_stuck;
 	} else if (!j->space[journal_space_discarded].next_entry)
 		ret = cur_entry_journal_full;
-	else if (!fifo_free(&j->pin))
-		ret = cur_entry_journal_pin_full;

 	if ((j->space[journal_space_clean_ondisk].next_entry <
 	     j->space[journal_space_clean_ondisk].total) &&
@@ -369,9 +367,6 @@ static inline void __journal_pin_drop(struct journal *j,
 	if (atomic_dec_and_test(&pin_list->count) &&
 	    pin_list == &fifo_peek_front(&j->pin))
 		bch2_journal_reclaim_fast(j);
-	else if (fifo_used(&j->pin) == 1 &&
-		 atomic_read(&pin_list->count) == 1)
-		journal_wake(j);
 }

 void bch2_journal_pin_drop(struct journal *j,
@@ -769,8 +764,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
 	 */
 	ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
 		journal_last_seq(j) > seq_to_flush ||
-		(fifo_used(&j->pin) == 1 &&
-		 atomic_read(&fifo_peek_front(&j->pin).count) == 1);
+		!fifo_used(&j->pin);

 	spin_unlock(&j->lock);
 	mutex_unlock(&j->reclaim_lock);
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1344,7 +1344,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
 	}

 	sb_clean->flags		= 0;
-	sb_clean->journal_seq	= cpu_to_le64(journal_cur_seq(&c->journal) - 1);
+	sb_clean->journal_seq	= cpu_to_le64(atomic64_read(&c->journal.seq));

 	/* Trying to catch outstanding bug: */
 	BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);