summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev> 2022-12-02 11:45:58 -0500
committer: Kent Overstreet <kent.overstreet@linux.dev> 2022-12-02 11:45:58 -0500
commit: 00fed1b3dd37fb81900981545eeac93bb835a81d (patch)
tree: 94fc82d2d57658aed51f4245650fb4c0e2255892
parent: da437d6da4701aa94e0ce177ad440eef28edbe05 (diff)
bcachefs: Fix a "no journal entries found" bug
On startup, we need to ensure the first journal entry written is a flush write: after a clean shutdown we generally don't read the journal, which means we might be overwriting whatever was there previously, and there must always be at least one flush entry in the journal or recovery will fail. Found by fstests generic/388. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/journal_io.c 34
-rw-r--r-- fs/bcachefs/journal_types.h 3
-rw-r--r-- fs/bcachefs/super.c 8
3 files changed, 38 insertions, 7 deletions
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 179361bea0e3..f1dbf65afe77 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1660,20 +1660,42 @@ void bch2_journal_write(struct closure *cl)
j->write_start_time = local_clock();
spin_lock(&j->lock);
- if (bch2_journal_error(j) ||
- w->noflush ||
- (!w->must_flush &&
- (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
- test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
+
+ /*
+ * If the journal is in an error state - we did an emergency shutdown -
+ * we prefer to continue doing journal writes. We just mark them as
+ * noflush so they'll never be used, but they'll still be visible to the
+ * list_journal tool - this helps in debugging.
+ *
+ * There's a caveat: the first journal write after marking the
+ * superblock dirty must always be a flush write, because on startup
+ * from a clean shutdown we didn't necessarily read the journal and the
+ * new journal write might overwrite whatever was in the journal
+ * previously - we can't leave the journal without any flush writes in
+ * it.
+ *
+ * So if we're in an error state, and we're still starting up, we don't
+ * write anything at all.
+ */
+ if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) &&
+ (bch2_journal_error(j) ||
+ w->noflush ||
+ (!w->must_flush &&
+ (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
+ test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) {
w->noflush = true;
SET_JSET_NO_FLUSH(jset, true);
jset->last_seq = 0;
w->last_seq = 0;
j->nr_noflush_writes++;
- } else {
+ } else if (!bch2_journal_error(j)) {
j->last_flush_write = jiffies;
j->nr_flush_writes++;
+ clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
+ } else {
+ spin_unlock(&j->lock);
+ goto err;
}
spin_unlock(&j->lock);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index a6cdb885ad41..045ee95a92a8 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -141,10 +141,11 @@ enum journal_space_from {
journal_space_nr,
};
-enum {
+enum journal_flags {
JOURNAL_REPLAY_DONE,
JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH,
+ JOURNAL_NEED_FLUSH_WRITE,
};
#define JOURNAL_WATERMARKS() \
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 47ca21536485..7cac0567037d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -367,6 +367,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
+ /*
+ * First journal write must be a flush write: after a clean shutdown we
+ * don't read the journal, so the first journal write may end up
+ * overwriting whatever was there previously, and there must always be
+ * at least one flush write in the journal or recovery will fail:
+ */
+ set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags);
+
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);