summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- fs/bcachefs/journal_io.c 34
-rw-r--r-- fs/bcachefs/journal_types.h 3
-rw-r--r-- fs/bcachefs/super.c 8
3 files changed, 38 insertions, 7 deletions
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 179361bea0e3..f1dbf65afe77 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1660,20 +1660,42 @@ void bch2_journal_write(struct closure *cl)
j->write_start_time = local_clock();
spin_lock(&j->lock);
- if (bch2_journal_error(j) ||
- w->noflush ||
- (!w->must_flush &&
- (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
- test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
+
+ /*
+ * If the journal is in an error state - we did an emergency shutdown -
+ * we prefer to continue doing journal writes. We just mark them as
+ * noflush so they'll never be used, but they'll still be visible to the
+ * list_journal tool - this helps in debugging.
+ *
+ * There's a caveat: the first journal write after marking the
+ * superblock dirty must always be a flush write, because on startup
+ * from a clean shutdown we didn't necessarily read the journal and the
+ * new journal write might overwrite whatever was in the journal
+ * previously - we can't leave the journal without any flush writes in
+ * it.
+ *
+ * So if we're in an error state, and we're still starting up, we don't
+ * write anything at all.
+ */
+ if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) &&
+ (bch2_journal_error(j) ||
+ w->noflush ||
+ (!w->must_flush &&
+ (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
+ test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) {
w->noflush = true;
SET_JSET_NO_FLUSH(jset, true);
jset->last_seq = 0;
w->last_seq = 0;
j->nr_noflush_writes++;
- } else {
+ } else if (!bch2_journal_error(j)) {
j->last_flush_write = jiffies;
j->nr_flush_writes++;
+ clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
+ } else {
+ spin_unlock(&j->lock);
+ goto err;
}
spin_unlock(&j->lock);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index a6cdb885ad41..045ee95a92a8 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -141,10 +141,11 @@ enum journal_space_from {
journal_space_nr,
};
-enum {
+enum journal_flags {
JOURNAL_REPLAY_DONE,
JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH,
+ JOURNAL_NEED_FLUSH_WRITE,
};
#define JOURNAL_WATERMARKS() \
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 47ca21536485..7cac0567037d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -367,6 +367,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
+ /*
+ * First journal write must be a flush write: after a clean shutdown we
+ * don't read the journal, so the first journal write may end up
+ * overwriting whatever was there previously, and there must always be
+ * at least one flush write in the journal or recovery will fail:
+ */
+ set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags);
+
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);