bcachefs: Make some improvements to the journal shutdown code

author: Kent Overstreet <kent.overstreet@gmail.com> 2018-06-11 07:05:25 -0400
committer: Kent Overstreet <kent.overstreet@gmail.com> 2018-06-11 11:35:24 -0400
commit: 5c5aa6371e17dea55e51ed508759287a43f813e7 (patch)
tree: 9e15a1a6f9fb69af9e86b774ee26dd8159c09223
parent: 210e5e5d91156066d2d9ba0ab49e9d689ecf0d0e (diff)
7 files changed, 105 insertions, 68 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index addd51f08c9a..b4fe27f8f5ca 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -75,6 +75,19 @@ static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf)
 	return BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
 }
 
+static inline bool journal_entry_empty(struct jset *j)
+{
+	struct jset_entry *i;
+
+	if (j->seq != j->last_seq)
+		return false;
+
+	vstruct_for_each(j, i)
+		if (i->type || i->u64s)
+			return false;
+	return true;
+}
+
 static enum {
 	JOURNAL_ENTRY_ERROR,
 	JOURNAL_ENTRY_INUSE,
@@ -129,6 +142,11 @@ static enum {
 	/* XXX: why set this here, and not in bch2_journal_write()? */
 	buf->data->last_seq	= cpu_to_le64(journal_last_seq(j));
 
+	if (journal_entry_empty(buf->data))
+		clear_bit(JOURNAL_NOT_EMPTY, &j->flags);
+	else
+		set_bit(JOURNAL_NOT_EMPTY, &j->flags);
+
 	journal_pin_new_entry(j, 1);
 
 	bch2_journal_buf_init(j);
@@ -884,8 +902,18 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
 
 void bch2_fs_journal_stop(struct journal *j)
 {
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
 	wait_event(j->wait, journal_flush_write(j));
 
+	/* do we need to write another journal entry? */
+	if (test_bit(JOURNAL_NOT_EMPTY, &j->flags) ||
+	    c->btree_roots_dirty)
+		bch2_journal_meta(j);
+
+	BUG_ON(!bch2_journal_error(j) &&
+	       test_bit(JOURNAL_NOT_EMPTY, &j->flags));
+
 	cancel_delayed_work_sync(&j->write_work);
 	cancel_delayed_work_sync(&j->reclaim_work);
 }
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 36ba6a4daf84..4f873ccc492a 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -950,7 +950,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 	j->replay_journal_seq = 0;
 
 	bch2_journal_set_replay_done(j);
-	ret = bch2_journal_flush_all_pins(j);
+	bch2_journal_flush_all_pins(j);
+	ret = bch2_journal_error(j);
 err:
 	bch2_journal_entries_free(list);
 	return ret;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 0e3e5b6abb39..4cabfb7c9646 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -337,34 +337,22 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
 	return ret;
 }
 
-int bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
+void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
 {
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_entry_pin *pin;
 	u64 pin_seq;
-	bool flush;
 
 	if (!test_bit(JOURNAL_STARTED, &j->flags))
-		return 0;
-again:
-	wait_event(j->wait, journal_flush_done(j, seq_to_flush, &pin, &pin_seq));
-	if (pin) {
-		/* flushing a journal pin might cause a new one to be added: */
-		pin->flush(j, pin, pin_seq);
-		goto again;
-	}
-
-	spin_lock(&j->lock);
-	flush = journal_last_seq(j) != j->last_seq_ondisk ||
-		(seq_to_flush == U64_MAX && c->btree_roots_dirty);
-	spin_unlock(&j->lock);
+		return;
 
-	return flush ? bch2_journal_meta(j) : 0;
-}
+	while (1) {
+		wait_event(j->wait, journal_flush_done(j, seq_to_flush,
+						       &pin, &pin_seq));
+		if (!pin)
+			break;
 
-int bch2_journal_flush_all_pins(struct journal *j)
-{
-	return bch2_journal_flush_pins(j, U64_MAX);
+		pin->flush(j, pin, pin_seq);
+	}
 }
 
 int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
@@ -383,7 +371,9 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
 			seq = iter;
 	spin_unlock(&j->lock);
 
-	ret = bch2_journal_flush_pins(j, seq);
+	bch2_journal_flush_pins(j, seq);
+
+	ret = bch2_journal_error(j);
 	if (ret)
 		return ret;
 
diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h
index 7d460c35cfae..eb22790251de 100644
--- a/fs/bcachefs/journal_reclaim.h
+++ b/fs/bcachefs/journal_reclaim.h
@@ -29,8 +29,13 @@ void bch2_journal_pin_add_if_older(struct journal *,
 void bch2_journal_reclaim_fast(struct journal *);
 void bch2_journal_reclaim_work(struct work_struct *);
 
-int bch2_journal_flush_pins(struct journal *, u64);
-int bch2_journal_flush_all_pins(struct journal *);
+void bch2_journal_flush_pins(struct journal *, u64);
+
+static inline void bch2_journal_flush_all_pins(struct journal *j)
+{
+	bch2_journal_flush_pins(j, U64_MAX);
+}
+
 int bch2_journal_flush_device_pins(struct journal *, int);
 
 #endif /* _BCACHEFS_JOURNAL_RECLAIM_H */
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index a27e0548c098..effbeece1ed9 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -117,6 +117,7 @@ enum {
 	JOURNAL_REPLAY_DONE,
 	JOURNAL_STARTED,
 	JOURNAL_NEED_WRITE,
+	JOURNAL_NOT_EMPTY,
 };
 
 /* Embedded in struct bch_fs */
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 0a87493961c8..271eca14753d 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -75,6 +75,11 @@ int bch2_fs_recovery(struct bch_fs *c)
 		goto err;
 	bch_verbose(c, "mark and sweep done");
 
+	mutex_lock(&c->sb_lock);
+	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
 	if (c->opts.noreplay)
 		goto out;
 
@@ -184,6 +189,13 @@ int bch2_fs_initialize(struct bch_fs *c)
 	if (bch2_journal_meta(&c->journal))
 		goto err;
 
+	mutex_lock(&c->sb_lock);
+	SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
+	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
 	return 0;
 err:
 	BUG_ON(!ret);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index bd2e197c020d..6cafbdb888ee 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -199,16 +199,14 @@ int bch2_congested(void *data, int bdi_bits)
  * - allocator depends on the journal (when it rewrites prios and gens)
  */
 
-static void bch_fs_mark_clean(struct bch_fs *c)
+static void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 {
-	if (!bch2_journal_error(&c->journal) &&
-	    !test_bit(BCH_FS_ERROR, &c->flags) &&
-	    !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
-		mutex_lock(&c->sb_lock);
-		SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
+	mutex_lock(&c->sb_lock);
+	if (BCH_SB_CLEAN(c->disk_sb.sb) != clean) {
+		SET_BCH_SB_CLEAN(c->disk_sb.sb, clean);
 		bch2_write_super(c);
-		mutex_unlock(&c->sb_lock);
 	}
+	mutex_unlock(&c->sb_lock);
 }
 
 static void __bch2_fs_read_only(struct bch_fs *c)
@@ -227,7 +225,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	 * Flush journal before stopping allocators, because flushing journal
 	 * blacklist entries involves allocating new btree nodes:
 	 */
-	bch2_journal_flush_pins(&c->journal, U64_MAX - 1);
+	bch2_journal_flush_all_pins(&c->journal);
 
 	for_each_member_device(ca, c, i)
 		bch2_dev_allocator_stop(ca);
@@ -244,9 +242,6 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	closure_wait_event(&c->btree_interior_update_wait,
 			   !bch2_btree_interior_updates_nr_pending(c));
 
-	if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
-		bch2_btree_verify_flushed(c);
-
 	bch2_fs_journal_stop(&c->journal);
 
 	/*
@@ -255,6 +250,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	 */
 	if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
 		bch2_btree_flush_all_writes(c);
+	else
+		bch2_btree_verify_flushed(c);
 
 	/*
 	 * After stopping journal:
@@ -273,12 +270,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
 
 void bch2_fs_read_only(struct bch_fs *c)
 {
-	if (c->state != BCH_FS_STARTING &&
-	    c->state != BCH_FS_RW)
+	if (c->state == BCH_FS_RO)
 		return;
 
-	if (test_bit(BCH_FS_ERROR, &c->flags))
-		return;
+	BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
 
 	/*
 	 * Block new foreground-end write operations from starting - any new
@@ -309,13 +304,20 @@ void bch2_fs_read_only(struct bch_fs *c)
 
 	__bch2_fs_read_only(c);
 
-	bch_fs_mark_clean(c);
-
 	wait_event(bch_read_only_wait,
 		   test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
 
 	clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
-	c->state = BCH_FS_RO;
+
+	if (!bch2_journal_error(&c->journal) &&
+	    !test_bit(BCH_FS_ERROR, &c->flags) &&
+	    !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
+
+		bch2_fs_mark_clean(c, true);
+	}
+
+	if (c->state != BCH_FS_STOPPING)
+		c->state = BCH_FS_RO;
 }
 
 static void bch2_fs_read_only_work(struct work_struct *work)
@@ -350,10 +352,11 @@ const char *bch2_fs_read_write(struct bch_fs *c)
 	const char *err = NULL;
 	unsigned i;
 
-	if (c->state != BCH_FS_STARTING &&
-	    c->state != BCH_FS_RO)
+	if (c->state == BCH_FS_RW)
 		return NULL;
 
+	bch2_fs_mark_clean(c, false);
+
 	for_each_rw_member(ca, c, i)
 		bch2_dev_allocator_add(c, ca);
 	bch2_recalc_capacity(c);
@@ -444,11 +447,6 @@ void bch2_fs_stop(struct bch_fs *c)
 	struct bch_dev *ca;
 	unsigned i;
 
-	mutex_lock(&c->state_lock);
-	BUG_ON(c->state == BCH_FS_STOPPING);
-	c->state = BCH_FS_STOPPING;
-	mutex_unlock(&c->state_lock);
-
 	for_each_member_device(ca, c, i)
 		if (ca->kobj.state_in_sysfs &&
 		    ca->disk_sb.bdev)
@@ -473,11 +471,9 @@ void bch2_fs_stop(struct bch_fs *c)
 	closure_debug_destroy(&c->cl);
 
 	mutex_lock(&c->state_lock);
-	__bch2_fs_read_only(c);
+	bch2_fs_read_only(c);
 	mutex_unlock(&c->state_lock);
 
-	bch_fs_mark_clean(c);
-
 	/* btree prefetch might have kicked off reads in the background: */
 	bch2_btree_flush_all_reads(c);
 
@@ -693,7 +689,7 @@ const char *bch2_fs_start(struct bch_fs *c)
 	const char *err = "cannot allocate memory";
 	struct bch_sb_field_members *mi;
 	struct bch_dev *ca;
-	time64_t now;
+	time64_t now = ktime_get_seconds();
 	unsigned i;
 	int ret = -EINVAL;
 
@@ -702,8 +698,14 @@ const char *bch2_fs_start(struct bch_fs *c)
 	BUG_ON(c->state != BCH_FS_STARTING);
 
 	mutex_lock(&c->sb_lock);
+
 	for_each_online_member(ca, c, i)
 		bch2_sb_from_fs(c, ca);
+
+	mi = bch2_sb_get_members(c->disk_sb.sb);
+	for_each_online_member(ca, c, i)
+		mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
+
 	mutex_unlock(&c->sb_lock);
 
 	for_each_rw_member(ca, c, i)
@@ -728,19 +730,6 @@ const char *bch2_fs_start(struct bch_fs *c)
 			goto err;
 	}
 
-	mutex_lock(&c->sb_lock);
-	mi = bch2_sb_get_members(c->disk_sb.sb);
-	now = ktime_get_seconds();
-
-	for_each_member_device(ca, c, i)
-		mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
-
-	SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
-	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
-	bch2_write_super(c);
-	mutex_unlock(&c->sb_lock);
-
 	set_bit(BCH_FS_STARTED, &c->flags);
 
 	err = NULL;
@@ -1314,7 +1303,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 	 * must flush all existing journal entries, they might have
 	 * (overwritten) keys that point to the device we're removing:
 	 */
-	ret = bch2_journal_flush_all_pins(&c->journal);
+	bch2_journal_flush_all_pins(&c->journal);
+	ret = bch2_journal_error(&c->journal);
 	if (ret) {
 		bch_err(ca, "Remove failed, journal error");
 		goto err;
@@ -1475,6 +1465,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 {
 	struct bch_opts opts = bch2_opts_empty();
 	struct bch_sb_handle sb = { NULL };
+	struct bch_sb_field_members *mi;
 	struct bch_dev *ca;
 	unsigned dev_idx;
 	const char *err;
@@ -1506,6 +1497,15 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 			goto err;
 	}
 
+	mutex_lock(&c->sb_lock);
+	mi = bch2_sb_get_members(c->disk_sb.sb);
+
+	mi->members[ca->dev_idx].last_mount =
+		cpu_to_le64(ktime_get_seconds());
+
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
 	mutex_unlock(&c->state_lock);
 	return 0;
 err:
author	Kent Overstreet <kent.overstreet@gmail.com>	2018-06-11 07:05:25 -0400
committer	Kent Overstreet <kent.overstreet@gmail.com>	2018-06-11 11:35:24 -0400
commit	5c5aa6371e17dea55e51ed508759287a43f813e7 (patch)
tree	9e15a1a6f9fb69af9e86b774ee26dd8159c09223
parent	210e5e5d91156066d2d9ba0ab49e9d689ecf0d0e (diff)