summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2018-06-11 07:05:25 -0400
committer Kent Overstreet <kent.overstreet@gmail.com> 2018-06-11 11:35:24 -0400
commit 5c5aa6371e17dea55e51ed508759287a43f813e7 (patch)
tree 9e15a1a6f9fb69af9e86b774ee26dd8159c09223
parent 210e5e5d91156066d2d9ba0ab49e9d689ecf0d0e (diff)
bcachefs: Make some improvements to the journal shutdown code
-rw-r--r-- fs/bcachefs/journal.c 28
-rw-r--r-- fs/bcachefs/journal_io.c 3
-rw-r--r-- fs/bcachefs/journal_reclaim.c 34
-rw-r--r-- fs/bcachefs/journal_reclaim.h 9
-rw-r--r-- fs/bcachefs/journal_types.h 1
-rw-r--r-- fs/bcachefs/recovery.c 12
-rw-r--r-- fs/bcachefs/super.c 86
7 files changed, 105 insertions, 68 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index addd51f08c9a..b4fe27f8f5ca 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -75,6 +75,19 @@ static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf)
return BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
}
+static inline bool journal_entry_empty(struct jset *j)
+{
+ struct jset_entry *i;
+
+ if (j->seq != j->last_seq)
+ return false;
+
+ vstruct_for_each(j, i)
+ if (i->type || i->u64s)
+ return false;
+ return true;
+}
+
static enum {
JOURNAL_ENTRY_ERROR,
JOURNAL_ENTRY_INUSE,
@@ -129,6 +142,11 @@ static enum {
/* XXX: why set this here, and not in bch2_journal_write()? */
buf->data->last_seq = cpu_to_le64(journal_last_seq(j));
+ if (journal_entry_empty(buf->data))
+ clear_bit(JOURNAL_NOT_EMPTY, &j->flags);
+ else
+ set_bit(JOURNAL_NOT_EMPTY, &j->flags);
+
journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j);
@@ -884,8 +902,18 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
void bch2_fs_journal_stop(struct journal *j)
{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
wait_event(j->wait, journal_flush_write(j));
+ /* do we need to write another journal entry? */
+ if (test_bit(JOURNAL_NOT_EMPTY, &j->flags) ||
+ c->btree_roots_dirty)
+ bch2_journal_meta(j);
+
+ BUG_ON(!bch2_journal_error(j) &&
+ test_bit(JOURNAL_NOT_EMPTY, &j->flags));
+
cancel_delayed_work_sync(&j->write_work);
cancel_delayed_work_sync(&j->reclaim_work);
}
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 36ba6a4daf84..4f873ccc492a 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -950,7 +950,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
j->replay_journal_seq = 0;
bch2_journal_set_replay_done(j);
- ret = bch2_journal_flush_all_pins(j);
+ bch2_journal_flush_all_pins(j);
+ ret = bch2_journal_error(j);
err:
bch2_journal_entries_free(list);
return ret;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 0e3e5b6abb39..4cabfb7c9646 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -337,34 +337,22 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
return ret;
}
-int bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
+void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
{
- struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_entry_pin *pin;
u64 pin_seq;
- bool flush;
if (!test_bit(JOURNAL_STARTED, &j->flags))
- return 0;
-again:
- wait_event(j->wait, journal_flush_done(j, seq_to_flush, &pin, &pin_seq));
- if (pin) {
- /* flushing a journal pin might cause a new one to be added: */
- pin->flush(j, pin, pin_seq);
- goto again;
- }
-
- spin_lock(&j->lock);
- flush = journal_last_seq(j) != j->last_seq_ondisk ||
- (seq_to_flush == U64_MAX && c->btree_roots_dirty);
- spin_unlock(&j->lock);
+ return;
- return flush ? bch2_journal_meta(j) : 0;
-}
+ while (1) {
+ wait_event(j->wait, journal_flush_done(j, seq_to_flush,
+ &pin, &pin_seq));
+ if (!pin)
+ break;
-int bch2_journal_flush_all_pins(struct journal *j)
-{
- return bch2_journal_flush_pins(j, U64_MAX);
+ pin->flush(j, pin, pin_seq);
+ }
}
int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
@@ -383,7 +371,9 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
seq = iter;
spin_unlock(&j->lock);
- ret = bch2_journal_flush_pins(j, seq);
+ bch2_journal_flush_pins(j, seq);
+
+ ret = bch2_journal_error(j);
if (ret)
return ret;
diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h
index 7d460c35cfae..eb22790251de 100644
--- a/fs/bcachefs/journal_reclaim.h
+++ b/fs/bcachefs/journal_reclaim.h
@@ -29,8 +29,13 @@ void bch2_journal_pin_add_if_older(struct journal *,
void bch2_journal_reclaim_fast(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *);
-int bch2_journal_flush_pins(struct journal *, u64);
-int bch2_journal_flush_all_pins(struct journal *);
+void bch2_journal_flush_pins(struct journal *, u64);
+
+static inline void bch2_journal_flush_all_pins(struct journal *j)
+{
+ bch2_journal_flush_pins(j, U64_MAX);
+}
+
int bch2_journal_flush_device_pins(struct journal *, int);
#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index a27e0548c098..effbeece1ed9 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -117,6 +117,7 @@ enum {
JOURNAL_REPLAY_DONE,
JOURNAL_STARTED,
JOURNAL_NEED_WRITE,
+ JOURNAL_NOT_EMPTY,
};
/* Embedded in struct bch_fs */
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 0a87493961c8..271eca14753d 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -75,6 +75,11 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
bch_verbose(c, "mark and sweep done");
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
if (c->opts.noreplay)
goto out;
@@ -184,6 +189,13 @@ int bch2_fs_initialize(struct bch_fs *c)
if (bch2_journal_meta(&c->journal))
goto err;
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
return 0;
err:
BUG_ON(!ret);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index bd2e197c020d..6cafbdb888ee 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -199,16 +199,14 @@ int bch2_congested(void *data, int bdi_bits)
* - allocator depends on the journal (when it rewrites prios and gens)
*/
-static void bch_fs_mark_clean(struct bch_fs *c)
+static void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
{
- if (!bch2_journal_error(&c->journal) &&
- !test_bit(BCH_FS_ERROR, &c->flags) &&
- !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
- mutex_lock(&c->sb_lock);
- SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
+ mutex_lock(&c->sb_lock);
+ if (BCH_SB_CLEAN(c->disk_sb.sb) != clean) {
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, clean);
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
}
+ mutex_unlock(&c->sb_lock);
}
static void __bch2_fs_read_only(struct bch_fs *c)
@@ -227,7 +225,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
* Flush journal before stopping allocators, because flushing journal
* blacklist entries involves allocating new btree nodes:
*/
- bch2_journal_flush_pins(&c->journal, U64_MAX - 1);
+ bch2_journal_flush_all_pins(&c->journal);
for_each_member_device(ca, c, i)
bch2_dev_allocator_stop(ca);
@@ -244,9 +242,6 @@ static void __bch2_fs_read_only(struct bch_fs *c)
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
- if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
- bch2_btree_verify_flushed(c);
-
bch2_fs_journal_stop(&c->journal);
/*
@@ -255,6 +250,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
*/
if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_btree_flush_all_writes(c);
+ else
+ bch2_btree_verify_flushed(c);
/*
* After stopping journal:
@@ -273,12 +270,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
void bch2_fs_read_only(struct bch_fs *c)
{
- if (c->state != BCH_FS_STARTING &&
- c->state != BCH_FS_RW)
+ if (c->state == BCH_FS_RO)
return;
- if (test_bit(BCH_FS_ERROR, &c->flags))
- return;
+ BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
/*
* Block new foreground-end write operations from starting - any new
@@ -309,13 +304,20 @@ void bch2_fs_read_only(struct bch_fs *c)
__bch2_fs_read_only(c);
- bch_fs_mark_clean(c);
-
wait_event(bch_read_only_wait,
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
- c->state = BCH_FS_RO;
+
+ if (!bch2_journal_error(&c->journal) &&
+ !test_bit(BCH_FS_ERROR, &c->flags) &&
+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
+
+ bch2_fs_mark_clean(c, true);
+ }
+
+ if (c->state != BCH_FS_STOPPING)
+ c->state = BCH_FS_RO;
}
static void bch2_fs_read_only_work(struct work_struct *work)
@@ -350,10 +352,11 @@ const char *bch2_fs_read_write(struct bch_fs *c)
const char *err = NULL;
unsigned i;
- if (c->state != BCH_FS_STARTING &&
- c->state != BCH_FS_RO)
+ if (c->state == BCH_FS_RW)
return NULL;
+ bch2_fs_mark_clean(c, false);
+
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
@@ -444,11 +447,6 @@ void bch2_fs_stop(struct bch_fs *c)
struct bch_dev *ca;
unsigned i;
- mutex_lock(&c->state_lock);
- BUG_ON(c->state == BCH_FS_STOPPING);
- c->state = BCH_FS_STOPPING;
- mutex_unlock(&c->state_lock);
-
for_each_member_device(ca, c, i)
if (ca->kobj.state_in_sysfs &&
ca->disk_sb.bdev)
@@ -473,11 +471,9 @@ void bch2_fs_stop(struct bch_fs *c)
closure_debug_destroy(&c->cl);
mutex_lock(&c->state_lock);
- __bch2_fs_read_only(c);
+ bch2_fs_read_only(c);
mutex_unlock(&c->state_lock);
- bch_fs_mark_clean(c);
-
/* btree prefetch might have kicked off reads in the background: */
bch2_btree_flush_all_reads(c);
@@ -693,7 +689,7 @@ const char *bch2_fs_start(struct bch_fs *c)
const char *err = "cannot allocate memory";
struct bch_sb_field_members *mi;
struct bch_dev *ca;
- time64_t now;
+ time64_t now = ktime_get_seconds();
unsigned i;
int ret = -EINVAL;
@@ -702,8 +698,14 @@ const char *bch2_fs_start(struct bch_fs *c)
BUG_ON(c->state != BCH_FS_STARTING);
mutex_lock(&c->sb_lock);
+
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
+
+ mi = bch2_sb_get_members(c->disk_sb.sb);
+ for_each_online_member(ca, c, i)
+ mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
+
mutex_unlock(&c->sb_lock);
for_each_rw_member(ca, c, i)
@@ -728,19 +730,6 @@ const char *bch2_fs_start(struct bch_fs *c)
goto err;
}
- mutex_lock(&c->sb_lock);
- mi = bch2_sb_get_members(c->disk_sb.sb);
- now = ktime_get_seconds();
-
- for_each_member_device(ca, c, i)
- mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
-
- SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
- SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
set_bit(BCH_FS_STARTED, &c->flags);
err = NULL;
@@ -1314,7 +1303,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
* must flush all existing journal entries, they might have
* (overwritten) keys that point to the device we're removing:
*/
- ret = bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_flush_all_pins(&c->journal);
+ ret = bch2_journal_error(&c->journal);
if (ret) {
bch_err(ca, "Remove failed, journal error");
goto err;
@@ -1475,6 +1465,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
{
struct bch_opts opts = bch2_opts_empty();
struct bch_sb_handle sb = { NULL };
+ struct bch_sb_field_members *mi;
struct bch_dev *ca;
unsigned dev_idx;
const char *err;
@@ -1506,6 +1497,15 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
goto err;
}
+ mutex_lock(&c->sb_lock);
+ mi = bch2_sb_get_members(c->disk_sb.sb);
+
+ mi->members[ca->dev_idx].last_mount =
+ cpu_to_le64(ktime_get_seconds());
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
mutex_unlock(&c->state_lock);
return 0;
err: