author    Kent Overstreet <kent.overstreet@gmail.com>  2015-04-22 15:24:38 -0700
committer Kent Overstreet <kent.overstreet@gmail.com>  2016-10-07 12:33:57 -0800
commit    55c6ca8f2d6e5c10f679dd82654171602dc2494c (patch)
tree      b31206330d3b2030e4c84e45a8f21bcd3ab9e76a
parent    6cc869454858f55087b627c0e48cea9df375b7e8 (diff)
bcache: More journal related refactoring
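In short: journal functions now take a struct journal * rather than a
struct cache_set *, bch_journal_set_dirty() is folded into
bch_journal_res_put(), the j->cur pointer is replaced by the
JOURNAL_WRITE_IDX flag bit (via the new journal_cur_write() /
journal_prev_write() helpers), bch_journal_flush() is added, and the
"synchronous" sysfs attribute goes away along with the CACHE_SYNC() check
in bch_journal_meta().

A minimal sketch of the reservation API after this change (an illustrative
caller, not code from this patch; c, id, k and level are assumed to be in
scope):

    struct journal_res res;
    struct closure cl;
    unsigned u64s = jset_u64s(k->k.u64s);
    u64 seq;

    memset(&res, 0, sizeof(res));
    closure_init_stack(&cl);

    /* Reserve space in the current journal entry: */
    bch_journal_res_get(&c->journal, &res, u64s, u64s);

    /* Copy the keys into the reserved space: */
    bch_journal_add_keys(&c->journal, &res, id, k, level);

    seq = c->journal.seq;

    /* Release the reservation; this also marks the journal dirty: */
    bch_journal_res_put(&c->journal, &res);

    /* Wait for that sequence number to be persistent: */
    bch_journal_push_seq(&c->journal, seq, &cl);
    closure_sync(&cl);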
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
 drivers/md/bcache/alloc.c         |   2
 drivers/md/bcache/btree.c         |  44
 drivers/md/bcache/journal.c       | 181
 drivers/md/bcache/journal.h       |  39
 drivers/md/bcache/journal_types.h |  32
 drivers/md/bcache/request.c       |  49
 drivers/md/bcache/super.c         |  21
 drivers/md/bcache/sysfs.c         |  16
 8 files changed, 190 insertions(+), 194 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b79745991532..97d7b0468fb4 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -297,7 +297,7 @@ static void bch_prio_write(struct cache *ca)
                  c->journal.nr_prio_buckets);
 
        spin_unlock_irq(&c->journal.lock);
-       bch_journal_meta(c, &cl);
+       bch_journal_meta(&c->journal, &cl);
        closure_sync(&cl);
 
        /*
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 1bd44e73ca1b..49d58d65d387 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -787,7 +787,7 @@ void bch_btree_push_journal_seq(struct cache_set *c, struct btree *b,
                u64 seq = b->keys.set[i].data->journal_seq;
 
                if (seq) {
-                       bch_journal_push_seq(c, seq, cl);
+                       bch_journal_push_seq(&c->journal, seq, cl);
                        break;
                }
        }
@@ -1481,7 +1481,6 @@ void btree_node_free(struct cache_set *c, struct btree *b)
 static void bch_btree_set_root(struct cache_set *c, struct btree *b)
 {
        struct journal_res res;
-       struct closure cl;
        struct btree *old;
 
        memset(&res, 0, sizeof(res));
@@ -1491,9 +1490,10 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
        old = btree_node_root(b);
 
        if (old) {
-               unsigned u64s = jset_u64s(0);
-
-               bch_journal_res_get(c, &res, u64s, u64s);
+               /*
+                * Ensure no one is using the old root while we switch to the
+                * new root:
+                */
                six_lock_write(&old->lock);
        }
@@ -1509,14 +1509,25 @@
        bch_recalc_btree_reserve(c);
 
        if (old) {
-               if (res.ref) {
-                       closure_init_stack(&cl);
-                       bch_journal_set_dirty(c);
-                       bch_journal_res_put(c, &res, &cl);
-                       closure_sync(&cl);
-               }
+               struct closure cl;
+
+               closure_init_stack(&cl);
+
+               /*
+                * Unlock old root after new root is visible:
+                *
+                * The new root isn't persistent, but that's ok: we still have
+                * an intent lock on the new root, and any updates that would
+                * depend on the new root would have to update the new root.
+                */
                six_unlock_write(&old->lock);
+
+               /*
+                * Ensure new btree root is persistent (reachable via the
+                * journal) before returning and the caller unlocking it:
+                */
+               bch_journal_meta(&c->journal, &cl);
+               closure_sync(&cl);
        }
 }
@@ -1859,7 +1870,8 @@ void bch_btree_insert_and_journal(struct cache_set *c, struct btree *b,
        }
 
        if (res->ref) {
-               bch_journal_add_keys(c, res, b->btree_id, insert, b->level);
+               bch_journal_add_keys(&c->journal, res, b->btree_id,
+                                    insert, b->level);
                btree_bset_last(b)->journal_seq = c->journal.seq;
        }
 }
@@ -2005,7 +2017,7 @@ bch_btree_insert_keys(struct btree *b,
 
                if (!b->level &&
                    test_bit(JOURNAL_REPLAY_DONE, &iter->c->journal.flags))
-                       bch_journal_res_get(iter->c, &res,
+                       bch_journal_res_get(&iter->c->journal, &res,
                                            actual_min, actual_max);
 
                six_lock_write(&b->lock);
@@ -2045,7 +2057,7 @@ bch_btree_insert_keys(struct btree *b,
                six_unlock_write(&b->lock);
 
                if (res.ref)
-                       bch_journal_res_put(iter->c, &res, NULL);
+                       bch_journal_res_put(&iter->c->journal, &res);
        }
 
        if (inserted && b->written) {
@@ -2520,7 +2532,7 @@ int bch_btree_insert(struct cache_set *c, enum btree_id id,
 out:
        ret2 = bch_btree_iter_unlock(&iter);
 
        if (persistent)
-               bch_journal_push_seq(c, *journal_seq, persistent);
+               bch_journal_push_seq(&c->journal, *journal_seq, persistent);
 
        return ret ?: ret2;
 }
@@ -2556,7 +2568,7 @@ int bch_btree_update(struct cache_set *c, enum btree_id id, struct bkey_i *k,
 out:
        ret2 = bch_btree_iter_unlock(&iter);
 
        if (persistent)
-               bch_journal_push_seq(c, *journal_seq, persistent);
+               bch_journal_push_seq(&c->journal, *journal_seq, persistent);
 
        return ret ?: ret2;
 }
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 2a1f6ee8dc88..07140b6953fe 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -47,12 +47,12 @@ static inline u64 journal_pin_seq(struct journal *j,
 
 #define JSET_SECTORS (PAGE_SECTORS << JSET_BITS)
 
-static inline void bch_journal_add_entry_at(struct jset *j, const void *data,
+static inline void bch_journal_add_entry_at(struct journal *j, const void *data,
                                            size_t u64s, unsigned type,
                                            enum btree_id id, unsigned level,
                                            unsigned offset)
 {
-       struct jset_entry *jkeys = (struct jset_entry *) bkey_idx(j, offset);
+       struct jset_entry *jkeys = bkey_idx(journal_cur_write(j)->data, offset);
 
        jkeys->u64s = u64s;
        jkeys->btree_id = id;
@@ -63,12 +63,14 @@ static inline void bch_journal_add_entry_at(struct jset *j, const void *data,
        memcpy(jkeys->_data, data, u64s * sizeof(u64));
 }
 
-static inline void bch_journal_add_entry(struct jset *j, const void *data,
+static inline void bch_journal_add_entry(struct journal *j, const void *data,
                                         size_t u64s, unsigned type,
                                         enum btree_id id, unsigned level)
 {
-       bch_journal_add_entry_at(j, data, u64s, type, id, level, j->u64s);
-       j->u64s += jset_u64s(u64s);
+       struct jset *jset = journal_cur_write(j)->data;
+
+       bch_journal_add_entry_at(j, data, u64s, type, id, level, jset->u64s);
+       jset->u64s += jset_u64s(u64s);
 }
 
 static struct jset_entry *bch_journal_find_entry(struct jset *j, unsigned type,
@@ -106,15 +108,15 @@ struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
        return k;
 }
 
-static void bch_journal_add_btree_root(struct jset *j, enum btree_id id,
+static void bch_journal_add_btree_root(struct journal *j, enum btree_id id,
                                       struct bkey_i *k, unsigned level)
 {
        bch_journal_add_entry(j, k, k->k.u64s,
                              JKEYS_BTREE_ROOT, id, level);
 }
 
-static inline void bch_journal_add_prios(struct journal *j, struct jset *jset)
+static inline void bch_journal_add_prios(struct journal *j)
 {
-       bch_journal_add_entry(jset, j->prio_buckets, j->nr_prio_buckets,
+       bch_journal_add_entry(j, j->prio_buckets, j->nr_prio_buckets,
                              JKEYS_PRIO_PTRS, 0, 0);
 }
@@ -799,7 +801,7 @@ static bool journal_entry_close(struct journal *j)
                new.cur_entry_offset = S32_MAX;
        } while ((v = cmpxchg(&j->reservations.v, old.v, new.v)) != old.v);
 
-       j->cur->data->u64s = old.cur_entry_offset;
+       journal_cur_write(j)->data->u64s = old.cur_entry_offset;
 
        return old.count == 0;
 }
@@ -807,8 +809,10 @@ static bool journal_entry_close(struct journal *j)
 /* Number of u64s we can write to the current journal bucket */
 static void journal_calc_entry_size(struct journal *j)
 {
+       struct journal_write *w = journal_cur_write(j);
        ssize_t u64s;
 
+       lockdep_assert_held(&j->lock);
        BUG_ON(journal_entry_is_open(j) ||
               test_bit(JOURNAL_DIRTY, &j->flags));
@@ -827,12 +831,12 @@ static void journal_calc_entry_size(struct journal *j)
        u64s -= JSET_KEYS_U64s + j->nr_prio_buckets;
        u64s = max_t(ssize_t, 0L, u64s);
 
-       if (u64s > j->cur->data->u64s) {
+       if (u64s > w->data->u64s) {
                j->cur_entry_u64s = max_t(ssize_t, 0L, u64s);
 
                /* Handle any already added entries */
                atomic64_set(&j->reservations.counter,
-                            journal_res_state(0, j->cur->data->u64s).v);
+                            journal_res_state(0, w->data->u64s).v);
                wake_up(&j->wait);
        }
 }
@@ -869,7 +873,7 @@ void bch_journal_start(struct cache_set *c)
 
        list_for_each_entry(bl, &j->seq_blacklist, list)
                if (!bl->written) {
-                       bch_journal_add_entry(j->cur->data, &bl->seq, 1,
+                       bch_journal_add_entry(j, &bl->seq, 1,
                                              JKEYS_JOURNAL_SEQ_BLACKLISTED,
                                              0, 0);
@@ -1278,10 +1282,9 @@ static void journal_next_bucket(struct cache_set *c)
 static void __bch_journal_next_entry(struct journal *j)
 {
        struct journal_entry_pin_list pin_list, *p;
+       struct jset *jset;
 
-       j->cur = (j->cur == j->w)
-               ? &j->w[1]
-               : &j->w[0];
+       change_bit(JOURNAL_WRITE_IDX, &j->flags);
 
        /*
         * The fifo_push() needs to happen at the same time as j->seq is
@@ -1296,8 +1299,9 @@ static void __bch_journal_next_entry(struct journal *j)
        if (test_bit(JOURNAL_REPLAY_DONE, &j->flags))
                j->cur_pin_list = p;
 
-       j->cur->data->seq  = ++j->seq;
-       j->cur->data->u64s = 0;
+       jset = journal_cur_write(j)->data;
+       jset->seq  = ++j->seq;
+       jset->u64s = 0;
 }
 
 static void bch_journal_next_entry(struct journal *j)
@@ -1315,22 +1319,18 @@ static void journal_write_endio(struct bio *bio)
                bch_cache_error(ca, "IO error %d writing journal",
                                bio->bi_error);
 
-       closure_put(&w->c->journal.io);
+       closure_put(&w->j->io);
        percpu_ref_put(&ca->ref);
 }
 
 static void journal_write_done(struct closure *cl)
 {
        struct journal *j = container_of(cl, struct journal, io);
-       struct journal_write *w;
+       struct journal_write *w = journal_prev_write(j);
        unsigned long flags;
 
        spin_lock_irqsave(&j->lock, flags);
-       w = (j->cur == j->w)
-               ? &j->w[1]
-               : &j->w[0];
-
        j->last_seq_ondisk = w->data->last_seq;
 
        __closure_wake_up(&w->wait);
@@ -1350,7 +1350,8 @@ static void journal_write_locked(struct closure *cl)
        struct journal *j = container_of(cl, struct journal, io);
        struct cache_set *c = container_of(j, struct cache_set, journal);
        struct cache *ca;
-       struct journal_write *w = j->cur;
+       struct btree *b;
+       struct journal_write *w = journal_cur_write(j);
        struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
        struct bch_extent_ptr *ptr;
        BKEY_PADDED(k) tmp;
@@ -1370,17 +1371,13 @@
        spin_lock(&c->btree_root_lock);
 
-       for (i = 0; i < BTREE_ID_NR; i++) {
-               struct btree *b = c->btree_roots[i];
-
-               if (b)
-                       bch_journal_add_btree_root(w->data, i,
-                                                  &b->key, b->level);
-       }
+       for (i = 0; i < BTREE_ID_NR; i++)
+               if ((b = c->btree_roots[i]))
+                       bch_journal_add_btree_root(j, i, &b->key, b->level);
 
        spin_unlock(&c->btree_root_lock);
 
-       bch_journal_add_prios(j, w->data);
+       bch_journal_add_prios(j);
 
        /* So last_seq is up to date */
        journal_reclaim_fast(j);
@@ -1514,7 +1511,7 @@ static void journal_write_work(struct work_struct *work)
        journal_unlock(j);
 }
 
-void bch_journal_add_keys(struct cache_set *c, struct journal_res *res,
+void bch_journal_add_keys(struct journal *j, struct journal_res *res,
                          enum btree_id id, const struct bkey_i *k,
                          unsigned level)
 {
@@ -1523,7 +1520,7 @@ void bch_journal_add_keys(struct cache_set *c, struct journal_res *res,
        BUG_ON(!res->ref);
        BUG_ON(actual > res->u64s);
 
-       bch_journal_add_entry_at(c->journal.cur->data, k, k->k.u64s,
+       bch_journal_add_entry_at(j, k, k->k.u64s,
                                 JKEYS_BTREE_KEYS, id, level, res->offset);
 
        res->offset += actual;
@@ -1534,11 +1531,8 @@
  * This function releases the journal write structure so other threads can
  * then proceed to add their keys as well.
  */
-void bch_journal_res_put(struct cache_set *c,
-                        struct journal_res *res,
-                        struct closure *parent)
+void bch_journal_res_put(struct journal *j, struct journal_res *res)
 {
-       struct journal *j = &c->journal;
        union journal_res_state s;
        bool do_write = false;
@@ -1549,18 +1543,15 @@ void bch_journal_res_put(struct cache_set *c,
        while (res->u64s) {
                unsigned actual = jset_u64s(0);
 
-               bch_journal_add_entry_at(j->cur->data, NULL, 0,
-                                        JKEYS_BTREE_KEYS, 0, 0, res->offset);
+               bch_journal_add_entry_at(j, NULL, 0, JKEYS_BTREE_KEYS,
+                                        0, 0, res->offset);
                res->offset += actual;
                res->u64s   -= actual;
        }
 
-       bch_journal_set_dirty(c);
-
-       if (parent && test_bit(JOURNAL_DIRTY, &j->flags)) {
-               BUG_ON(!closure_wait(&j->cur->wait, parent));
-               set_bit(JOURNAL_NEED_WRITE, &j->flags);
-       }
+       if (!test_and_set_bit(JOURNAL_DIRTY, &j->flags))
+               schedule_delayed_work(&j->write_work,
+                                     msecs_to_jiffies(j->delay_ms));
 
        if (test_bit(JOURNAL_NEED_WRITE, &j->flags) &&
            !test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
@@ -1619,11 +1610,11 @@ static inline bool journal_res_get_fast(struct journal *j,
        return true;
 }
 
-static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
+static bool __journal_res_get(struct journal *j, struct journal_res *res,
                              unsigned u64s_min, unsigned u64s_max,
                              u64 *start_time)
 {
-       struct journal *j = &c->journal;
+       struct cache_set *c = container_of(j, struct cache_set, journal);
 
        while (1) {
                if (journal_res_get_fast(j, res, u64s_min, u64s_max))
@@ -1687,18 +1678,16 @@
  * To ensure forward progress, the current task must not be holding any
  * btree node write locks.
  */
-void bch_journal_res_get(struct cache_set *c, struct journal_res *res,
+void bch_journal_res_get(struct journal *j, struct journal_res *res,
                         unsigned u64s_min, unsigned u64s_max)
 {
-       struct journal *j = &c->journal;
        u64 start_time = 0;
 
        BUG_ON(res->ref);
        BUG_ON(u64s_max < u64s_min);
-       BUG_ON(!j->cur);
 
        wait_event(j->wait,
-                  __journal_res_get(c, res, u64s_min, u64s_max, &start_time));
+                  __journal_res_get(j, res, u64s_min, u64s_max, &start_time));
 
        BUG_ON(!res->ref);
@@ -1706,67 +1695,70 @@
                bch_time_stats_update(&j->full_time, start_time);
 }
 
-void bch_journal_set_dirty(struct cache_set *c)
+void bch_journal_push_seq(struct journal *j, u64 seq, struct closure *parent)
 {
-       if (!test_and_set_bit(JOURNAL_DIRTY, &c->journal.flags))
-               schedule_delayed_work(&c->journal.write_work,
-                                     msecs_to_jiffies(c->journal.delay_ms));
+       spin_lock_irq(&j->lock);
+
+       BUG_ON(seq > j->seq);
+
+       if (seq == j->seq) {
+               BUG_ON(!test_bit(JOURNAL_DIRTY, &j->flags));
+               set_bit(JOURNAL_NEED_WRITE, &j->flags);
+               if (parent &&
+                   !closure_wait(&journal_cur_write(j)->wait, parent))
+                       BUG();
+       } else if (seq + 1 == j->seq &&
+                  test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
+               if (parent &&
+                   !closure_wait(&journal_prev_write(j)->wait, parent))
+                       BUG();
+       }
+
+       journal_unlock(j);
 }
 
-void bch_journal_meta(struct cache_set *c, struct closure *parent)
+void bch_journal_meta(struct journal *j, struct closure *parent)
 {
        struct journal_res res;
        unsigned u64s = jset_u64s(0);
+       u64 seq;
 
        memset(&res, 0, sizeof(res));
 
-       if (!CACHE_SYNC(&c->sb))
-               return;
+       bch_journal_res_get(j, &res, u64s, u64s);
+       seq = j->seq;
+       bch_journal_res_put(j, &res);
 
-       bch_journal_res_get(c, &res, u64s, u64s);
-
-       if (res.ref) {
-               bch_journal_set_dirty(c);
-               bch_journal_res_put(c, &res, parent);
-       }
+       bch_journal_push_seq(j, seq, parent);
 }
 
-void bch_journal_push_seq(struct cache_set *c, u64 seq, struct closure *parent)
+void bch_journal_flush(struct journal *j, struct closure *parent)
 {
-       struct journal *j = &c->journal;
+       u64 seq;
 
        spin_lock_irq(&j->lock);
-
-       BUG_ON(seq > j->seq);
-
-       if (seq == j->seq) {
-               BUG_ON(!test_bit(JOURNAL_REPLAY_DONE, &j->flags));
-               BUG_ON(!test_bit(JOURNAL_DIRTY, &j->flags));
-
-               BUG_ON(!closure_wait(&j->cur->wait, parent));
-               set_bit(JOURNAL_NEED_WRITE, &j->flags);
-       } else if (seq + 1 == j->seq &&
-                  test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
-               struct journal_write *w = (j->cur == j->w)
-                       ? &j->w[1]
-                       : &j->w[0];
-
-               BUG_ON(!closure_wait(&w->wait, parent));
+       if (test_bit(JOURNAL_DIRTY, &j->flags)) {
+               seq = j->seq;
+       } else if (j->seq) {
+               seq = j->seq - 1;
+       } else {
+               spin_unlock_irq(&j->lock);
+               return;
        }
+       spin_unlock_irq(&j->lock);
 
-       journal_unlock(j);
+       bch_journal_push_seq(j, seq, parent);
 }
 
-void bch_journal_free(struct cache_set *c)
+void bch_journal_free(struct journal *j)
 {
-       free_pages((unsigned long) c->journal.w[1].data, JSET_BITS);
-       free_pages((unsigned long) c->journal.w[0].data, JSET_BITS);
-       free_fifo(&c->journal.pin);
+       free_pages((unsigned long) j->w[1].data, JSET_BITS);
+       free_pages((unsigned long) j->w[0].data, JSET_BITS);
+       free_fifo(&j->pin);
 }
 
-int bch_journal_alloc(struct cache_set *c)
+int bch_journal_alloc(struct journal *j)
 {
-       struct journal *j = &c->journal;
-
        spin_lock_init(&j->lock);
        init_waitqueue_head(&j->wait);
        INIT_DELAYED_WORK(&j->write_work, journal_write_work);
@@ -1782,8 +1774,8 @@
        atomic64_set(&j->reservations.counter,
                     journal_res_state(0, S32_MAX).v);
 
-       j->w[0].c = c;
-       j->w[1].c = c;
+       j->w[0].j = j;
+       j->w[1].j = j;
 
        if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
            !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
@@ -1874,6 +1866,7 @@
        unsigned i, nr_buckets;
        u64 last_flushed_seq;
        struct cache_set *c = ca->set;
+       struct journal *j = &c->journal;
        int ret = 0;    /* Success */
 
        closure_init_stack(&cl);
@@ -1886,7 +1879,7 @@ int bch_journal_move(struct cache *ca)
         * will call journal_next_bucket which notices that the
         * device is no longer writeable, and picks a new one.
         */
-       bch_journal_meta(c, &cl);
+       bch_journal_meta(j, &cl);
 
        /* Wait for the meta-data write */
        closure_sync(&cl);
 
        BUG_ON(bch_journal_writing_to_device(ca));
@@ -1904,7 +1897,7 @@ int bch_journal_move(struct cache *ca)
         * we have newer journal entries in devices other than ca,
         * and wait for the meta data write to complete.
         */
-       bch_journal_meta(c, &cl);
+       bch_journal_meta(j, &cl);
        closure_sync(&cl);
 
        /*
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index 5c2df565dccf..a77a53017b58 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -172,7 +172,7 @@ static inline void journal_pin_drop(struct journal *j,
        }
 
        if (atomic_dec_and_test(&pin->pin_list->count))
-               wake_up(&j->wait);
+               queue_work(system_long_wq, &j->reclaim_work);
 
        pin->pin_list = NULL;
 }
@@ -189,21 +189,30 @@ struct bkey_i *bch_journal_find_btree_root(struct cache_set *, struct jset *,
 
 int bch_journal_seq_blacklisted(struct cache_set *, u64, struct btree *);
 
-struct journal_res {
-       bool ref;
-       u16 offset;
-       u16 u64s;
-};
+static inline struct journal_write *journal_cur_write(struct journal *j)
+{
-void bch_journal_res_put(struct cache_set *, struct journal_res *,
-                        struct closure *);
-void bch_journal_res_get(struct cache_set *, struct journal_res *,
-                        unsigned, unsigned);
-void bch_journal_set_dirty(struct cache_set *);
-void bch_journal_add_keys(struct cache_set *, struct journal_res *,
+       return j->w + test_bit(JOURNAL_WRITE_IDX, &j->flags);
+}
+
+static inline struct journal_write *journal_prev_write(struct journal *j)
+{
+       return j->w + !test_bit(JOURNAL_WRITE_IDX, &j->flags);
+}
+
+void bch_journal_add_keys(struct journal *, struct journal_res *,
                          enum btree_id, const struct bkey_i *, unsigned);
+void bch_journal_res_put(struct journal *, struct journal_res *);
+void bch_journal_res_get(struct journal *, struct journal_res *,
+                        unsigned, unsigned);
+
+void bch_journal_push_seq(struct journal *, u64, struct closure *);
+void bch_journal_meta(struct journal *, struct closure *);
+void bch_journal_flush(struct journal *, struct closure *);
 
 /*
  * Amount of space that will be taken up by some keys in the journal (i.e.
  * including the jset header)
@@ -221,8 +230,6 @@ static inline bool journal_res_full(struct journal_res *res,
 
 void bch_journal_start(struct cache_set *);
 void bch_journal_mark(struct cache_set *, struct list_head *);
-void bch_journal_meta(struct cache_set *, struct closure *);
-void bch_journal_push_seq(struct cache_set *, u64, struct closure *);
 
 const char *bch_journal_read(struct cache_set *, struct list_head *);
 int bch_journal_replay(struct cache_set *, struct list_head *);
@@ -234,8 +241,8 @@ static inline void bch_journal_set_replay_done(struct journal *j)
        spin_unlock_irq(&j->lock);
 }
 
-void bch_journal_free(struct cache_set *);
-int bch_journal_alloc(struct cache_set *);
+void bch_journal_free(struct journal *);
+int bch_journal_alloc(struct journal *);
 
 ssize_t bch_journal_print_debug(struct journal *, char *);
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 65935b0b226b..1b55a445affa 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -15,7 +15,7 @@ struct journal_write {
        struct jset             *data;
 #define JSET_BITS              5
 
-       struct cache_set        *c;
+       struct journal          *j;
        struct closure_waitlist wait;
 };
@@ -49,6 +49,12 @@ struct journal_seq_blacklist {
        struct list_head        nodes;
 };
 
+struct journal_res {
+       bool                    ref;
+       u16                     offset;
+       u16                     u64s;
+};
+
 union journal_res_state {
        struct {
                atomic64_t      counter;
@@ -64,21 +70,33 @@ union journal_res_state {
        };
 };
 
+/*
+ * JOURNAL_DIRTY - current journal entry has stuff in it to write
+ *
+ * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
+ * either because something's waiting on the write to complete or because it's
+ * been dirty too long and the timer's expired.
+ *
+ * If JOURNAL_NEED_WRITE is set, JOURNAL_DIRTY must be set.
+ */
+
+enum {
+       JOURNAL_DIRTY,
+       JOURNAL_NEED_WRITE,
+       JOURNAL_IO_IN_FLIGHT,
+       JOURNAL_WRITE_IDX,
+       JOURNAL_REPLAY_DONE,
+};
+
 /* Embedded in struct cache_set */
 struct journal {
        /* Fastpath stuff up front: */
 
        unsigned long           flags;
-#define JOURNAL_NEED_WRITE     0
-#define JOURNAL_DIRTY          1
-#define JOURNAL_REPLAY_DONE    2
-#define JOURNAL_IO_IN_FLIGHT   3
 
        union journal_res_state reservations;
        unsigned                cur_entry_u64s;
 
-       struct journal_write    *cur;
-
        /*
         * Two journal entries -- one is currently open for new entries, the
         * other is possibly being written out.
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index b43126105212..e0c218eecc06 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -594,20 +594,6 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
        continue_at(cl, cached_dev_write_complete, NULL);
 }
 
-static void cached_dev_nodata(struct closure *cl)
-{
-       struct search *s = container_of(cl, struct search, cl);
-       struct bio *bio = &s->bio.bio;
-
-       if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
-               bch_journal_meta(s->iop.c, cl);
-
-       /* If it's a flush, we send the flush to the backing device too */
-       closure_bio_submit(bio, cl);
-
-       continue_at(cl, cached_dev_bio_complete, NULL);
-}
-
 /* Cached devices - read & write stuff */
 
 static void __cached_dev_make_request(struct request_queue *q, struct bio *bio)
@@ -627,13 +613,16 @@ static void __cached_dev_make_request(struct request_queue *q, struct bio *bio)
        trace_bcache_request_start(s->d, bio);
 
        if (!bio->bi_iter.bi_size) {
+               if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
+                       bch_journal_meta(&s->iop.c->journal, &s->cl);
+
                /*
-                * can't call bch_journal_meta from under
-                * generic_make_request
+                * If it's a flush, we send the flush to the backing
+                * device too
                 */
-               continue_at_nobarrier(&s->cl,
-                                     cached_dev_nodata,
-                                     d->c->wq);
+               closure_bio_submit(&s->bio.bio, &s->cl);
+
+               continue_at(&s->cl, cached_dev_bio_complete, NULL);
        } else {
                s->bypass = check_should_bypass(dc, bio, rw);
@@ -701,16 +690,6 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
 
 /* Flash backed devices */
 
-static void flash_dev_nodata(struct closure *cl)
-{
-       struct search *s = container_of(cl, struct search, cl);
-
-       if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
-               bch_journal_meta(s->iop.c, cl);
-
-       continue_at(cl, search_free, NULL);
-}
-
 static void __flash_dev_make_request(struct request_queue *q, struct bio *bio)
 {
        struct search *s;
@@ -723,13 +702,11 @@ static void __flash_dev_make_request(struct request_queue *q, struct bio *bio)
 
        if (!bio->bi_iter.bi_size) {
                s = search_alloc(bio, d);
-               /*
-                * can't call bch_journal_meta from under
-                * generic_make_request
-                */
-               continue_at_nobarrier(&s->cl,
-                                     flash_dev_nodata,
-                                     d->c->wq);
+
+               if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
+                       bch_journal_meta(&s->iop.c->journal, &s->cl);
+
+               continue_at(&s->cl, search_free, NULL);
        } else if (rw) {
                unsigned flags = 0;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 6d488115a6e0..7e5af15d0267 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -654,12 +654,14 @@ static void bch_cache_set_read_only(struct cache_set *c)
        struct cached_dev *dc;
        struct bcache_device *d;
        struct radix_tree_iter iter;
+       struct closure cl;
        void **slot;
        struct cache *ca;
        unsigned i;
 
        lockdep_assert_held(&bch_register_lock);
+       closure_init_stack(&cl);
 
        if (test_and_set_bit(CACHE_SET_RO, &c->flags))
                return;
@@ -704,11 +706,10 @@ static void bch_cache_set_read_only(struct cache_set *c)
        for_each_cache(ca, c, i)
                __bch_cache_read_only(ca);
 
-       if (c->journal.cur) {
-               cancel_delayed_work_sync(&c->journal.write_work);
-               /* flush last journal entry if needed */
-               c->journal.write_work.work.func(&c->journal.write_work.work);
-       }
+       bch_journal_flush(&c->journal, &cl);
+       closure_sync(&cl);
+
+       cancel_delayed_work_sync(&c->journal.write_work);
 
        bch_notify_cache_set_read_only(c);
@@ -757,7 +758,7 @@ static void cache_set_free(struct closure *cl)
        bch_debug_exit_cache_set(c);
        bch_btree_cache_free(c);
-       bch_journal_free(c);
+       bch_journal_free(&c->journal);
 
        mutex_lock(&bch_register_lock);
        for_each_cache(ca, c, i)
@@ -999,7 +1000,7 @@ static const char *bch_cache_set_alloc(struct cache_sb *sb,
            !(c->wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
            bch_io_clock_init(&c->io_clock[READ]) ||
            bch_io_clock_init(&c->io_clock[WRITE]) ||
-           bch_journal_alloc(c) ||
+           bch_journal_alloc(&c->journal) ||
            bch_btree_cache_alloc(c) ||
            bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
                goto err;
@@ -1168,7 +1169,8 @@ static const char *run_cache_set(struct cache_set *c)
                bch_journal_set_replay_done(&c->journal);
 
                /* XXX: necessary? */
-               bch_journal_meta(c, &cl);
+               bch_journal_meta(&c->journal, &cl);
+               closure_sync(&cl);
        }
 
        bch_prio_timer_start(c, READ);
@@ -1750,7 +1752,8 @@ static void bch_cache_remove_work(struct work_struct *work)
                c->journal.prio_buckets[ca->sb.nr_this_dev] = 0;
                spin_unlock(&c->journal.lock);
 
-               bch_journal_meta(c, &cl);
+               /* write new prio pointers */
+               bch_journal_meta(&c->journal, &cl);
                closure_sync(&cl);
 
                __bcache_write_super(c); /* ups sb_write_mutex */
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index e727e69a2a3d..0f04e79676e9 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -147,7 +147,6 @@ sysfs_pd_controller_attribute(writeback);
 read_attribute(stripe_size);
 read_attribute(partial_stripes_expensive);
 
-rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(discard);
 rw_attribute(running);
@@ -583,7 +582,6 @@ SHOW(bch_cache_set)
 {
        struct cache_set *c = container_of(kobj, struct cache_set, kobj);
 
-       sysfs_print(synchronous,        CACHE_SYNC(&c->sb));
        sysfs_print(journal_delay_ms,   c->journal.delay_ms);
 
        sysfs_hprint(block_size,        block_bytes(c));
@@ -705,17 +703,6 @@ STORE(__bch_cache_set)
                return size;
        }
 
-       if (attr == &sysfs_synchronous) {
-               bool sync = strtoul_or_return(buf);
-
-               if (sync != CACHE_SYNC(&c->sb)) {
-                       SET_CACHE_SYNC(&c->sb, sync);
-                       bcache_write_super(c);
-               }
-
-               return size;
-       }
-
        if (attr == &sysfs_clear_stats) {
                atomic_long_set(&c->writeback_keys_done, 0);
                atomic_long_set(&c->writeback_keys_failed, 0);
@@ -824,7 +811,7 @@ STORE(__bch_cache_set)
                struct closure cl;
                closure_init_stack(&cl);
 
-               bch_journal_meta(c, &cl);
+               bch_journal_meta(&c->journal, &cl);
                closure_sync(&cl);
 
                return size;
@@ -920,7 +907,6 @@ static void bch_cache_set_internal_release(struct kobject *k)
 static struct attribute *bch_cache_set_files[] = {
        &sysfs_unregister,
        &sysfs_stop,
-       &sysfs_synchronous,
        &sysfs_journal_delay_ms,
        &sysfs_flash_vol_create,
        &sysfs_add_device,