author	Kent Overstreet <kent.overstreet@gmail.com>	2015-04-22 15:24:38 -0700
committer	Kent Overstreet <kent.overstreet@gmail.com>	2016-10-07 12:33:57 -0800
commit	55c6ca8f2d6e5c10f679dd82654171602dc2494c
tree	b31206330d3b2030e4c84e45a8f21bcd3ab9e76a
parent	6cc869454858f55087b627c0e48cea9df375b7e8
bcache: More journal related refactoring
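
Pass struct journal * instead of struct cache_set * through the
journal interfaces (bch_journal_res_get/put, bch_journal_add_keys,
bch_journal_meta, bch_journal_push_seq, bch_journal_alloc/free), and
have struct journal_write point back at the journal instead of the
cache set.

The j->cur pointer into the two journal_write buffers is gone: the
current and previous writes are now derived from a JOURNAL_WRITE_IDX
flag bit via new journal_cur_write()/journal_prev_write() helpers, and
__bch_journal_next_entry() flips the bit with change_bit().

bch_journal_set_dirty() is folded into bch_journal_res_put(), which no
longer takes a closure; waiting for an entry to be persistent is now
done by handing a sequence number to bch_journal_push_seq(), and
there's a new bch_journal_flush(). bch_journal_meta() no longer checks
CACHE_SYNC(), so the "synchronous" sysfs attribute is removed, and it
may now be called from under generic_make_request(), so the
cached_dev_nodata()/flash_dev_nodata() trampolines go away too.

Rough sketch of the resulting calling convention on an insert path
(illustrative only, not code from this patch; c is a struct cache_set *
and u64s, id, k and level stand in for caller-supplied values):

	struct journal_res res;
	struct closure cl;
	u64 seq;

	memset(&res, 0, sizeof(res));
	closure_init_stack(&cl);

	/* Reserve space in the current journal entry: */
	bch_journal_res_get(&c->journal, &res, u64s, u64s);

	/* Add keys and note the seq of the entry they landed in: */
	bch_journal_add_keys(&c->journal, &res, id, k, level);
	seq = c->journal.seq;

	/* Release the reservation so the entry can be written: */
	bch_journal_res_put(&c->journal, &res);

	/* Wait, via cl, for that entry to be on disk: */
	bch_journal_push_seq(&c->journal, seq, &cl);
	closure_sync(&cl);
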
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--	drivers/md/bcache/alloc.c	2
-rw-r--r--	drivers/md/bcache/btree.c	44
-rw-r--r--	drivers/md/bcache/journal.c	181
-rw-r--r--	drivers/md/bcache/journal.h	39
-rw-r--r--	drivers/md/bcache/journal_types.h	32
-rw-r--r--	drivers/md/bcache/request.c	49
-rw-r--r--	drivers/md/bcache/super.c	21
-rw-r--r--	drivers/md/bcache/sysfs.c	16
8 files changed, 190 insertions(+), 194 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b79745991532..97d7b0468fb4 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -297,7 +297,7 @@ static void bch_prio_write(struct cache *ca)
c->journal.nr_prio_buckets);
spin_unlock_irq(&c->journal.lock);
- bch_journal_meta(c, &cl);
+ bch_journal_meta(&c->journal, &cl);
closure_sync(&cl);
/*
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 1bd44e73ca1b..49d58d65d387 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -787,7 +787,7 @@ void bch_btree_push_journal_seq(struct cache_set *c, struct btree *b,
u64 seq = b->keys.set[i].data->journal_seq;
if (seq) {
- bch_journal_push_seq(c, seq, cl);
+ bch_journal_push_seq(&c->journal, seq, cl);
break;
}
}
@@ -1481,7 +1481,6 @@ void btree_node_free(struct cache_set *c, struct btree *b)
static void bch_btree_set_root(struct cache_set *c, struct btree *b)
{
struct journal_res res;
- struct closure cl;
struct btree *old;
memset(&res, 0, sizeof(res));
@@ -1491,9 +1490,10 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
old = btree_node_root(b);
if (old) {
- unsigned u64s = jset_u64s(0);
-
- bch_journal_res_get(c, &res, u64s, u64s);
+ /*
+ * Ensure no one is using the old root while we switch to the
+ * new root:
+ */
six_lock_write(&old->lock);
}
@@ -1509,14 +1509,25 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
bch_recalc_btree_reserve(c);
if (old) {
- if (res.ref) {
- closure_init_stack(&cl);
- bch_journal_set_dirty(c);
- bch_journal_res_put(c, &res, &cl);
- closure_sync(&cl);
- }
+ struct closure cl;
+
+ closure_init_stack(&cl);
+ /*
+ * Unlock old root after new root is visible:
+ *
+ * The new root isn't persistent, but that's ok: we still have
+ * an intent lock on the new root, and any updates that would
+ * depend on the new root would have to update the new root.
+ */
six_unlock_write(&old->lock);
+
+ /*
+ * Ensure new btree root is persistent (reachable via the
+ * journal) before returning and the caller unlocking it:
+ */
+ bch_journal_meta(&c->journal, &cl);
+ closure_sync(&cl);
}
}
@@ -1859,7 +1870,8 @@ void bch_btree_insert_and_journal(struct cache_set *c, struct btree *b,
}
if (res->ref) {
- bch_journal_add_keys(c, res, b->btree_id, insert, b->level);
+ bch_journal_add_keys(&c->journal, res, b->btree_id,
+ insert, b->level);
btree_bset_last(b)->journal_seq = c->journal.seq;
}
}
@@ -2005,7 +2017,7 @@ bch_btree_insert_keys(struct btree *b,
if (!b->level &&
test_bit(JOURNAL_REPLAY_DONE, &iter->c->journal.flags))
- bch_journal_res_get(iter->c, &res,
+ bch_journal_res_get(&iter->c->journal, &res,
actual_min, actual_max);
six_lock_write(&b->lock);
@@ -2045,7 +2057,7 @@ bch_btree_insert_keys(struct btree *b,
six_unlock_write(&b->lock);
if (res.ref)
- bch_journal_res_put(iter->c, &res, NULL);
+ bch_journal_res_put(&iter->c->journal, &res);
}
if (inserted && b->written) {
@@ -2520,7 +2532,7 @@ int bch_btree_insert(struct cache_set *c, enum btree_id id,
out: ret2 = bch_btree_iter_unlock(&iter);
if (persistent)
- bch_journal_push_seq(c, *journal_seq, persistent);
+ bch_journal_push_seq(&c->journal, *journal_seq, persistent);
return ret ?: ret2;
}
@@ -2556,7 +2568,7 @@ int bch_btree_update(struct cache_set *c, enum btree_id id, struct bkey_i *k,
out: ret2 = bch_btree_iter_unlock(&iter);
if (persistent)
- bch_journal_push_seq(c, *journal_seq, persistent);
+ bch_journal_push_seq(&c->journal, *journal_seq, persistent);
return ret ?: ret2;
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 2a1f6ee8dc88..07140b6953fe 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -47,12 +47,12 @@ static inline u64 journal_pin_seq(struct journal *j,
#define JSET_SECTORS (PAGE_SECTORS << JSET_BITS)
-static inline void bch_journal_add_entry_at(struct jset *j, const void *data,
+static inline void bch_journal_add_entry_at(struct journal *j, const void *data,
size_t u64s, unsigned type,
enum btree_id id, unsigned level,
unsigned offset)
{
- struct jset_entry *jkeys = (struct jset_entry *) bkey_idx(j, offset);
+ struct jset_entry *jkeys = bkey_idx(journal_cur_write(j)->data, offset);
jkeys->u64s = u64s;
jkeys->btree_id = id;
@@ -63,12 +63,14 @@ static inline void bch_journal_add_entry_at(struct jset *j, const void *data,
memcpy(jkeys->_data, data, u64s * sizeof(u64));
}
-static inline void bch_journal_add_entry(struct jset *j, const void *data,
+static inline void bch_journal_add_entry(struct journal *j, const void *data,
size_t u64s, unsigned type,
enum btree_id id, unsigned level)
{
- bch_journal_add_entry_at(j, data, u64s, type, id, level, j->u64s);
- j->u64s += jset_u64s(u64s);
+ struct jset *jset = journal_cur_write(j)->data;
+
+ bch_journal_add_entry_at(j, data, u64s, type, id, level, jset->u64s);
+ jset->u64s += jset_u64s(u64s);
}
static struct jset_entry *bch_journal_find_entry(struct jset *j, unsigned type,
@@ -106,15 +108,15 @@ struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
return k;
}
-static void bch_journal_add_btree_root(struct jset *j, enum btree_id id,
+static void bch_journal_add_btree_root(struct journal *j, enum btree_id id,
struct bkey_i *k, unsigned level)
{
bch_journal_add_entry(j, k, k->k.u64s, JKEYS_BTREE_ROOT, id, level);
}
-static inline void bch_journal_add_prios(struct journal *j, struct jset *jset)
+static inline void bch_journal_add_prios(struct journal *j)
{
- bch_journal_add_entry(jset, j->prio_buckets, j->nr_prio_buckets,
+ bch_journal_add_entry(j, j->prio_buckets, j->nr_prio_buckets,
JKEYS_PRIO_PTRS, 0, 0);
}
@@ -799,7 +801,7 @@ static bool journal_entry_close(struct journal *j)
new.cur_entry_offset = S32_MAX;
} while ((v = cmpxchg(&j->reservations.v, old.v, new.v)) != old.v);
- j->cur->data->u64s = old.cur_entry_offset;
+ journal_cur_write(j)->data->u64s = old.cur_entry_offset;
return old.count == 0;
}
@@ -807,8 +809,10 @@ static bool journal_entry_close(struct journal *j)
/* Number of u64s we can write to the current journal bucket */
static void journal_calc_entry_size(struct journal *j)
{
+ struct journal_write *w = journal_cur_write(j);
ssize_t u64s;
+ lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j) ||
test_bit(JOURNAL_DIRTY, &j->flags));
@@ -827,12 +831,12 @@ static void journal_calc_entry_size(struct journal *j)
u64s -= JSET_KEYS_U64s + j->nr_prio_buckets;
u64s = max_t(ssize_t, 0L, u64s);
- if (u64s > j->cur->data->u64s) {
+ if (u64s > w->data->u64s) {
j->cur_entry_u64s = max_t(ssize_t, 0L, u64s);
/* Handle any already added entries */
atomic64_set(&j->reservations.counter,
- journal_res_state(0, j->cur->data->u64s).v);
+ journal_res_state(0, w->data->u64s).v);
wake_up(&j->wait);
}
}
@@ -869,7 +873,7 @@ void bch_journal_start(struct cache_set *c)
list_for_each_entry(bl, &j->seq_blacklist, list)
if (!bl->written) {
- bch_journal_add_entry(j->cur->data, &bl->seq, 1,
+ bch_journal_add_entry(j, &bl->seq, 1,
JKEYS_JOURNAL_SEQ_BLACKLISTED,
0, 0);
@@ -1278,10 +1282,9 @@ static void journal_next_bucket(struct cache_set *c)
static void __bch_journal_next_entry(struct journal *j)
{
struct journal_entry_pin_list pin_list, *p;
+ struct jset *jset;
- j->cur = (j->cur == j->w)
- ? &j->w[1]
- : &j->w[0];
+ change_bit(JOURNAL_WRITE_IDX, &j->flags);
/*
* The fifo_push() needs to happen at the same time as j->seq is
@@ -1296,8 +1299,9 @@ static void __bch_journal_next_entry(struct journal *j)
if (test_bit(JOURNAL_REPLAY_DONE, &j->flags))
j->cur_pin_list = p;
- j->cur->data->seq = ++j->seq;
- j->cur->data->u64s = 0;
+ jset = journal_cur_write(j)->data;
+ jset->seq = ++j->seq;
+ jset->u64s = 0;
}
static void bch_journal_next_entry(struct journal *j)
@@ -1315,22 +1319,18 @@ static void journal_write_endio(struct bio *bio)
bch_cache_error(ca, "IO error %d writing journal",
bio->bi_error);
- closure_put(&w->c->journal.io);
+ closure_put(&w->j->io);
percpu_ref_put(&ca->ref);
}
static void journal_write_done(struct closure *cl)
{
struct journal *j = container_of(cl, struct journal, io);
- struct journal_write *w;
+ struct journal_write *w = journal_prev_write(j);
unsigned long flags;
spin_lock_irqsave(&j->lock, flags);
- w = (j->cur == j->w)
- ? &j->w[1]
- : &j->w[0];
-
j->last_seq_ondisk = w->data->last_seq;
__closure_wake_up(&w->wait);
@@ -1350,7 +1350,8 @@ static void journal_write_locked(struct closure *cl)
struct journal *j = container_of(cl, struct journal, io);
struct cache_set *c = container_of(j, struct cache_set, journal);
struct cache *ca;
- struct journal_write *w = j->cur;
+ struct btree *b;
+ struct journal_write *w = journal_cur_write(j);
struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
struct bch_extent_ptr *ptr;
BKEY_PADDED(k) tmp;
@@ -1370,17 +1371,13 @@ static void journal_write_locked(struct closure *cl)
spin_lock(&c->btree_root_lock);
- for (i = 0; i < BTREE_ID_NR; i++) {
- struct btree *b = c->btree_roots[i];
-
- if (b)
- bch_journal_add_btree_root(w->data, i,
- &b->key, b->level);
- }
+ for (i = 0; i < BTREE_ID_NR; i++)
+ if ((b = c->btree_roots[i]))
+ bch_journal_add_btree_root(j, i, &b->key, b->level);
spin_unlock(&c->btree_root_lock);
- bch_journal_add_prios(j, w->data);
+ bch_journal_add_prios(j);
/* So last_seq is up to date */
journal_reclaim_fast(j);
@@ -1514,7 +1511,7 @@ static void journal_write_work(struct work_struct *work)
journal_unlock(j);
}
-void bch_journal_add_keys(struct cache_set *c, struct journal_res *res,
+void bch_journal_add_keys(struct journal *j, struct journal_res *res,
enum btree_id id, const struct bkey_i *k,
unsigned level)
{
@@ -1523,7 +1520,7 @@ void bch_journal_add_keys(struct cache_set *c, struct journal_res *res,
BUG_ON(!res->ref);
BUG_ON(actual > res->u64s);
- bch_journal_add_entry_at(c->journal.cur->data, k, k->k.u64s,
+ bch_journal_add_entry_at(j, k, k->k.u64s,
JKEYS_BTREE_KEYS, id, level, res->offset);
res->offset += actual;
@@ -1534,11 +1531,8 @@ void bch_journal_add_keys(struct cache_set *c, struct journal_res *res,
* This function releases the journal write structure so other threads can
* then proceed to add their keys as well.
*/
-void bch_journal_res_put(struct cache_set *c,
- struct journal_res *res,
- struct closure *parent)
+void bch_journal_res_put(struct journal *j, struct journal_res *res)
{
- struct journal *j = &c->journal;
union journal_res_state s;
bool do_write = false;
@@ -1549,18 +1543,15 @@ void bch_journal_res_put(struct cache_set *c,
while (res->u64s) {
unsigned actual = jset_u64s(0);
- bch_journal_add_entry_at(j->cur->data, NULL, 0,
- JKEYS_BTREE_KEYS, 0, 0, res->offset);
+ bch_journal_add_entry_at(j, NULL, 0, JKEYS_BTREE_KEYS,
+ 0, 0, res->offset);
res->offset += actual;
res->u64s -= actual;
}
- bch_journal_set_dirty(c);
-
- if (parent && test_bit(JOURNAL_DIRTY, &j->flags)) {
- BUG_ON(!closure_wait(&j->cur->wait, parent));
- set_bit(JOURNAL_NEED_WRITE, &j->flags);
- }
+ if (!test_and_set_bit(JOURNAL_DIRTY, &j->flags))
+ schedule_delayed_work(&j->write_work,
+ msecs_to_jiffies(j->delay_ms));
if (test_bit(JOURNAL_NEED_WRITE, &j->flags) &&
!test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
@@ -1619,11 +1610,11 @@ static inline bool journal_res_get_fast(struct journal *j,
return true;
}
-static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
+static bool __journal_res_get(struct journal *j, struct journal_res *res,
unsigned u64s_min, unsigned u64s_max,
u64 *start_time)
{
- struct journal *j = &c->journal;
+ struct cache_set *c = container_of(j, struct cache_set, journal);
while (1) {
if (journal_res_get_fast(j, res, u64s_min, u64s_max))
@@ -1687,18 +1678,16 @@ static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
* To ensure forward progress, the current task must not be holding any
* btree node write locks.
*/
-void bch_journal_res_get(struct cache_set *c, struct journal_res *res,
+void bch_journal_res_get(struct journal *j, struct journal_res *res,
unsigned u64s_min, unsigned u64s_max)
{
- struct journal *j = &c->journal;
u64 start_time = 0;
BUG_ON(res->ref);
BUG_ON(u64s_max < u64s_min);
- BUG_ON(!j->cur);
wait_event(j->wait,
- __journal_res_get(c, res, u64s_min, u64s_max, &start_time));
+ __journal_res_get(j, res, u64s_min, u64s_max, &start_time));
BUG_ON(!res->ref);
@@ -1706,67 +1695,70 @@ void bch_journal_res_get(struct cache_set *c, struct journal_res *res,
bch_time_stats_update(&j->full_time, start_time);
}
-void bch_journal_set_dirty(struct cache_set *c)
+void bch_journal_push_seq(struct journal *j, u64 seq, struct closure *parent)
{
- if (!test_and_set_bit(JOURNAL_DIRTY, &c->journal.flags))
- schedule_delayed_work(&c->journal.write_work,
- msecs_to_jiffies(c->journal.delay_ms));
+ spin_lock_irq(&j->lock);
+
+ BUG_ON(seq > j->seq);
+
+ if (seq == j->seq) {
+ BUG_ON(!test_bit(JOURNAL_DIRTY, &j->flags));
+ set_bit(JOURNAL_NEED_WRITE, &j->flags);
+ if (parent &&
+ !closure_wait(&journal_cur_write(j)->wait, parent))
+ BUG();
+ } else if (seq + 1 == j->seq &&
+ test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
+ if (parent &&
+ !closure_wait(&journal_prev_write(j)->wait, parent))
+ BUG();
+ }
+
+ journal_unlock(j);
}
-void bch_journal_meta(struct cache_set *c, struct closure *parent)
+void bch_journal_meta(struct journal *j, struct closure *parent)
{
struct journal_res res;
unsigned u64s = jset_u64s(0);
+ u64 seq;
memset(&res, 0, sizeof(res));
- if (!CACHE_SYNC(&c->sb))
- return;
+ bch_journal_res_get(j, &res, u64s, u64s);
+ seq = j->seq;
+ bch_journal_res_put(j, &res);
- bch_journal_res_get(c, &res, u64s, u64s);
- if (res.ref) {
- bch_journal_set_dirty(c);
- bch_journal_res_put(c, &res, parent);
- }
+ bch_journal_push_seq(j, seq, parent);
}
-void bch_journal_push_seq(struct cache_set *c, u64 seq, struct closure *parent)
+void bch_journal_flush(struct journal *j, struct closure *parent)
{
- struct journal *j = &c->journal;
+ u64 seq;
spin_lock_irq(&j->lock);
-
- BUG_ON(seq > j->seq);
-
- if (seq == j->seq) {
- BUG_ON(!test_bit(JOURNAL_REPLAY_DONE, &j->flags));
- BUG_ON(!test_bit(JOURNAL_DIRTY, &j->flags));
-
- BUG_ON(!closure_wait(&j->cur->wait, parent));
- set_bit(JOURNAL_NEED_WRITE, &j->flags);
- } else if (seq + 1 == j->seq &&
- test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
- struct journal_write *w = (j->cur == j->w)
- ? &j->w[1]
- : &j->w[0];
-
- BUG_ON(!closure_wait(&w->wait, parent));
+ if (test_bit(JOURNAL_DIRTY, &j->flags)) {
+ seq = j->seq;
+ } else if (j->seq) {
+ seq = j->seq - 1;
+ } else {
+ spin_unlock_irq(&j->lock);
+ return;
}
+ spin_unlock_irq(&j->lock);
- journal_unlock(j);
+ bch_journal_push_seq(j, seq, parent);
}
-void bch_journal_free(struct cache_set *c)
+void bch_journal_free(struct journal *j)
{
- free_pages((unsigned long) c->journal.w[1].data, JSET_BITS);
- free_pages((unsigned long) c->journal.w[0].data, JSET_BITS);
- free_fifo(&c->journal.pin);
+ free_pages((unsigned long) j->w[1].data, JSET_BITS);
+ free_pages((unsigned long) j->w[0].data, JSET_BITS);
+ free_fifo(&j->pin);
}
-int bch_journal_alloc(struct cache_set *c)
+int bch_journal_alloc(struct journal *j)
{
- struct journal *j = &c->journal;
-
spin_lock_init(&j->lock);
init_waitqueue_head(&j->wait);
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
@@ -1782,8 +1774,8 @@ int bch_journal_alloc(struct cache_set *c)
atomic64_set(&j->reservations.counter,
journal_res_state(0, S32_MAX).v);
- j->w[0].c = c;
- j->w[1].c = c;
+ j->w[0].j = j;
+ j->w[1].j = j;
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
!(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
@@ -1874,6 +1866,7 @@ int bch_journal_move(struct cache *ca)
unsigned i, nr_buckets;
u64 last_flushed_seq;
struct cache_set *c = ca->set;
+ struct journal *j = &c->journal;
int ret = 0; /* Success */
closure_init_stack(&cl);
@@ -1886,7 +1879,7 @@ int bch_journal_move(struct cache *ca)
* will call journal_next_bucket which notices that the
* device is no longer writeable, and picks a new one.
*/
- bch_journal_meta(c, &cl);
+ bch_journal_meta(j, &cl);
/* Wait for the meta-data write */
closure_sync(&cl);
BUG_ON(bch_journal_writing_to_device(ca));
@@ -1904,7 +1897,7 @@ int bch_journal_move(struct cache *ca)
* we have newer journal entries in devices other than ca,
* and wait for the meta data write to complete.
*/
- bch_journal_meta(c, &cl);
+ bch_journal_meta(j, &cl);
closure_sync(&cl);
/*
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index 5c2df565dccf..a77a53017b58 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -172,7 +172,7 @@ static inline void journal_pin_drop(struct journal *j,
}
if (atomic_dec_and_test(&pin->pin_list->count))
- wake_up(&j->wait);
+ queue_work(system_long_wq, &j->reclaim_work);
pin->pin_list = NULL;
}
@@ -189,21 +189,30 @@ struct bkey_i *bch_journal_find_btree_root(struct cache_set *, struct jset *,
int bch_journal_seq_blacklisted(struct cache_set *, u64, struct btree *);
-struct journal_res {
- bool ref;
- u16 offset;
- u16 u64s;
-};
+static inline struct journal_write *journal_cur_write(struct journal *j)
+{
+ return j->w + test_bit(JOURNAL_WRITE_IDX, &j->flags);
+}
+
+static inline struct journal_write *journal_prev_write(struct journal *j)
+{
+ return j->w + !test_bit(JOURNAL_WRITE_IDX, &j->flags);
+}
+
-void bch_journal_res_put(struct cache_set *, struct journal_res *,
- struct closure *);
-void bch_journal_res_get(struct cache_set *, struct journal_res *,
- unsigned, unsigned);
-void bch_journal_set_dirty(struct cache_set *);
-void bch_journal_add_keys(struct cache_set *, struct journal_res *,
+void bch_journal_add_keys(struct journal *, struct journal_res *,
enum btree_id, const struct bkey_i *,
unsigned);
+void bch_journal_res_put(struct journal *, struct journal_res *);
+void bch_journal_res_get(struct journal *, struct journal_res *,
+ unsigned, unsigned);
+
+void bch_journal_push_seq(struct journal *, u64, struct closure *);
+void bch_journal_meta(struct journal *, struct closure *);
+void bch_journal_flush(struct journal *, struct closure *);
+
/*
* Amount of space that will be taken up by some keys in the journal (i.e.
* including the jset header)
@@ -221,8 +230,6 @@ static inline bool journal_res_full(struct journal_res *res,
void bch_journal_start(struct cache_set *);
void bch_journal_mark(struct cache_set *, struct list_head *);
-void bch_journal_meta(struct cache_set *, struct closure *);
-void bch_journal_push_seq(struct cache_set *, u64, struct closure *);
const char *bch_journal_read(struct cache_set *, struct list_head *);
int bch_journal_replay(struct cache_set *, struct list_head *);
@@ -234,8 +241,8 @@ static inline void bch_journal_set_replay_done(struct journal *j)
spin_unlock_irq(&j->lock);
}
-void bch_journal_free(struct cache_set *);
-int bch_journal_alloc(struct cache_set *);
+void bch_journal_free(struct journal *);
+int bch_journal_alloc(struct journal *);
ssize_t bch_journal_print_debug(struct journal *, char *);
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 65935b0b226b..1b55a445affa 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -15,7 +15,7 @@ struct journal_write {
struct jset *data;
#define JSET_BITS 5
- struct cache_set *c;
+ struct journal *j;
struct closure_waitlist wait;
};
@@ -49,6 +49,12 @@ struct journal_seq_blacklist {
struct list_head nodes;
};
+struct journal_res {
+ bool ref;
+ u16 offset;
+ u16 u64s;
+};
+
union journal_res_state {
struct {
atomic64_t counter;
@@ -64,21 +70,33 @@ union journal_res_state {
};
};
+/*
+ * JOURNAL_DIRTY - current journal entry has stuff in it to write
+ *
+ * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
+ * either because something's waiting on the write to complete or because it's
+ * been dirty too long and the timer's expired.
+ *
+ * If JOURNAL_NEED_WRITE is set, JOURNAL_DIRTY must be set.
+ */
+
+enum {
+ JOURNAL_DIRTY,
+ JOURNAL_NEED_WRITE,
+ JOURNAL_IO_IN_FLIGHT,
+ JOURNAL_WRITE_IDX,
+ JOURNAL_REPLAY_DONE,
+};
+
/* Embedded in struct cache_set */
struct journal {
/* Fastpath stuff up front: */
unsigned long flags;
-#define JOURNAL_NEED_WRITE 0
-#define JOURNAL_DIRTY 1
-#define JOURNAL_REPLAY_DONE 2
-#define JOURNAL_IO_IN_FLIGHT 3
union journal_res_state reservations;
unsigned cur_entry_u64s;
- struct journal_write *cur;
-
/*
* Two journal entries -- one is currently open for new entries, the
* other is possibly being written out.
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index b43126105212..e0c218eecc06 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -594,20 +594,6 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
continue_at(cl, cached_dev_write_complete, NULL);
}
-static void cached_dev_nodata(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
- struct bio *bio = &s->bio.bio;
-
- if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
- closure_bio_submit(bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-}
-
/* Cached devices - read & write stuff */
static void __cached_dev_make_request(struct request_queue *q, struct bio *bio)
@@ -627,13 +613,16 @@ static void __cached_dev_make_request(struct request_queue *q, struct bio *bio)
trace_bcache_request_start(s->d, bio);
if (!bio->bi_iter.bi_size) {
+ if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
+ bch_journal_meta(&s->iop.c->journal, &s->cl);
+
/*
- * can't call bch_journal_meta from under
- * generic_make_request
+ * If it's a flush, we send the flush to the backing
+ * device too
*/
- continue_at_nobarrier(&s->cl,
- cached_dev_nodata,
- d->c->wq);
+ closure_bio_submit(&s->bio.bio, &s->cl);
+
+ continue_at(&s->cl, cached_dev_bio_complete, NULL);
} else {
s->bypass = check_should_bypass(dc, bio, rw);
@@ -701,16 +690,6 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
/* Flash backed devices */
-static void flash_dev_nodata(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
-
- if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- bch_journal_meta(s->iop.c, cl);
-
- continue_at(cl, search_free, NULL);
-}
-
static void __flash_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
@@ -723,13 +702,11 @@ static void __flash_dev_make_request(struct request_queue *q, struct bio *bio)
if (!bio->bi_iter.bi_size) {
s = search_alloc(bio, d);
- /*
- * can't call bch_journal_meta from under
- * generic_make_request
- */
- continue_at_nobarrier(&s->cl,
- flash_dev_nodata,
- d->c->wq);
+
+ if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
+ bch_journal_meta(&s->iop.c->journal, &s->cl);
+
+ continue_at(&s->cl, search_free, NULL);
} else if (rw) {
unsigned flags = 0;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 6d488115a6e0..7e5af15d0267 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -654,12 +654,14 @@ static void bch_cache_set_read_only(struct cache_set *c)
struct cached_dev *dc;
struct bcache_device *d;
struct radix_tree_iter iter;
+ struct closure cl;
void **slot;
struct cache *ca;
unsigned i;
lockdep_assert_held(&bch_register_lock);
+ closure_init_stack(&cl);
if (test_and_set_bit(CACHE_SET_RO, &c->flags))
return;
@@ -704,11 +706,10 @@ static void bch_cache_set_read_only(struct cache_set *c)
for_each_cache(ca, c, i)
__bch_cache_read_only(ca);
- if (c->journal.cur) {
- cancel_delayed_work_sync(&c->journal.write_work);
- /* flush last journal entry if needed */
- c->journal.write_work.work.func(&c->journal.write_work.work);
- }
+ bch_journal_flush(&c->journal, &cl);
+ closure_sync(&cl);
+
+ cancel_delayed_work_sync(&c->journal.write_work);
bch_notify_cache_set_read_only(c);
@@ -757,7 +758,7 @@ static void cache_set_free(struct closure *cl)
bch_debug_exit_cache_set(c);
bch_btree_cache_free(c);
- bch_journal_free(c);
+ bch_journal_free(&c->journal);
mutex_lock(&bch_register_lock);
for_each_cache(ca, c, i)
@@ -999,7 +1000,7 @@ static const char *bch_cache_set_alloc(struct cache_sb *sb,
!(c->wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
bch_io_clock_init(&c->io_clock[READ]) ||
bch_io_clock_init(&c->io_clock[WRITE]) ||
- bch_journal_alloc(c) ||
+ bch_journal_alloc(&c->journal) ||
bch_btree_cache_alloc(c) ||
bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
goto err;
@@ -1168,7 +1169,8 @@ static const char *run_cache_set(struct cache_set *c)
bch_journal_set_replay_done(&c->journal);
/* XXX: necessary? */
- bch_journal_meta(c, &cl);
+ bch_journal_meta(&c->journal, &cl);
+ closure_sync(&cl);
}
bch_prio_timer_start(c, READ);
@@ -1750,7 +1752,8 @@ static void bch_cache_remove_work(struct work_struct *work)
c->journal.prio_buckets[ca->sb.nr_this_dev] = 0;
spin_unlock(&c->journal.lock);
- bch_journal_meta(c, &cl);
+ /* write new prio pointers */
+ bch_journal_meta(&c->journal, &cl);
closure_sync(&cl);
__bcache_write_super(c); /* ups sb_write_mutex */
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index e727e69a2a3d..0f04e79676e9 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -147,7 +147,6 @@ sysfs_pd_controller_attribute(writeback);
read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);
-rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(discard);
rw_attribute(running);
@@ -583,7 +582,6 @@ SHOW(bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
- sysfs_print(synchronous, CACHE_SYNC(&c->sb));
sysfs_print(journal_delay_ms, c->journal.delay_ms);
sysfs_hprint(block_size, block_bytes(c));
@@ -705,17 +703,6 @@ STORE(__bch_cache_set)
return size;
}
- if (attr == &sysfs_synchronous) {
- bool sync = strtoul_or_return(buf);
-
- if (sync != CACHE_SYNC(&c->sb)) {
- SET_CACHE_SYNC(&c->sb, sync);
- bcache_write_super(c);
- }
-
- return size;
- }
-
if (attr == &sysfs_clear_stats) {
atomic_long_set(&c->writeback_keys_done, 0);
atomic_long_set(&c->writeback_keys_failed, 0);
@@ -824,7 +811,7 @@ STORE(__bch_cache_set)
struct closure cl;
closure_init_stack(&cl);
- bch_journal_meta(c, &cl);
+ bch_journal_meta(&c->journal, &cl);
closure_sync(&cl);
return size;
@@ -920,7 +907,6 @@ static void bch_cache_set_internal_release(struct kobject *k)
static struct attribute *bch_cache_set_files[] = {
&sysfs_unregister,
&sysfs_stop,
- &sysfs_synchronous,
&sysfs_journal_delay_ms,
&sysfs_flash_vol_create,
&sysfs_add_device,