summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2015-05-04 18:55:58 -0700
committerKent Overstreet <kent.overstreet@gmail.com>2016-10-07 12:34:07 -0800
commit35e590ea1d212f92f42611a7f4190ab4e1e298ae (patch)
tree8d91995929232e1b707c5021f3db240f2bbe500f
parent63eff9c5bfd977bc9f039855557b7b436c6e014d (diff)
bcache: Journal locking shenanigans
Split out pin_lock from the journal lock, so that only pin_lock requires interrupts off. This is necessary because we have to take the btree root lock with the journal lock held in journal_write_locked(), and from there irqs-off would have propagated much too far.
-rw-r--r--drivers/md/bcache/alloc.c8
-rw-r--r--drivers/md/bcache/btree.c4
-rw-r--r--drivers/md/bcache/journal.c85
-rw-r--r--drivers/md/bcache/journal.h37
-rw-r--r--drivers/md/bcache/journal_types.h6
5 files changed, 65 insertions, 75 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b153f3877994..5aa6f7b6200d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -292,12 +292,12 @@ static void bch_prio_write(struct cache *ca)
ret, r);
}
- spin_lock_irq(&c->journal.lock);
+ spin_lock(&c->journal.lock);
c->journal.prio_buckets[ca->sb.nr_this_dev] = ca->prio_buckets[0];
c->journal.nr_prio_buckets = max_t(unsigned,
ca->sb.nr_this_dev + 1,
c->journal.nr_prio_buckets);
- spin_unlock_irq(&c->journal.lock);
+ spin_unlock(&c->journal.lock);
bch_journal_meta(&c->journal, &cl);
closure_sync(&cl);
@@ -346,9 +346,9 @@ int bch_prio_read(struct cache *ca)
return -EIO;
}
- spin_lock_irq(&c->journal.lock);
+ spin_lock(&c->journal.lock);
c->journal.prio_buckets[ca->sb.nr_this_dev] = bucket;
- spin_unlock_irq(&c->journal.lock);
+ spin_unlock(&c->journal.lock);
for (b = 0; b < ca->mi.nbuckets; b++, d++) {
if (d == end) {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 46bf8aa2efd2..918b73e71675 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1628,7 +1628,7 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
list_del_init(&b->list);
mutex_unlock(&c->btree_cache_lock);
- spin_lock_irq(&c->btree_root_lock);
+ spin_lock(&c->btree_root_lock);
btree_node_root(b) = b;
if (b->btree_id != c->gc_cur_btree
@@ -1642,7 +1642,7 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
bch_mark_metadata_bucket(ca, PTR_BUCKET(ca, ptr), false);
rcu_read_unlock();
}
- spin_unlock_irq(&c->btree_root_lock);
+ spin_unlock(&c->btree_root_lock);
bch_recalc_btree_reserve(c);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index fb30298a19db..88ebd28d27f6 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -657,8 +657,8 @@ static int journal_seq_blacklist_read(struct cache_set *c,
return -ENOMEM;
}
- __journal_pin_add(p, &bl->pin,
- journal_seq_blacklist_flush);
+ journal_pin_add(&c->journal, p, &bl->pin,
+ journal_seq_blacklist_flush);
bl->written = true;
break;
}
@@ -847,7 +847,7 @@ void bch_journal_start(struct cache_set *c)
list_for_each_entry(bl, &j->seq_blacklist, list)
new_seq = max(new_seq, bl->seq);
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
while (j->seq < new_seq) {
struct journal_entry_pin_list pin_list, *p;
@@ -874,8 +874,8 @@ void bch_journal_start(struct cache_set *c)
JKEYS_JOURNAL_SEQ_BLACKLISTED,
0, 0);
- __journal_pin_add(&fifo_back(&j->pin), &bl->pin,
- journal_seq_blacklist_flush);
+ journal_pin_add(j, &fifo_back(&j->pin), &bl->pin,
+ journal_seq_blacklist_flush);
bl->written = true;
}
@@ -884,7 +884,7 @@ void bch_journal_start(struct cache_set *c)
* reservations
*/
journal_entry_open(j);
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
queue_work(system_long_wq, &j->reclaim_work);
}
@@ -1094,9 +1094,9 @@ static void journal_reclaim_work(struct work_struct *work)
unsigned nr = bch_nr_journal_buckets(&ca->sb),
cur_idx, bucket_to_flush;
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
cur_idx = ja->cur_idx;
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
/* We're the only thread that modifies last_idx: */
@@ -1110,9 +1110,9 @@ static void journal_reclaim_work(struct work_struct *work)
ja->last_idx)),
ca->mi.bucket_size, GFP_NOIO, 0);
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
ja->last_idx = (ja->last_idx + 1) % nr;
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
wake_up(&j->wait);
}
@@ -1121,20 +1121,23 @@ static void journal_reclaim_work(struct work_struct *work)
* Write out enough btree nodes to free up 50% journal
* buckets
*/
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
bucket_to_flush = (cur_idx + (nr >> 1)) % nr;
seq_to_flush = max_t(u64, seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
}
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
/* Also flush if the pin fifo is more than half full */
seq_to_flush = max_t(s64, seq_to_flush,
(s64) j->seq - (j->pin.size >> 1));
journal_reclaim_fast(j);
+ spin_unlock(&j->lock);
+
+ spin_lock_irq(&j->pin_lock);
restart_flush:
/* Now do the actual flushing */
@@ -1147,16 +1150,16 @@ restart_flush:
struct journal_entry_pin,
list);
list_del_init(&pin->list);
- spin_unlock_irq(&j->lock);
+ spin_unlock_irq(&j->pin_lock);
pin->flush(pin);
- spin_lock_irq(&j->lock);
+ spin_lock_irq(&j->pin_lock);
goto restart_flush;
}
}
- spin_unlock_irq(&j->lock);
+ spin_unlock_irq(&j->pin_lock);
}
/**
@@ -1328,22 +1331,17 @@ static void journal_write_done(struct closure *cl)
{
struct journal *j = container_of(cl, struct journal, io);
struct journal_write *w = journal_prev_write(j);
- unsigned long flags;
-
- spin_lock_irqsave(&j->lock, flags);
j->last_seq_ondisk = w->data->last_seq;
- __closure_wake_up(&w->wait);
-
clear_bit(JOURNAL_IO_IN_FLIGHT, &j->flags);
+
+ closure_wake_up(&w->wait);
wake_up(&j->wait);
if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
mod_delayed_work(system_wq, &j->write_work, 0);
- spin_unlock_irqrestore(&j->lock, flags);
-
/*
* Updating last_seq_ondisk may let journal_reclaim_work() discard more
* buckets:
@@ -1462,7 +1460,7 @@ static void journal_write_locked(struct closure *cl)
bch_journal_next_entry(j);
wake_up(&j->wait);
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
bch_check_mark_super(c, &tmp.k, true);
@@ -1490,7 +1488,7 @@ static bool __journal_write(struct journal *j)
closure_call(&j->io, journal_write_locked, NULL, &c->cl);
return true;
nowrite:
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
return false;
}
@@ -1505,14 +1503,14 @@ static void journal_unlock(struct journal *j)
if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
__journal_write(j);
else
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
}
static void journal_write_work(struct work_struct *work)
{
struct journal *j = container_of(to_delayed_work(work),
struct journal, write_work);
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
if (test_bit(JOURNAL_DIRTY, &j->flags))
set_bit(JOURNAL_NEED_WRITE, &j->flags);
journal_unlock(j);
@@ -1572,7 +1570,7 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res)
if (!s.count) {
if (do_write) {
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
journal_unlock(j);
}
@@ -1626,7 +1624,7 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
if (journal_res_get_fast(j, res, u64s_min, u64s_max))
return true;
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
/*
* Recheck after taking the lock, so we don't race with another
@@ -1634,7 +1632,7 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
* journal_entry_close() unnecessarily
*/
if (journal_res_get_fast(j, res, u64s_min, u64s_max)) {
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
return true;
}
@@ -1643,7 +1641,7 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
*start_time = local_clock();
if (!journal_entry_close(j)) {
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
return false;
}
@@ -1664,12 +1662,12 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
if (!journal_bucket_has_room(j)) {
/* Still no room, we have to wait */
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
trace_bcache_journal_full(c);
return false;
}
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
}
}
}
@@ -1703,7 +1701,7 @@ void bch_journal_res_get(struct journal *j, struct journal_res *res,
void bch_journal_push_seq(struct journal *j, u64 seq, struct closure *parent)
{
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
BUG_ON(seq > j->seq);
@@ -1742,16 +1740,16 @@ void bch_journal_flush(struct journal *j, struct closure *parent)
{
u64 seq;
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
if (test_bit(JOURNAL_DIRTY, &j->flags)) {
seq = j->seq;
} else if (j->seq) {
seq = j->seq - 1;
} else {
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
return;
}
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
bch_journal_push_seq(j, seq, parent);
}
@@ -1766,6 +1764,7 @@ void bch_journal_free(struct journal *j)
int bch_journal_alloc(struct journal *j)
{
spin_lock_init(&j->lock);
+ spin_lock_init(&j->pin_lock);
init_waitqueue_head(&j->wait);
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_WORK(&j->reclaim_work, journal_reclaim_work);
@@ -1799,7 +1798,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
ssize_t ret = 0;
rcu_read_lock();
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
"active journal entries:\t%zu\n"
@@ -1840,7 +1839,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
ja->last_idx, ja->bucket_seq[ja->last_idx]);
}
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
rcu_read_unlock();
return ret;
@@ -1851,10 +1850,10 @@ static bool bch_journal_writing_to_device(struct cache *ca)
struct journal *j = &ca->set->journal;
bool ret;
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
ret = bch_extent_has_device(bkey_i_to_s_c_extent(&j->key),
ca->sb.nr_this_dev);
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
return ret;
}
@@ -1915,9 +1914,9 @@ int bch_journal_move(struct cache *ca)
* Verify that we no longer need any of the journal entries in
* the device
*/
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
last_flushed_seq = last_seq(j);
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
nr_buckets = bch_nr_journal_buckets(&ca->sb);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index c5669121e600..6b7f6424152f 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -126,10 +126,13 @@ struct journal_replay {
#define JOURNAL_PIN ((32 * 1024) - 1)
-static inline void __journal_pin_add(struct journal_entry_pin_list *pin_list,
- struct journal_entry_pin *pin,
- journal_pin_flush_fn flush_fn)
+static inline void journal_pin_add(struct journal *j,
+ struct journal_entry_pin_list *pin_list,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn)
{
+ spin_lock_irq(&j->pin_lock);
+
atomic_inc(&pin_list->count);
pin->pin_list = pin_list;
pin->flush = flush_fn;
@@ -138,26 +141,8 @@ static inline void __journal_pin_add(struct journal_entry_pin_list *pin_list,
list_add(&pin->list, &pin_list->list);
else
INIT_LIST_HEAD(&pin->list);
-}
-static inline void journal_pin_add(struct journal *j,
- struct journal_entry_pin_list *pin_list,
- struct journal_entry_pin *pin,
- journal_pin_flush_fn flush_fn)
-{
- spin_lock_irq(&j->lock);
- __journal_pin_add(pin_list, pin, flush_fn);
- spin_unlock_irq(&j->lock);
-}
-
-static inline void __journal_pin_drop(struct journal *j,
- struct journal_entry_pin *pin)
-{
- if (!list_empty_careful(&pin->list))
- list_del_init(&pin->list);
-
- if (atomic_dec_and_test(&pin->pin_list->count))
- wake_up(&j->wait);
+ spin_unlock_irq(&j->pin_lock);
}
static inline void journal_pin_drop(struct journal *j,
@@ -167,9 +152,9 @@ static inline void journal_pin_drop(struct journal *j,
/* journal_reclaim_work() might have already taken us off the list */
if (!list_empty_careful(&pin->list)) {
- spin_lock_irqsave(&j->lock, flags);
+ spin_lock_irqsave(&j->pin_lock, flags);
list_del_init(&pin->list);
- spin_unlock_irqrestore(&j->lock, flags);
+ spin_unlock_irqrestore(&j->pin_lock, flags);
}
if (atomic_dec_and_test(&pin->pin_list->count)) {
@@ -242,10 +227,10 @@ int bch_journal_replay(struct cache_set *, struct list_head *);
static inline void bch_journal_set_replay_done(struct journal *j)
{
- spin_lock_irq(&j->lock);
+ spin_lock(&j->lock);
set_bit(JOURNAL_REPLAY_DONE, &j->flags);
j->cur_pin_list = &fifo_back(&j->pin);
- spin_unlock_irq(&j->lock);
+ spin_unlock(&j->lock);
}
void bch_journal_free(struct journal *);
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 1b55a445affa..4a67da8737a4 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -141,6 +141,12 @@ struct journal {
DECLARE_FIFO(struct journal_entry_pin_list, pin);
struct journal_entry_pin_list *cur_pin_list;
+ /*
+ * Protects the pin lists - the fifo itself is still protected by
+ * j->lock though:
+ */
+ spinlock_t pin_lock;
+
struct mutex blacklist_lock;
struct list_head seq_blacklist;