| author    | Kent Overstreet <kent.overstreet@gmail.com>     | 2015-05-04 18:55:58 -0700 |
| committer | Kent Overstreet <kent.overstreet@gmail.com>     | 2016-10-07 12:34:07 -0800 |
| commit    | 35e590ea1d212f92f42611a7f4190ab4e1e298ae (patch) |                           |
| tree      | 8d91995929232e1b707c5021f3db240f2bbe500f         |                           |
| parent    | 63eff9c5bfd977bc9f039855557b7b436c6e014d (diff)  |                           |
bcache: Journal locking shenanigans
Split out pin_lock from the journal lock, so that only pin_lock requires
interrupts off. This is because we have to take the btree root lock with the
journal lock held in journal_write_locked(), and from there the irqs-off
requirement was going to propagate much too far.
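As a minimal sketch of the locking discipline this results in (illustrative kernel-style C only; struct journal_sketch, write_path() and pin_path() are stand-in names for this example, not the actual bcache code):

/* Sketch of the lock split described above - not the real implementation. */
#include <linux/spinlock.h>
#include <linux/list.h>

struct journal_sketch {
	spinlock_t		lock;		/* entry/FIFO state: plain spinlock now */
	spinlock_t		pin_lock;	/* pin lists: still taken with irqs disabled */
	struct list_head	pin_list;
};

/*
 * Because j->lock no longer disables interrupts, it can be held while taking
 * another lock (the btree root lock in journal_write_locked()) without
 * forcing that lock, and everything nested under it, to become irq-safe:
 */
static void write_path(struct journal_sketch *j, spinlock_t *btree_root_lock)
{
	spin_lock(&j->lock);
	spin_lock(btree_root_lock);	/* nested lock stays irq-agnostic */
	/* ... emit the btree roots into the journal entry ... */
	spin_unlock(btree_root_lock);
	spin_unlock(&j->lock);
}

/* Only the pin-list manipulation keeps the irqs-off discipline: */
static void pin_path(struct journal_sketch *j, struct list_head *pin)
{
	spin_lock_irq(&j->pin_lock);
	list_add(pin, &j->pin_list);
	spin_unlock_irq(&j->pin_lock);
}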
-rw-r--r--  drivers/md/bcache/alloc.c          |  8
-rw-r--r--  drivers/md/bcache/btree.c          |  4
-rw-r--r--  drivers/md/bcache/journal.c        | 85
-rw-r--r--  drivers/md/bcache/journal.h        | 37
-rw-r--r--  drivers/md/bcache/journal_types.h  |  6
5 files changed, 65 insertions(+), 75 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b153f3877994..5aa6f7b6200d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -292,12 +292,12 @@ static void bch_prio_write(struct cache *ca)
 			  ret, r);
 	}
 
-	spin_lock_irq(&c->journal.lock);
+	spin_lock(&c->journal.lock);
 	c->journal.prio_buckets[ca->sb.nr_this_dev] = ca->prio_buckets[0];
 	c->journal.nr_prio_buckets = max_t(unsigned,
 					   ca->sb.nr_this_dev + 1,
 					   c->journal.nr_prio_buckets);
-	spin_unlock_irq(&c->journal.lock);
+	spin_unlock(&c->journal.lock);
 
 	bch_journal_meta(&c->journal, &cl);
 	closure_sync(&cl);
@@ -346,9 +346,9 @@ int bch_prio_read(struct cache *ca)
 		return -EIO;
 	}
 
-	spin_lock_irq(&c->journal.lock);
+	spin_lock(&c->journal.lock);
 	c->journal.prio_buckets[ca->sb.nr_this_dev] = bucket;
-	spin_unlock_irq(&c->journal.lock);
+	spin_unlock(&c->journal.lock);
 
 	for (b = 0; b < ca->mi.nbuckets; b++, d++) {
 		if (d == end) {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 46bf8aa2efd2..918b73e71675 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1628,7 +1628,7 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
 	list_del_init(&b->list);
 	mutex_unlock(&c->btree_cache_lock);
 
-	spin_lock_irq(&c->btree_root_lock);
+	spin_lock(&c->btree_root_lock);
 	btree_node_root(b) = b;
 
 	if (b->btree_id != c->gc_cur_btree
@@ -1642,7 +1642,7 @@ static void bch_btree_set_root(struct cache_set *c, struct btree *b)
 			bch_mark_metadata_bucket(ca, PTR_BUCKET(ca, ptr),
 						 false);
 		rcu_read_unlock();
 	}
-	spin_unlock_irq(&c->btree_root_lock);
+	spin_unlock(&c->btree_root_lock);
 
 	bch_recalc_btree_reserve(c);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index fb30298a19db..88ebd28d27f6 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -657,8 +657,8 @@ static int journal_seq_blacklist_read(struct cache_set *c,
 				return -ENOMEM;
 			}
 
-			__journal_pin_add(p, &bl->pin,
-					  journal_seq_blacklist_flush);
+			journal_pin_add(&c->journal, p, &bl->pin,
+					journal_seq_blacklist_flush);
 			bl->written = true;
 			break;
 		}
@@ -847,7 +847,7 @@ void bch_journal_start(struct cache_set *c)
 	list_for_each_entry(bl, &j->seq_blacklist, list)
 		new_seq = max(new_seq, bl->seq);
 
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 	while (j->seq < new_seq) {
 		struct journal_entry_pin_list pin_list, *p;
 
@@ -874,8 +874,8 @@ void bch_journal_start(struct cache_set *c)
 				  JKEYS_JOURNAL_SEQ_BLACKLISTED,
 				  0, 0);
 
-		__journal_pin_add(&fifo_back(&j->pin), &bl->pin,
-				  journal_seq_blacklist_flush);
+		journal_pin_add(j, &fifo_back(&j->pin), &bl->pin,
+				journal_seq_blacklist_flush);
 		bl->written = true;
 	}
 
@@ -884,7 +884,7 @@ void bch_journal_start(struct cache_set *c)
 	 * reservations
 	 */
 	journal_entry_open(j);
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 
 	queue_work(system_long_wq, &j->reclaim_work);
 }
@@ -1094,9 +1094,9 @@ static void journal_reclaim_work(struct work_struct *work)
 		unsigned nr = bch_nr_journal_buckets(&ca->sb),
 			cur_idx, bucket_to_flush;
 
-		spin_lock_irq(&j->lock);
+		spin_lock(&j->lock);
 		cur_idx = ja->cur_idx;
-		spin_unlock_irq(&j->lock);
+		spin_unlock(&j->lock);
 
 		/* We're the only thread that modifies last_idx: */
 
@@ -1110,9 +1110,9 @@ static void journal_reclaim_work(struct work_struct *work)
 						   ja->last_idx)),
 					ca->mi.bucket_size, GFP_NOIO, 0);
 
-			spin_lock_irq(&j->lock);
+			spin_lock(&j->lock);
 			ja->last_idx = (ja->last_idx + 1) % nr;
-			spin_unlock_irq(&j->lock);
+			spin_unlock(&j->lock);
 
 			wake_up(&j->wait);
 		}
@@ -1121,20 +1121,23 @@ static void journal_reclaim_work(struct work_struct *work)
 		 * Write out enough btree nodes to free up 50% journal
 		 * buckets
 		 */
-		spin_lock_irq(&j->lock);
+		spin_lock(&j->lock);
 		bucket_to_flush = (cur_idx + (nr >> 1)) % nr;
 		seq_to_flush = max_t(u64, seq_to_flush,
 				     ja->bucket_seq[bucket_to_flush]);
-		spin_unlock_irq(&j->lock);
+		spin_unlock(&j->lock);
 	}
 
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 
 	/* Also flush if the pin fifo is more than half full */
 	seq_to_flush = max_t(s64, seq_to_flush,
 			     (s64) j->seq - (j->pin.size >> 1));
 
 	journal_reclaim_fast(j);
+	spin_unlock(&j->lock);
+
+	spin_lock_irq(&j->pin_lock);
 
 restart_flush:
 	/* Now do the actual flushing */
@@ -1147,16 +1150,16 @@ restart_flush:
 					 struct journal_entry_pin, list);
 			list_del_init(&pin->list);
 
-			spin_unlock_irq(&j->lock);
+			spin_unlock_irq(&j->pin_lock);
 			pin->flush(pin);
-			spin_lock_irq(&j->lock);
+			spin_lock_irq(&j->pin_lock);
 
 			goto restart_flush;
 		}
 	}
 
-	spin_unlock_irq(&j->lock);
+	spin_unlock_irq(&j->pin_lock);
 }
 
 /**
@@ -1328,22 +1331,17 @@ static void journal_write_done(struct closure *cl)
 {
 	struct journal *j = container_of(cl, struct journal, io);
 	struct journal_write *w = journal_prev_write(j);
-	unsigned long flags;
-
-	spin_lock_irqsave(&j->lock, flags);
 
 	j->last_seq_ondisk = w->data->last_seq;
-	__closure_wake_up(&w->wait);
-	clear_bit(JOURNAL_IO_IN_FLIGHT, &j->flags);
+
+	closure_wake_up(&w->wait);
 	wake_up(&j->wait);
 
 	if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		mod_delayed_work(system_wq, &j->write_work, 0);
 
-	spin_unlock_irqrestore(&j->lock, flags);
-
 	/*
 	 * Updating last_seq_ondisk may let journal_reclaim_work() discard more
 	 * buckets:
@@ -1462,7 +1460,7 @@ static void journal_write_locked(struct closure *cl)
 	bch_journal_next_entry(j);
 	wake_up(&j->wait);
 
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 
 	bch_check_mark_super(c, &tmp.k, true);
 
@@ -1490,7 +1488,7 @@ static bool __journal_write(struct journal *j)
 	closure_call(&j->io, journal_write_locked, NULL, &c->cl);
 	return true;
 nowrite:
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 	return false;
 }
 
@@ -1505,14 +1503,14 @@ static void journal_unlock(struct journal *j)
 	if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		__journal_write(j);
 	else
-		spin_unlock_irq(&j->lock);
+		spin_unlock(&j->lock);
 }
 
 static void journal_write_work(struct work_struct *work)
 {
 	struct journal *j = container_of(to_delayed_work(work),
 					 struct journal, write_work);
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 	if (test_bit(JOURNAL_DIRTY, &j->flags))
 		set_bit(JOURNAL_NEED_WRITE, &j->flags);
 	journal_unlock(j);
@@ -1572,7 +1570,7 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res)
 
 	if (!s.count) {
 		if (do_write) {
-			spin_lock_irq(&j->lock);
+			spin_lock(&j->lock);
 			journal_unlock(j);
 		}
 
@@ -1626,7 +1624,7 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
 	if (journal_res_get_fast(j, res, u64s_min, u64s_max))
 		return true;
 
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 
 	/*
 	 * Recheck after taking the lock, so we don't race with another
@@ -1634,7 +1632,7 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
 	 * journal_entry_close() unnecessarily
 	 */
 	if (journal_res_get_fast(j, res, u64s_min, u64s_max)) {
-		spin_unlock_irq(&j->lock);
+		spin_unlock(&j->lock);
 		return true;
 	}
 
@@ -1643,7 +1641,7 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
 			*start_time = local_clock();
 
 		if (!journal_entry_close(j)) {
-			spin_unlock_irq(&j->lock);
+			spin_unlock(&j->lock);
 			return false;
 		}
 
@@ -1664,12 +1662,12 @@ static bool __journal_res_get(struct journal *j, struct journal_res *res,
 
 			if (!journal_bucket_has_room(j)) {
 				/* Still no room, we have to wait */
-				spin_unlock_irq(&j->lock);
+				spin_unlock(&j->lock);
 				trace_bcache_journal_full(c);
 				return false;
 			}
 
-			spin_unlock_irq(&j->lock);
+			spin_unlock(&j->lock);
 		}
 	}
 }
@@ -1703,7 +1701,7 @@ void bch_journal_res_get(struct journal *j, struct journal_res *res,
 
 void bch_journal_push_seq(struct journal *j, u64 seq, struct closure *parent)
 {
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 
 	BUG_ON(seq > j->seq);
 
@@ -1742,16 +1740,16 @@ void bch_journal_flush(struct journal *j, struct closure *parent)
 {
 	u64 seq;
 
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 	if (test_bit(JOURNAL_DIRTY, &j->flags)) {
 		seq = j->seq;
 	} else if (j->seq) {
 		seq = j->seq - 1;
 	} else {
-		spin_unlock_irq(&j->lock);
+		spin_unlock(&j->lock);
 		return;
 	}
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 
 	bch_journal_push_seq(j, seq, parent);
 }
@@ -1766,6 +1764,7 @@ void bch_journal_free(struct journal *j)
 int bch_journal_alloc(struct journal *j)
 {
 	spin_lock_init(&j->lock);
+	spin_lock_init(&j->pin_lock);
 	init_waitqueue_head(&j->wait);
 	INIT_DELAYED_WORK(&j->write_work, journal_write_work);
 	INIT_WORK(&j->reclaim_work, journal_reclaim_work);
@@ -1799,7 +1798,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 	ssize_t ret = 0;
 
 	rcu_read_lock();
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 
 	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
 			 "active journal entries:\t%zu\n"
@@ -1840,7 +1839,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 				 ja->last_idx, ja->bucket_seq[ja->last_idx]);
 	}
 
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 	rcu_read_unlock();
 
 	return ret;
@@ -1851,10 +1850,10 @@ static bool bch_journal_writing_to_device(struct cache *ca)
 	struct journal *j = &ca->set->journal;
 	bool ret;
 
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 	ret = bch_extent_has_device(bkey_i_to_s_c_extent(&j->key),
 				    ca->sb.nr_this_dev);
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 
 	return ret;
 }
@@ -1915,9 +1914,9 @@ int bch_journal_move(struct cache *ca)
 	 * Verify that we no longer need any of the journal entries in
 	 * the device
 	 */
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 	last_flushed_seq = last_seq(j);
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 
 	nr_buckets = bch_nr_journal_buckets(&ca->sb);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index c5669121e600..6b7f6424152f 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -126,10 +126,13 @@ struct journal_replay {
 
 #define JOURNAL_PIN	((32 * 1024) - 1)
 
-static inline void __journal_pin_add(struct journal_entry_pin_list *pin_list,
-				     struct journal_entry_pin *pin,
-				     journal_pin_flush_fn flush_fn)
+static inline void journal_pin_add(struct journal *j,
+				   struct journal_entry_pin_list *pin_list,
+				   struct journal_entry_pin *pin,
+				   journal_pin_flush_fn flush_fn)
 {
+	spin_lock_irq(&j->pin_lock);
+
 	atomic_inc(&pin_list->count);
 	pin->pin_list = pin_list;
 	pin->flush = flush_fn;
@@ -138,26 +141,8 @@ static inline void __journal_pin_add(struct journal_entry_pin_list *pin_list,
 		list_add(&pin->list, &pin_list->list);
 	else
 		INIT_LIST_HEAD(&pin->list);
-}
 
-static inline void journal_pin_add(struct journal *j,
-				   struct journal_entry_pin_list *pin_list,
-				   struct journal_entry_pin *pin,
-				   journal_pin_flush_fn flush_fn)
-{
-	spin_lock_irq(&j->lock);
-	__journal_pin_add(pin_list, pin, flush_fn);
-	spin_unlock_irq(&j->lock);
-}
-
-static inline void __journal_pin_drop(struct journal *j,
-				      struct journal_entry_pin *pin)
-{
-	if (!list_empty_careful(&pin->list))
-		list_del_init(&pin->list);
-
-	if (atomic_dec_and_test(&pin->pin_list->count))
-		wake_up(&j->wait);
+	spin_unlock_irq(&j->pin_lock);
 }
 
 static inline void journal_pin_drop(struct journal *j,
@@ -167,9 +152,9 @@ static inline void journal_pin_drop(struct journal *j,
 
 	/* journal_reclaim_work() might have already taken us off the list */
 	if (!list_empty_careful(&pin->list)) {
-		spin_lock_irqsave(&j->lock, flags);
+		spin_lock_irqsave(&j->pin_lock, flags);
 		list_del_init(&pin->list);
-		spin_unlock_irqrestore(&j->lock, flags);
+		spin_unlock_irqrestore(&j->pin_lock, flags);
 	}
 
 	if (atomic_dec_and_test(&pin->pin_list->count)) {
@@ -242,10 +227,10 @@ int bch_journal_replay(struct cache_set *, struct list_head *);
 
 static inline void bch_journal_set_replay_done(struct journal *j)
 {
-	spin_lock_irq(&j->lock);
+	spin_lock(&j->lock);
 	set_bit(JOURNAL_REPLAY_DONE, &j->flags);
 	j->cur_pin_list = &fifo_back(&j->pin);
-	spin_unlock_irq(&j->lock);
+	spin_unlock(&j->lock);
 }
 
 void bch_journal_free(struct journal *);
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 1b55a445affa..4a67da8737a4 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -141,6 +141,12 @@ struct journal {
 	DECLARE_FIFO(struct journal_entry_pin_list, pin);
 	struct journal_entry_pin_list *cur_pin_list;
 
+	/*
+	 * Protects the pin lists - the fifo itself is still protected by
+	 * j->lock though:
+	 */
+	spinlock_t		pin_lock;
+
 	struct mutex		blacklist_lock;
 	struct list_head	seq_blacklist;