diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-04-09 01:29:52 -0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-10-07 12:36:08 -0800 |
commit | 9fb193f1c8fed094a7f8b0480b6f9d06074aba4d (patch) | |
tree | d8ac15eab3f7f0c28750a12b9cc73e4928d4048b | |
parent | e2fe801f60f5c5b601a60a39218416b290e467c7 (diff) |
bcache: fix journal_reclaim_work() locking
it assumed it was only ever called by one thread, but that changed when freezig
was fixed
-rw-r--r-- | drivers/md/bcache/journal.c | 112 | ||||
-rw-r--r-- | drivers/md/bcache/journal_types.h | 7 |
2 files changed, 74 insertions, 45 deletions
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5b9da4828542..237daedab3b4 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -1126,6 +1126,50 @@ static void journal_reclaim_fast(struct journal *j) } } +static struct journal_entry_pin * +journal_get_next_pin(struct journal *j, u64 seq_to_flush) +{ + struct journal_entry_pin_list *pin_list; + struct journal_entry_pin *ret = NULL; + unsigned iter; + + /* so we don't iterate over empty fifo entries below: */ + if (!atomic_read(&fifo_front(&j->pin).count)) { + spin_lock(&j->lock); + journal_reclaim_fast(j); + spin_unlock(&j->lock); + } + + spin_lock_irq(&j->pin_lock); + fifo_for_each_entry_ptr(pin_list, &j->pin, iter) { + if (journal_pin_seq(j, pin_list) > seq_to_flush) + break; + + ret = list_first_entry_or_null(&pin_list->list, + struct journal_entry_pin, list); + if (ret) { + /* must be list_del_init(), see journal_pin_drop() */ + list_del_init(&ret->list); + break; + } + } + spin_unlock_irq(&j->pin_lock); + + return ret; +} + +static bool should_discard_bucket(struct journal *j, struct journal_device *ja) +{ + bool ret; + + spin_lock(&j->lock); + ret = (ja->last_idx != ja->cur_idx && + ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk); + spin_unlock(&j->lock); + + return ret; +} + /** * journal_reclaim_work - free up journal buckets * @@ -1150,10 +1194,10 @@ static void journal_reclaim_work(struct work_struct *work) journal.reclaim_work); struct journal *j = &c->journal; struct cache *ca; - struct journal_entry_pin_list *pin_list; struct journal_entry_pin *pin; - u64 seq_to_flush = 0, last_seq = j->last_seq_ondisk; - unsigned iter; + u64 seq_to_flush = 0; + unsigned iter, nr, bucket_to_flush; + bool reclaim_lock_held = false; /* * Advance last_idx to point to the oldest journal entry containing @@ -1161,19 +1205,9 @@ static void journal_reclaim_work(struct work_struct *work) */ group_for_each_cache(ca, &c->cache_tiers[0], iter) { struct journal_device *ja = &ca->journal; - unsigned nr = bch_nr_journal_buckets(ca->disk_sb.sb), - cur_idx, bucket_to_flush; - - spin_lock(&j->lock); - cur_idx = ja->cur_idx; - spin_unlock(&j->lock); - /* We're the only thread that modifies last_idx: */ - - while (ja->last_idx != cur_idx && - ja->bucket_seq[ja->last_idx] < last_seq) { - if (ca->mi.discard && - blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) { + while (should_discard_bucket(j, ja)) { + if (!reclaim_lock_held) { /* * ugh: * might be called from __journal_res_get() @@ -1183,15 +1217,23 @@ static void journal_reclaim_work(struct work_struct *work) */ __set_current_state(TASK_RUNNING); + mutex_lock(&j->reclaim_lock); + reclaim_lock_held = true; + /* recheck under reclaim_lock: */ + continue; + } + + if (ca->mi.discard && + blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev))) blkdev_issue_discard(ca->disk_sb.bdev, bucket_to_sector(ca, journal_bucket(ca, ja->last_idx)), ca->mi.bucket_size, GFP_NOIO, 0); - } spin_lock(&j->lock); - ja->last_idx = (ja->last_idx + 1) % nr; + ja->last_idx = (ja->last_idx + 1) % + bch_nr_journal_buckets(ca->disk_sb.sb); spin_unlock(&j->lock); wake_up(&j->wait); @@ -1202,45 +1244,24 @@ static void journal_reclaim_work(struct work_struct *work) * buckets */ spin_lock(&j->lock); - bucket_to_flush = (cur_idx + (nr >> 1)) % nr; + nr = bch_nr_journal_buckets(ca->disk_sb.sb), + bucket_to_flush = (ja->cur_idx + (nr >> 1)) % nr; seq_to_flush = max_t(u64, seq_to_flush, ja->bucket_seq[bucket_to_flush]); spin_unlock(&j->lock); } - spin_lock(&j->lock); + if (reclaim_lock_held) + mutex_unlock(&j->reclaim_lock); /* Also flush if the pin fifo is more than half full */ seq_to_flush = max_t(s64, seq_to_flush, (s64) j->seq - (j->pin.size >> 1)); - journal_reclaim_fast(j); - spin_unlock(&j->lock); - - spin_lock_irq(&j->pin_lock); - -restart_flush: - /* Now do the actual flushing */ - fifo_for_each_entry_ptr(pin_list, &j->pin, iter) { - if (journal_pin_seq(j, pin_list) > seq_to_flush) - break; - - if (!list_empty(&pin_list->list)) { - pin = list_first_entry(&pin_list->list, - struct journal_entry_pin, - list); - list_del_init(&pin->list); - spin_unlock_irq(&j->pin_lock); - - __set_current_state(TASK_RUNNING); - pin->flush(pin); - - spin_lock_irq(&j->pin_lock); - goto restart_flush; - } + while ((pin = journal_get_next_pin(j, seq_to_flush))) { + __set_current_state(TASK_RUNNING); + pin->flush(pin); } - - spin_unlock_irq(&j->pin_lock); } /** @@ -2067,6 +2088,7 @@ int bch_journal_alloc(struct journal *j) INIT_WORK(&j->reclaim_work, journal_reclaim_work); mutex_init(&j->blacklist_lock); INIT_LIST_HEAD(&j->seq_blacklist); + mutex_init(&j->reclaim_lock); lockdep_init_map(&j->res_map, "journal res", &res_key, 0); diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h index d2892b053bda..e0d949c546bc 100644 --- a/drivers/md/bcache/journal_types.h +++ b/drivers/md/bcache/journal_types.h @@ -161,6 +161,8 @@ struct journal { BKEY_PADDED(key); struct work_struct reclaim_work; + /* protects advancing ja->last_idx: */ + struct mutex reclaim_lock; __le64 prio_buckets[MAX_CACHES_PER_SET]; unsigned nr_prio_buckets; @@ -197,6 +199,11 @@ struct journal_device { unsigned cur_idx; /* Last journal bucket that still contains an open journal entry */ + + /* + * j->lock and j->reclaim_lock must both be held to modify, j->lock + * sufficient to read: + */ unsigned last_idx; /* Bio for journal reads/writes to this device */ |