diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-12-06 07:13:16 -0900 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-01-18 21:41:11 -0900 |
commit | 30d3dece30d86c05429b7d3e2d5bcf24b59d8d26 (patch) | |
tree | 955e992fa2d928ae2968e810787ce3de8eb3bb91 | |
parent | fa31f1a033c74f83ced87984f8b93576aae632d3 (diff) |
bcache: fix another copygc deadlock
Since btree node reserves got reworked, btree updates can end up
blocking on copygc, which means we have to run index updates for copygc
out of a separate workqueue.
Also fix another deadlock related to rwsems being unfair: bch_btree_split_leaf() now drops gc_lock before waiting in closure_sync().
-rw-r--r-- | drivers/md/bcache/bcache.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 29 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 4 |
4 files changed, 27 insertions, 11 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index e0f164bb5693..e0c8716172e0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -608,6 +608,8 @@ struct cache_set { struct mutex btree_interior_update_lock; struct workqueue_struct *wq; + /* copygc needs its own workqueue for index updates.. */ + struct workqueue_struct *copygc_wq; /* ALLOCATION */ struct bch_pd_controller foreground_write_pd; diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index f3579b8a678c..ea4c47438afd 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -1558,8 +1558,9 @@ static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags) ret = PTR_ERR(reserve); if (ret == -EAGAIN) { bch_btree_iter_unlock(iter); + up_read(&c->gc_lock); closure_sync(&cl); - ret = -EINTR; + return -EINTR; } goto out; } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index ba2644166c31..2bf1e4b8eec1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -182,6 +182,13 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c, /* Writes */ +static struct workqueue_struct *index_update_wq(struct bch_write_op *op) +{ + return op->alloc_reserve == RESERVE_MOVINGGC + ? 
op->c->copygc_wq + : op->c->wq; +} + static void __bch_write(struct closure *); static void bch_write_done(struct closure *cl) @@ -268,7 +275,7 @@ static void bch_write_index(struct closure *cl) bch_journal_flush_seq_async(&c->journal, *op_journal_seq(op), cl); - continue_at(cl, bch_write_done, c->wq); + continue_at(cl, bch_write_done, index_update_wq(op)); } else { continue_at_nobarrier(cl, bch_write_done, NULL); } @@ -342,6 +349,7 @@ static void bch_write_io_error(struct closure *cl) static void bch_write_endio(struct bio *bio) { struct closure *cl = bio->bi_private; + struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); struct bch_write_bio *wbio = to_wbio(bio); struct cache_set *c = wbio->c; struct bio *orig = wbio->orig; @@ -349,7 +357,7 @@ static void bch_write_endio(struct bio *bio) if (cache_nonfatal_io_err_on(bio->bi_error, ca, "data write")) - set_closure_fn(cl, bch_write_io_error, c->wq); + set_closure_fn(cl, bch_write_io_error, index_update_wq(op)); bch_account_io_completion_time(ca, wbio->submit_time_us, REQ_OP_WRITE); @@ -535,7 +543,7 @@ static void __bch_write(struct closure *cl) op->flags |= BCH_WRITE_DONE; bch_write_discard(cl); bio_put(bio); - continue_at(cl, bch_write_done, c->wq); + continue_at(cl, bch_write_done, index_update_wq(op)); } /* @@ -551,14 +559,14 @@ static void __bch_write(struct closure *cl) EBUG_ON(!bio_sectors(bio)); if (open_bucket_nr == ARRAY_SIZE(op->open_buckets)) - continue_at(cl, bch_write_index, c->wq); + continue_at(cl, bch_write_index, index_update_wq(op)); /* for the device pointers and 1 for the chksum */ if (bch_keylist_realloc(&op->insert_keys, op->inline_keys, ARRAY_SIZE(op->inline_keys), BKEY_EXTENT_U64s_MAX)) - continue_at(cl, bch_write_index, c->wq); + continue_at(cl, bch_write_index, index_update_wq(op)); k = op->insert_keys.top; bkey_extent_init(k); @@ -586,7 +594,8 @@ static void __bch_write(struct closure *cl) * this case if open_bucket_nr > 1. 
*/ if (!bch_keylist_empty(&op->insert_keys)) - continue_at(cl, bch_write_index, c->wq); + continue_at(cl, bch_write_index, + index_update_wq(op)); /* * If we've looped, we're running out of a workqueue - @@ -629,7 +638,7 @@ static void __bch_write(struct closure *cl) } while (ret); op->flags |= BCH_WRITE_DONE; - continue_at(cl, bch_write_index, c->wq); + continue_at(cl, bch_write_index, index_update_wq(op)); err: if (op->flags & BCH_WRITE_DISCARD_ON_ERROR) { /* @@ -661,7 +670,7 @@ err: */ continue_at(cl, !bch_keylist_empty(&op->insert_keys) ? bch_write_index - : bch_write_done, c->wq); + : bch_write_done, index_update_wq(op)); } void bch_wake_delayed_writes(unsigned long data) @@ -769,7 +778,7 @@ void bch_write(struct closure *cl) spin_unlock_irqrestore(&c->foreground_write_pd_lock, flags); - continue_at(cl, __bch_write, op->c->wq); + continue_at(cl, __bch_write, index_update_wq(op)); } spin_unlock_irqrestore(&c->foreground_write_pd_lock, flags); @@ -784,7 +793,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c, u64 *journal_seq, unsigned flags) { op->c = c; - op->io_wq = op->c->wq; + op->io_wq = index_update_wq(op); op->bio = bio; op->written = 0; op->error = 0; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3c5a207196b6..ab786e81ea0b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -909,6 +909,8 @@ static void cache_set_free(struct cache_set *c) mempool_exit(&c->search); percpu_ref_exit(&c->writes); + if (c->copygc_wq) + destroy_workqueue(c->copygc_wq); if (c->wq) destroy_workqueue(c->wq); @@ -1143,6 +1145,8 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, if (!(c->wq = alloc_workqueue("bcache", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || + !(c->copygc_wq = alloc_workqueue("bcache_copygc", + WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL) || mempool_init_slab_pool(&c->search, 1, bch_search_cache) || 
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1, |