diff options
-rw-r--r-- | drivers/md/bcache/bcache.h | 6 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 94 | ||||
-rw-r--r-- | drivers/md/bcache/request.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 4 |
4 files changed, 105 insertions, 1 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 17480233880d..c1d52cc5c9e7 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -741,6 +741,12 @@ struct cache_set { struct gc_stat gc_stats; + + /* IO PATH: cache reads that have to be retried are parked on read_race_list (guarded by read_race_lock) and drained by read_race_work */ + struct bio_list read_race_list; + struct work_struct read_race_work; + spinlock_t read_race_lock; + /* TIERING */ struct task_struct *tiering_thread; struct keybuf tiering_keys; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index c0f581d9a5d9..b8f88e255db2 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -786,6 +786,45 @@ static int bch_read_hole(struct bio *bio, unsigned sectors) return bio->bi_iter.bi_size ? MAP_CONTINUE : MAP_DONE; } /* bch_read_requeue - append @bio to c->read_race_list while holding read_race_lock (irqsave variant), then queue read_race_work on c->wq so the worker retries it. */ +static void bch_read_requeue(struct cache_set *c, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&c->read_race_lock, flags); + bio_list_add(&c->read_race_list, bio); + spin_unlock_irqrestore(&c->read_race_lock, flags); + queue_work(c->wq, &c->read_race_work); +} + /* bch_read_endio - completion handler installed by bch_read_fn() on each cache read. bi_private holds the original bio. A read that finished without error but for which race_fault() or ptr_stale() reports true is requeued for retry; otherwise errors are accounted, any error is copied to the original bio, the original is ended and this bio is put. */ +static void bch_read_endio(struct bio *bio) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + struct cache_set *c = b->c; + struct bio *orig = bio->bi_private; + + if (!bio->bi_error && (race_fault() || ptr_stale(c, &b->key, 0))) { + /* Read bucket invalidate race: count it and hand the bio to the retry worker instead of completing it here */ + atomic_long_inc(&c->cache_read_races); + bch_read_requeue(c, bio); + return; + } + + bch_bbio_count_io_errors(c, bio, bio->bi_error, "reading from cache"); + + if (bio->bi_error) + orig->bi_error = bio->bi_error; + + bio_endio(orig); + bio_put(bio); +} + /* __bio_inc_remaining - set the BIO_CHAIN flag on @bio, then (after smp_mb__before_atomic()) increment its __bi_remaining count. Used by bch_read_fn() in place of bio_chain() so the child can carry its own bi_end_io. */ +static inline void __bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /* XXX: this looks a lot like cache_lookup_fn() */ static int bch_read_fn(struct btree_op *b_op, struct btree *b, struct bkey *k) { @@ -817,7 +856,9 @@ static int bch_read_fn(struct btree_op *b_op, struct btree *b, struct bkey *k) ret = MAP_CONTINUE; } - 
bio_chain(n, bio); + n->bi_private = bio; + n->bi_end_io = bch_read_endio; + __bio_inc_remaining(bio); bbio = to_bbio(n); bch_bkey_copy_single_ptr(&bbio->key, k, ptr); @@ -852,6 +893,57 @@ int bch_read(struct cache_set *c, struct bio *bio, u64 inode) } EXPORT_SYMBOL(bch_read); +/** + * bch_read_retry - re-submit a bio originally from bch_read() + * @bbio: cache read taken off read_race_list; its bio.bi_private is the parent bio and its key supplies the inode + */ +static void bch_read_retry(struct bbio *bbio) +{ + struct bio *bio = &bbio->bio; + struct bio *parent; + u64 inode; + + trace_bcache_read_retry(bio); + + /* + * This used to be a leaf bio from bch_read_fn(), but + * since we don't know what happened to the btree in + * the meantime, we have to re-submit it via the + * top-level bch_read() entry point. Before doing that, + * we have to reset the bio, preserving the biovec. + * + * The inode, offset and size come from the bbio's key, + * which was set by bch_read_fn(). + */ + inode = KEY_INODE(&bbio->key); + parent = bio->bi_private; + + bch_bbio_reset(bbio); + bio_chain(bio, parent); + + bch_read(bbio->c, bio, inode); + bio_endio(parent); /* pairs with __bio_inc_remaining() in bch_read_fn(), which replaced the earlier bio_chain() there */ + bio_endio(bio); +} + /* bch_read_race_work - work function for read_race_work: pops bios off c->read_race_list one at a time under read_race_lock and passes each to bch_read_retry() until the list is empty. The lock is not held across the retry. */ +void bch_read_race_work(struct work_struct *work) +{ + struct cache_set *c = container_of(work, struct cache_set, + read_race_work); + unsigned long flags; + struct bio *bio; + + while (1) { + spin_lock_irqsave(&c->read_race_lock, flags); + bio = bio_list_pop(&c->read_race_list); + spin_unlock_irqrestore(&c->read_race_lock, flags); + + if (!bio) + break; + + bch_read_retry(to_bbio(bio)); + } +} + /* struct search based code */ struct search { diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 913160680f2e..5398771ab9e1 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -86,6 +86,8 @@ void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); void bch_flash_dev_request_init(struct bcache_device *d); +void bch_read_race_work(struct work_struct *work); + extern struct kmem_cache 
*bch_search_cache; #endif /* _BCACHE_REQUEST_H_ */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 67386f58dd14..dce19dbb2a98 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1453,6 +1453,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_WORK(&c->bio_submit_work, bch_bio_submit_work); spin_lock_init(&c->bio_submit_lock); + bio_list_init(&c->read_race_list); + spin_lock_init(&c->read_race_lock); + INIT_WORK(&c->read_race_work, bch_read_race_work); + c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; c->error_limit = 8 << IO_ERROR_SHIFT; |