summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/bcache/bcache.h 6
-rw-r--r-- drivers/md/bcache/request.c 94
-rw-r--r-- drivers/md/bcache/request.h 2
-rw-r--r-- drivers/md/bcache/super.c 4
4 files changed, 105 insertions, 1 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 17480233880d..c1d52cc5c9e7 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -741,6 +741,12 @@ struct cache_set {
struct gc_stat gc_stats;
+
+ /* IO PATH */
+ struct bio_list read_race_list;
+ struct work_struct read_race_work;
+ spinlock_t read_race_lock;
+
/* TIERING */
struct task_struct *tiering_thread;
struct keybuf tiering_keys;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index c0f581d9a5d9..b8f88e255db2 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -786,6 +786,45 @@ static int bch_read_hole(struct bio *bio, unsigned sectors)
return bio->bi_iter.bi_size ? MAP_CONTINUE : MAP_DONE;
}
+static void bch_read_requeue(struct cache_set *c, struct bio *bio) /* append @bio to @c's read_race_list and schedule the retry work item */
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->read_race_lock, flags); /* irqsave variant: the caller, bch_read_endio(), is installed as a bi_end_io handler */
+ bio_list_add(&c->read_race_list, bio);
+ spin_unlock_irqrestore(&c->read_race_lock, flags);
+ queue_work(c->wq, &c->read_race_work); /* read_race_work is bound to bch_read_race_work(), which pops this list */
+}
+
+static void bch_read_endio(struct bio *bio) /* bi_end_io for the per-extent bios that bch_read_fn() issues */
+{
+ struct bbio *b = container_of(bio, struct bbio, bio);
+ struct cache_set *c = b->c;
+ struct bio *orig = bio->bi_private; /* parent bio, stored in bi_private by bch_read_fn() */
+
+ if (!bio->bi_error && (race_fault() || ptr_stale(c, &b->key, 0))) {
+ /* Read vs. bucket-invalidate race: count it and requeue the bio for a retry instead of completing it */
+ atomic_long_inc(&c->cache_read_races);
+ bch_read_requeue(c, bio);
+ return; /* orig is not ended on this path; bch_read_retry() calls bio_endio() on it later */
+ }
+
+ bch_bbio_count_io_errors(c, bio, bio->bi_error, "reading from cache");
+
+ if (bio->bi_error)
+ orig->bi_error = bio->bi_error; /* propagate the failure to the parent bio */
+
+ bio_endio(orig); /* NOTE(review): presumably balances __bio_inc_remaining() in bch_read_fn() - confirm */
+ bio_put(bio);
+}
+
+static inline void __bio_inc_remaining(struct bio *bio) /* set BIO_CHAIN on @bio and increment its __bi_remaining counter */
+{
+ bio->bi_flags |= (1 << BIO_CHAIN); /* NOTE(review): presumably what makes bio_endio() consult __bi_remaining - confirm against the block layer */
+ smp_mb__before_atomic(); /* barrier between the flag store above and the atomic increment below */
+ atomic_inc(&bio->__bi_remaining);
+}
+
/* XXX: this looks a lot like cache_lookup_fn() */
static int bch_read_fn(struct btree_op *b_op, struct btree *b, struct bkey *k)
{
@@ -817,7 +856,9 @@ static int bch_read_fn(struct btree_op *b_op, struct btree *b, struct bkey *k)
ret = MAP_CONTINUE;
}
- bio_chain(n, bio);
+ n->bi_private = bio;
+ n->bi_end_io = bch_read_endio;
+ __bio_inc_remaining(bio);
bbio = to_bbio(n);
bch_bkey_copy_single_ptr(&bbio->key, k, ptr);
@@ -852,6 +893,57 @@ int bch_read(struct cache_set *c, struct bio *bio, u64 inode)
}
EXPORT_SYMBOL(bch_read);
+/**
+ * bch_read_retry - reset a leaf bio issued by bch_read_fn() and re-submit it through bch_read()
+ */
+static void bch_read_retry(struct bbio *bbio)
+{
+ struct bio *bio = &bbio->bio;
+ struct bio *parent;
+ u64 inode;
+
+ trace_bcache_read_retry(bio);
+
+ /*
+ * This used to be a leaf bio from bch_read_fn(), but
+ * since we don't know what happened to the btree in
+ * the meantime, we have to re-submit it via the
+ * top-level bch_read() entry point. Before doing that,
+ * we have to reset the bio, preserving the biovec.
+ *
+ * The inode, offset and size come from the bbio's key,
+ * which was set by bch_read_fn().
+ */
+ inode = KEY_INODE(&bbio->key);
+ parent = bio->bi_private; /* read before bch_bbio_reset() below */
+
+ bch_bbio_reset(bbio);
+ bio_chain(bio, parent); /* from here on, completion of bio is chained to parent */
+
+ bch_read(bbio->c, bio, inode);
+ bio_endio(parent); /* for __bio_inc_remaining() in bch_read_fn() */
+ bio_endio(bio); /* NOTE(review): presumably drops this function's own reference on the re-submitted bio - confirm against bch_read() */
+}
+
+void bch_read_race_work(struct work_struct *work) /* work handler: retry every bio queued on the cache set's read_race_list */
+{
+ struct cache_set *c = container_of(work, struct cache_set,
+ read_race_work);
+ unsigned long flags;
+ struct bio *bio;
+
+ while (1) {
+ spin_lock_irqsave(&c->read_race_lock, flags); /* the lock is held only around the pop */
+ bio = bio_list_pop(&c->read_race_list);
+ spin_unlock_irqrestore(&c->read_race_lock, flags);
+
+ if (!bio)
+ break; /* list is empty */
+
+ bch_read_retry(to_bbio(bio)); /* runs with read_race_lock released */
+ }
+}
+
/* struct search based code */
struct search {
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 913160680f2e..5398771ab9e1 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -86,6 +86,8 @@ void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
void bch_flash_dev_request_init(struct bcache_device *d);
+void bch_read_race_work(struct work_struct *work);
+
extern struct kmem_cache *bch_search_cache;
#endif /* _BCACHE_REQUEST_H_ */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 67386f58dd14..dce19dbb2a98 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1453,6 +1453,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_WORK(&c->bio_submit_work, bch_bio_submit_work);
spin_lock_init(&c->bio_submit_lock);
+ bio_list_init(&c->read_race_list);
+ spin_lock_init(&c->read_race_lock);
+ INIT_WORK(&c->read_race_work, bch_read_race_work);
+
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = 8 << IO_ERROR_SHIFT;