24 files changed, 631 insertions, 473 deletions
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 650d6542eb5a..95142f1dc2dd 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -3,8 +3,8 @@ obj-$(CONFIG_BCACHE)	+= bcache.o
 
 bcache-y		:= acl.o alloc.o bkey.o bkey_methods.o blockdev.o\
 	bset.o btree.o buckets.o chardev.o clock.o closure.o debug.o dirent.o\
-	extents.o fs.o fs-gc.o gc.o inode.o io.o journal.o keybuf.o keylist.o\
-	migrate.o move.o movinggc.o notify.o request.o siphash.o six.o stats.o\
-	super.o sysfs.o tier.o trace.o util.o writeback.o xattr.o
+	error.o extents.o fs.o fs-gc.o gc.o inode.o io.o journal.o keybuf.o\
+	keylist.o migrate.o move.o movinggc.o notify.o request.o siphash.o\
+	six.o stats.o super.o sysfs.o tier.o trace.o util.o writeback.o xattr.o
 
 ccflags-y		:= -Werror
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4b6930452935..d86133c79420 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -58,6 +58,7 @@
 #include "btree.h"
 #include "buckets.h"
 #include "clock.h"
+#include "error.h"
 #include "extents.h"
 #include "io.h"
 #include "journal.h"
@@ -281,15 +282,10 @@ static int bch_prio_write(struct cache *ca)
 		spin_unlock(&ca->prio_buckets_lock);
 
 		ret = prio_io(ca, r, REQ_OP_WRITE);
-		if (bch_meta_write_fault("prio"))
-			ret = -EIO;
-		if (ret) {
-			__bch_cache_error(ca,
-				"IO error %d writing prios to bucket %lu",
-				ret, r);
-			bch_cache_set_io_error(c);
+		if (bch_meta_write_fault("prio"))
+			ret = -EIO;
+		if (cache_fatal_io_err_on(ret, ca, "prio write to bucket %lu", r))
 			return ret;
-		}
 	}
 
 	spin_lock(&c->journal.lock);
@@ -300,11 +296,8 @@ static int bch_prio_write(struct cache *ca)
 	spin_unlock(&c->journal.lock);
 
 	ret = bch_journal_meta(&c->journal);
-	if (ret) {
-		__bch_cache_set_error(c,
-			"IO error %d journalling new prios", ret);
+	if (cache_set_fatal_err_on(ret, c, "journalling new prios"))
 		return ret;
-	}
 
 	/*
 	 * Don't want the old priorities to get garbage collected until after we
@@ -347,7 +340,7 @@ int bch_prio_read(struct cache *ca)
 
 	if ((bucket < ca->mi.first_bucket && bucket >= ca->mi.nbuckets) ||
 	    bch_meta_read_fault("prio")) {
-		bch_cache_error(ca, "bad prio bucket %llu", bucket);
+		cache_inconsistent(ca, "bad prio bucket %llu", bucket);
 		return -EIO;
 	}
 
@@ -361,32 +354,27 @@ int bch_prio_read(struct cache *ca)
 			bucket_nr++;
 
 			ret = prio_io(ca, bucket, REQ_OP_READ);
-			if (ret || bch_meta_read_fault("prio")) {
-				bch_cache_error(ca,
-					"IO error %d reading prios from bucket %llu",
-					ret, bucket);
+			if (cache_fatal_io_err_on(ret, ca,
+					"prio read from bucket %llu",
+					bucket) ||
+			    bch_meta_read_fault("prio"))
 				return -EIO;
-			}
 
 			got = p->magic;
 			expect = pset_magic(&c->sb);
-			if (got != expect) {
-				bch_cache_error(ca,
+			if (cache_inconsistent_on(got != expect, ca,
 					"bad magic (got %llu expect %llu) while reading prios from bucket %llu",
-					got, expect, bucket);
+					got, expect, bucket))
 				return -EIO;
-			}
 
 			got = p->csum;
 			expect = bch_checksum(PSET_CSUM_TYPE(p),
 					      &p->magic,
 					      bucket_bytes(ca) - 8);
-			if (got != expect) {
-				bch_cache_error(ca,
+			if (cache_inconsistent_on(got != expect, ca,
 					"bad checksum (got %llu expect %llu) while reading prios from bucket %llu",
-					got, expect, bucket);
+					got, expect, bucket))
 				return -EIO;
-			}
 
 			bucket = p->next_bucket;
 			d = p->data;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 224858445b5e..d1b0b61de590 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -237,7 +237,6 @@ struct cache {
 	struct percpu_ref	ref;
 	struct rcu_head		free_rcu;
 	struct work_struct	free_work;
-	struct work_struct	read_only_work;
 	struct work_struct	remove_work;
 	unsigned long		flags;
 
@@ -376,6 +375,7 @@ enum {
 	CACHE_SET_STOPPING,
 	CACHE_SET_RUNNING,
 	CACHE_SET_RO,
+	CACHE_SET_RO_COMPLETE,
 	CACHE_SET_GC_STOPPING,
 	CACHE_SET_GC_FAILURE,
 	CACHE_SET_BDEV_MOUNTED,
@@ -404,6 +404,8 @@ struct cache_set {
 
 	int			minor;
 	struct device		*chardev;
+	struct super_block	*vfs_sb;
+	char			uuid[40];
 
 	/* Counts outstanding writes, for clean transition to read-only */
 	struct percpu_ref	writes;
@@ -687,74 +689,6 @@ static inline unsigned bucket_bytes(const struct cache *ca)
 #define prio_buckets(ca)					\
 	DIV_ROUND_UP((size_t) (ca)->mi.nbuckets, prios_per_bucket(ca))
 
-/* Error handling macros */
-
-/* The underscore versions merely log an error, they don't fail the cache set */
-#define __bch_cache_set_error(c, fmt, ...)				\
-	printk(KERN_ERR "bcache: error on %pU: " fmt "\n",		\
-	       (c)->sb.set_uuid.b, ##__VA_ARGS__)
-
-#define __bch_cache_error(ca, fmt, ...)					\
-do {									\
-	char _buf[BDEVNAME_SIZE];					\
-	__bch_cache_set_error((ca)->set, "%s: " fmt,			\
-			      bdevname((ca)->disk_sb.bdev, _buf),	\
-			      ##__VA_ARGS__);				\
-} while (0)
-
-/* These do fail the cache set */
-#define bch_cache_set_error(c, ...)					\
-do {									\
-	__bch_cache_set_error(c, __VA_ARGS__);				\
-	bch_cache_set_fail(c);						\
-} while (0)
-
-#define bch_cache_error(ca, ...)					\
-do {									\
-	__bch_cache_error(ca, __VA_ARGS__);				\
-	bch_cache_set_fail((ca)->set);					\
-} while (0)
-
-#define btree_bug(b, ...)						\
-do {									\
-	__bch_cache_set_error((b)->c, __VA_ARGS__);			\
-	BUG();								\
-} while (0)
-
-#define cache_set_bug(c, ...)						\
-do {									\
-	__bch_cache_set_error(c, __VA_ARGS__);				\
-	BUG();								\
-} while (0)
-
-#define btree_bug_on(cond, b, ...)					\
-do {									\
-	if (cond)							\
-		btree_bug(b, __VA_ARGS__);				\
-} while (0)
-
-#define cache_set_bug_on(cond, c, ...)					\
-do {									\
-	if (cond)							\
-		cache_set_bug(c, __VA_ARGS__);				\
-} while (0)
-
-#define cache_set_err_on(cond, c, ...)					\
-do {									\
-	if (cond)							\
-		bch_cache_set_error(c, __VA_ARGS__);			\
-} while (0)
-
-#define __bcache_io_error(c, fmt, ...)					\
-	printk_ratelimited(KERN_ERR "bcache: IO error on %pU: " fmt "\n",\
-	       (c)->sb.set_uuid.b, ##__VA_ARGS__)
-
-#define bcache_io_error(c, bio, fmt, ...)				\
-do {									\
-	__bcache_io_error(c, fmt, ##__VA_ARGS__);			\
-	(bio)->bi_error = -EIO;						\
-} while (0)
-
 /* Forward declarations */
 
 long bch_chardev_ioctl(struct file *, unsigned, unsigned long);
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index fc0ca2631921..9544f8696d26 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -3,6 +3,7 @@
 #include "bkey_methods.h"
 #include "btree.h"
 #include "dirent.h"
+#include "error.h"
 #include "extents.h"
 #include "inode.h"
 #include "xattr.h"
@@ -15,58 +16,68 @@ static const struct bkey_ops *bch_bkey_ops[] = {
 	[BKEY_TYPE_BTREE]	= &bch_bkey_btree_ops,
 };
 
-bool bkey_invalid(struct cache_set *c,
-		  enum bkey_type type,
-		  struct bkey_s_c k)
+/* Returns string indicating reason for being invalid, or NULL if valid: */
+const char *bkey_invalid(struct cache_set *c, enum bkey_type type,
+			 struct bkey_s_c k)
 {
 	const struct bkey_ops *ops = bch_bkey_ops[type];
 
 	if (k.k->u64s < BKEY_U64s)
-		return true;
+		return "u64s too small";
 
 	if (k.k->size &&
 	    (bkey_deleted(k.k) || !ops->is_extents))
-		return true;
+		return "nonzero size field";
 
 	switch (k.k->type) {
 	case KEY_TYPE_DELETED:
-		return false;
+		return NULL;
 
 	case KEY_TYPE_DISCARD:
 	case KEY_TYPE_ERROR:
-		return bkey_val_bytes(k.k) != 0;
+		return bkey_val_bytes(k.k) != 0
+			? "value size should be zero"
+			: NULL;
 
 	case KEY_TYPE_COOKIE:
-		return (bkey_val_bytes(k.k) != sizeof(struct bch_cookie));
+		return bkey_val_bytes(k.k) != sizeof(struct bch_cookie)
+			? "incorrect value size"
+			: NULL;
 
 	default:
 		if (k.k->type < KEY_TYPE_GENERIC_NR)
-			return true;
+			return "invalid type";
 
 		return ops->key_invalid(c, k);
 	}
 }
 
+const char *btree_bkey_invalid(struct cache_set *c, struct btree *b,
+			       struct bkey_s_c k)
+{
+	if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0)
+		return "key before start of btree node";
+
+	if (bkey_cmp(k.k->p, b->data->max_key) > 0)
+		return "key past end of btree node";
+
+	return bkey_invalid(c, btree_node_type(b), k);
+}
+
 void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k)
 {
 	enum bkey_type type = btree_node_type(b);
 	const struct bkey_ops *ops = bch_bkey_ops[type];
+	const char *invalid;
 
 	BUG_ON(!k.k->u64s);
 
-	cache_set_bug_on(bkey_cmp(bkey_start_pos(k.k),
-				  b->data->min_key) < 0,
-			 c, "key before start of btree node");
-
-	cache_set_bug_on(bkey_cmp(k.k->p,
-				  b->data->max_key) > 0,
-			 c, "key past end of btree node");
-
-	if (bkey_invalid(c, type, k)) {
+	invalid = btree_bkey_invalid(c, b, k);
+	if (invalid) {
 		char buf[160];
 
 		bch_bkey_val_to_text(c, type, buf, sizeof(buf), k);
-		cache_set_bug(c, "invalid bkey %s", buf);
+		cache_set_bug(c, "invalid bkey %s: %s", buf, invalid);
 		return;
 	}
 
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index a8a5e802160e..a21325b87208 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -15,7 +15,8 @@ struct btree;
 struct bkey;
 
 struct bkey_ops {
-	bool		(*key_invalid)(const struct cache_set *,
+	/* Returns reason for being invalid if invalid, else NULL: */
+	const char *	(*key_invalid)(const struct cache_set *,
 				       struct bkey_s_c);
 	void		(*key_debugcheck)(struct cache_set *, struct btree *,
 					  struct bkey_s_c);
@@ -25,7 +26,10 @@ struct bkey_ops {
 	bool		is_extents;
 };
 
-bool bkey_invalid(struct cache_set *, enum bkey_type, struct bkey_s_c);
+const char *bkey_invalid(struct cache_set *, enum bkey_type, struct bkey_s_c);
+const char *btree_bkey_invalid(struct cache_set *, struct btree *,
+			       struct bkey_s_c);
+
 void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c);
 void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
 			  char *, size_t, struct bkey_s_c);
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c
index 1a788f2fd7c5..20cc8dd39a4f 100644
--- a/drivers/md/bcache/blockdev.c
+++ b/drivers/md/bcache/blockdev.c
@@ -2,6 +2,7 @@
 #include "bcache.h"
 #include "blockdev.h"
 #include "btree.h"
+#include "error.h"
 #include "inode.h"
 #include "request.h"
 #include "super.h"
@@ -719,12 +720,8 @@ int bch_blockdev_volumes_start(struct cache_set *c)
 		inode = bkey_s_c_to_inode_blockdev(k);
 
 		ret = blockdev_volume_run(c, inode);
-		if (ret) {
-			bch_cache_set_error(c,
-				"can't bring up blockdev volumes: %i",
-				ret);
+		if (ret)
 			break;
-		}
 	}
 	bch_btree_iter_unlock(&iter);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 6edbb06d46f8..3bda6f09e8e1 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -24,6 +24,7 @@
 #include "alloc.h"
 #include "btree.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "gc.h"
 #include "io.h"
@@ -352,8 +353,8 @@ static void bch_btree_init_next(struct cache_set *c, struct btree *b,
 			    _end - _data) ^ 0xffffffffffffffffULL;	\
 })
 
-#define btree_node_error(b, ca, ptr, fmt, ...)				\
-	bch_cache_error(ca,						\
+#define btree_node_error(b, c, ptr, fmt, ...)				\
+	cache_set_inconsistent(c,					\
 		"btree node error at btree %u level %u/%u bucket %zu block %u u64s %u: " fmt,\
 		(b)->btree_id, (b)->level, btree_node_root(b)		\
 			    ? btree_node_root(b)->level : -1,		\
@@ -375,14 +376,16 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
 		return  "bset past end of btree node";
 
 	if (i != &b->data->keys && !i->u64s)
-		btree_node_error(b, ca, ptr, "empty set");
+		btree_node_error(b, c, ptr, "empty set");
 
 	for (k = i->start;
 	     k != bset_bkey_last(i);) {
 		struct bkey_tup tup;
+		struct bkey_s_c u;
+		const char *invalid;
 
 		if (!k->u64s) {
-			btree_node_error(b, ca, ptr,
+			btree_node_error(b, c, ptr,
 				"KEY_U64s 0: %zu bytes of metadata lost",
 				(void *) bset_bkey_last(i) - (void *) k);
 
@@ -391,7 +394,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
 		}
 
 		if (bkey_next(k) > bset_bkey_last(i)) {
-			btree_node_error(b, ca, ptr,
+			btree_node_error(b, c, ptr,
 					 "key extends past end of bset");
 
 			i->u64s = (u64 *) k - i->_data;
@@ -399,16 +402,15 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
 		}
 
 		bkey_disassemble(&tup, f, k);
+		u = bkey_tup_to_s_c(&tup);
 
-		if (bkey_invalid(c, btree_node_type(b),
-				 bkey_tup_to_s_c(&tup))) {
+		invalid = btree_bkey_invalid(c, b, u);
+		if (invalid) {
 			char buf[160];
 
-			bkey_disassemble(&tup, f, k);
 			bch_bkey_val_to_text(c, btree_node_type(b),
-					     buf, sizeof(buf),
-					     bkey_tup_to_s_c(&tup));
-			btree_node_error(b, ca, ptr,
+					     buf, sizeof(buf), u);
+			btree_node_error(b, c, ptr,
 					 "invalid bkey %s", buf);
 
 			i->u64s -= k->u64s;
@@ -474,6 +476,8 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
 			if (BSET_CSUM_TYPE(i) >= BCH_CSUM_NR)
 				goto err;
 
+			/* XXX: retry checksum errors */
+
 			err = "bad checksum";
 			if (b->data->csum != btree_csum_set(b, b->data))
 				goto err;
@@ -542,13 +546,13 @@ out:
 	return;
 err:
 	set_btree_node_read_error(b);
-	btree_node_error(b, ca, ptr, "%s", err);
+	btree_node_error(b, c, ptr, "%s", err);
 	goto out;
 }
 
 static void btree_node_read_endio(struct bio *bio)
 {
-	bch_bbio_endio(to_bbio(bio), bio->bi_error, "reading btree");
+	bch_bbio_endio(to_bbio(bio));
 }
 
 static void bch_btree_node_read(struct cache_set *c, struct btree *b)
@@ -563,9 +567,10 @@ static void bch_btree_node_read(struct cache_set *c, struct btree *b)
 	closure_init_stack(&cl);
 
 	pick = bch_btree_pick_ptr(c, b);
-	if (!pick.ca) {
+	if (cache_set_fatal_err_on(!pick.ca, c,
+				   "no cache device for btree node")) {
 		set_btree_node_read_error(b);
-		goto missing;
+		return;
 	}
 
 	percpu_ref_get(&pick.ca->ref);
@@ -583,29 +588,18 @@ static void bch_btree_node_read(struct cache_set *c, struct btree *b)
 
 	closure_sync(&cl);
 
-	if (bio->bi_error ||
-	    bch_meta_read_fault("btree"))
+	if (cache_fatal_io_err_on(bio->bi_error,
+				  pick.ca, "IO error reading bucket %zu",
+				  PTR_BUCKET_NR(pick.ca, &pick.ptr)) ||
+	    bch_meta_read_fault("btree")) {
 		set_btree_node_read_error(b);
-
-	bio_put(bio);
-
-	if (btree_node_read_error(b))
-		goto err;
+		goto out;
+	}
 
 	bch_btree_node_read_done(c, b, pick.ca, &pick.ptr);
 	bch_time_stats_update(&c->btree_read_time, start_time);
-
-	percpu_ref_put(&pick.ca->ref);
-	return;
-
-missing:
-	bch_cache_set_error(c, "no cache device for btree node");
-	percpu_ref_put(&pick.ca->ref);
-	return;
-
-err:
-	bch_cache_error(pick.ca, "IO error reading bucket %zu",
-			PTR_BUCKET_NR(pick.ca, &pick.ptr));
+out:
+	bio_put(bio);
 	percpu_ref_put(&pick.ca->ref);
 }
 
@@ -646,20 +640,16 @@ static void btree_node_write_endio(struct bio *bio)
 	struct btree *b = container_of(cl, struct btree, io);
 	struct bch_write_bio *wbio = to_wbio(bio);
 
-	if (bio->bi_error || bch_meta_write_fault("btree")) {
+	if (cache_fatal_io_err_on(bio->bi_error, wbio->bio.ca, "btree write") ||
+	    bch_meta_write_fault("btree"))
 		set_btree_node_write_error(b);
 
-		__bch_cache_error(wbio->bio.ca, "IO error %d writing btree",
-				  bio->bi_error);
-		bch_cache_set_io_error(wbio->bio.ca->set);
-	}
-
 	if (wbio->orig)
 		bio_endio(wbio->orig);
 	else if (wbio->bounce)
 		bch_bio_free_pages(bio);
 
-	bch_bbio_endio(to_bbio(bio), bio->bi_error, "writing btree");
+	bch_bbio_endio(to_bbio(bio));
 }
 
 static void do_btree_node_write(struct closure *cl)
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 99d4657c4f4a..c0fb3cb19086 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -9,6 +9,7 @@
 #include "btree.h"
 #include "buckets.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "inode.h"
 #include "io.h"
@@ -166,13 +167,12 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 		void *p1 = kmap_atomic(bv.bv_page);
 		void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page);
 
-		cache_set_err_on(memcmp(p1 + bv.bv_offset,
-					p2 + bv.bv_offset,
-					bv.bv_len),
-				 dc->disk.c,
-				 "verify failed at dev %s sector %llu",
-				 bdevname(dc->disk_sb.bdev, name),
-				 (uint64_t) bio->bi_iter.bi_sector);
+		if (memcmp(p1 + bv.bv_offset,
+			   p2 + bv.bv_offset,
+			   bv.bv_len))
+			panic("verify failed at dev %s sector %llu\n",
+			      bdevname(dc->disk_sb.bdev, name),
+			      (uint64_t) bio->bi_iter.bi_sector);
 
 		kunmap_atomic(p1);
 	}
@@ -199,7 +199,7 @@ void bch_verify_inode_refs(struct cache_set *c)
 		    bch_inode_find_by_inum(c, k.k->p.inode, &inode)) {
 			bch_bkey_val_to_text(c, BTREE_ID_EXTENTS, buf,
 					     sizeof(buf), k);
-			bch_cache_set_error(c,
+			cache_set_inconsistent(c,
 				"extent for missing inode %llu\n%s",
 				k.k->p.inode, buf);
 			bch_btree_iter_unlock(&iter);
@@ -210,7 +210,7 @@ void bch_verify_inode_refs(struct cache_set *c)
 
 		if (!S_ISREG(inode.v.i_mode) &&
 		    !S_ISLNK(inode.v.i_mode))
-			bch_cache_set_error(c,
+			cache_set_inconsistent(c,
 				"extent for non regular file, inode %llu mode %u",
 				k.k->p.inode, inode.v.i_mode);
 
@@ -219,7 +219,7 @@ void bch_verify_inode_refs(struct cache_set *c)
 		if (k.k->p.offset > round_up(inode.v.i_size, PAGE_SIZE) >> 9) {
 			bch_bkey_val_to_text(c, BTREE_ID_EXTENTS, buf,
 					     sizeof(buf), k);
-			bch_cache_set_error(c,
+			cache_set_inconsistent(c,
 				"extent past end of inode %llu: i_size %llu extent\n%s",
 				k.k->p.inode, inode.v.i_size, buf);
 		}
@@ -234,7 +234,7 @@ void bch_verify_inode_refs(struct cache_set *c)
 
 		if (k.k->p.inode != cur_inum &&
 		    bch_inode_find_by_inum(c, k.k->p.inode, &inode)) {
-			bch_cache_set_error(c, "dirent for missing inode %llu",
+			cache_set_inconsistent(c, "dirent for missing inode %llu",
 					    k.k->p.inode);
 			bch_btree_iter_unlock(&iter);
 			return;
@@ -243,7 +243,7 @@ void bch_verify_inode_refs(struct cache_set *c)
 		cur_inum = k.k->p.inode;
 
 		if (!S_ISDIR(inode.v.i_mode))
-			bch_cache_set_error(c,
+			cache_set_inconsistent(c,
 				"dirent for non directory, inode %llu mode %u",
 				k.k->p.inode, inode.v.i_mode);
 	}
@@ -253,7 +253,7 @@ void bch_verify_inode_refs(struct cache_set *c)
 			   POS(BCACHE_ROOT_INO, 0), k) {
 		if (k.k->p.inode != cur_inum &&
 		    bch_inode_find_by_inum(c, k.k->p.inode, &inode)) {
-			bch_cache_set_error(c,
+			cache_set_inconsistent(c,
 				"xattr for missing inode %llu",
 					    k.k->p.inode);
 			bch_btree_iter_unlock(&iter);
@@ -262,11 +262,10 @@ void bch_verify_inode_refs(struct cache_set *c)
 
 		cur_inum = k.k->p.inode;
 
-		if (!S_ISREG(inode.v.i_mode) &&
-		    !S_ISDIR(inode.v.i_mode))
-			bch_cache_set_error(c,
-				"xattr for non file/directory, inode %llu mode %u",
-				k.k->p.inode, inode.v.i_mode);
+		cache_set_inconsistent_on(!S_ISREG(inode.v.i_mode) &&
+					  !S_ISDIR(inode.v.i_mode), c,
+			"xattr for non file/directory, inode %llu mode %u",
+			k.k->p.inode, inode.v.i_mode);
 	}
 	bch_btree_iter_unlock(&iter);
 }
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index c007b7b0d39d..b7554e87ac55 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -78,21 +78,22 @@ static int dirent_cmp(struct bkey_s_c_dirent d,
 	return len - q->len ?: memcmp(d.v->d_name, q->name, len);
 }
 
-static bool bch_dirent_invalid(const struct cache_set *c, struct bkey_s_c k)
+static const char *bch_dirent_invalid(const struct cache_set *c,
+				      struct bkey_s_c k)
 {
 	switch (k.k->type) {
 	case BCH_DIRENT:
-		if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
-			return true;
+		return bkey_val_bytes(k.k) < sizeof(struct bch_dirent)
+			? "value too small"
+			: NULL;
 
-		return false;
 	case BCH_DIRENT_WHITEOUT:
-		if (bkey_val_bytes(k.k))
-			return true;
+		return bkey_val_bytes(k.k) != 0
+			? "value size should be zero"
+			: NULL;
 
-		return false;
 	default:
-		return true;
+		return "invalid type";
 	}
 }
 
diff --git a/drivers/md/bcache/error.c b/drivers/md/bcache/error.c
new file mode 100644
index 000000000000..a0ca5ecbba7b
--- /dev/null
+++ b/drivers/md/bcache/error.c
@@ -0,0 +1,134 @@
+#include "bcache.h"
+#include "error.h"
+#include "io.h"
+#include "notify.h"
+#include "super.h"
+
+void bch_inconsistent_error(struct cache_set *c)
+{
+	switch (c->opts.on_error_action) {
+	case BCH_ON_ERROR_CONTINUE:
+		break;
+	case BCH_ON_ERROR_RO:
+		if (!test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)) {
+			/* XXX do something better here? */
+			bch_cache_set_stop(c);
+			return;
+		}
+
+		if (bch_cache_set_read_only(c))
+			__bch_cache_set_error(c, "emergency read only");
+		break;
+	case BCH_ON_ERROR_PANIC:
+		panic("bcache: (%s) panic after error\n",
+		      c->vfs_sb ? c->vfs_sb->s_id : c->uuid);
+		break;
+	}
+}
+
+void bch_fatal_error(struct cache_set *c)
+{
+	/* Go read only; the message is printed only if that call returns true */
+	if (bch_cache_set_read_only(c))
+		__bch_cache_set_error(c, "emergency read only");
+}
+
+/* Nonfatal IO errors, IO error/latency accounting: */
+
+/* Just does IO error accounting: */
+void bch_account_io_completion(struct cache *ca)
+{
+	/*
+	 * The halflife of an error is:
+	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
+	 */
+
+	if (ca->set->error_decay) {
+		unsigned count = atomic_inc_return(&ca->io_count);
+
+		while (count > ca->set->error_decay) {
+			unsigned errors;
+			unsigned old = count;
+			unsigned new = count - ca->set->error_decay;
+
+			/*
+			 * First we subtract refresh from count; each time we
+			 * successfully do so, we rescale the errors once:
+			 */
+
+			count = atomic_cmpxchg(&ca->io_count, old, new);
+
+			if (count == old) {
+				count = new;
+
+				errors = atomic_read(&ca->io_errors);
+				do {
+					old = errors;
+					new = ((uint64_t) errors * 127) / 128;
+					errors = atomic_cmpxchg(&ca->io_errors,
+								old, new);
+				} while (old != errors);
+			}
+		}
+	}
+}
+
+/* IO error accounting and latency accounting: */
+void bch_account_bbio_completion(struct bbio *bio)
+{
+	struct cache_set *c;
+	unsigned threshold;
+
+	if (!bio->ca)
+		return;
+
+	c = bio->ca->set;
+	threshold = op_is_write(bio_op(&bio->bio))
+		? c->congested_write_threshold_us
+		: c->congested_read_threshold_us;
+
+	if (threshold && bio->submit_time_us) {
+		unsigned t = local_clock_us();
+
+		int us = t - bio->submit_time_us;
+		int congested = atomic_read(&c->congested);
+
+		if (us > (int) threshold) {
+			int ms = us / 1024;
+			c->congested_last_us = t;
+
+			ms = min(ms, CONGESTED_MAX + congested);
+			atomic_sub(ms, &c->congested);
+		} else if (congested < 0)
+			atomic_inc(&c->congested);
+	}
+
+	bch_account_io_completion(bio->ca);
+}
+
+void bch_nonfatal_io_error_work(struct work_struct *work)
+{
+	struct cache *ca = container_of(work, struct cache, io_error_work);
+	unsigned errors = atomic_read(&ca->io_errors);
+	char buf[BDEVNAME_SIZE];
+
+	if (errors < ca->set->error_limit) {
+		bch_notify_cache_error(ca, false);
+	} else {
+		bch_notify_cache_error(ca, true);
+
+		mutex_lock(&bch_register_lock);
+		if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE) {
+			printk(KERN_ERR "bcache: too many IO errors on %s, going RO\n",
+			       bdevname(ca->disk_sb.bdev, buf));
+			bch_cache_read_only(ca);
+		}
+		mutex_unlock(&bch_register_lock);
+	}
+}
+
+void bch_nonfatal_io_error(struct cache *ca)
+{
+	atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
+	queue_work(system_long_wq, &ca->io_error_work);
+}
diff --git a/drivers/md/bcache/error.h b/drivers/md/bcache/error.h
new file mode 100644
index 000000000000..ea67bb92aaec
--- /dev/null
+++ b/drivers/md/bcache/error.h
@@ -0,0 +1,202 @@
+#ifndef _BCACHE_ERROR_H
+#define _BCACHE_ERROR_H
+
+#include <linux/printk.h>
+
+struct cache;
+struct cache_set;
+struct bbio;
+
+/*
+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag
+ * superblock as such
+ */
+
+/* Error messages: */
+
+/* should clean this up */
+
+#define __bch_err_fmt(_c, fmt, ...)					\
+	KERN_ERR "bcache (%s): " fmt "\n",				\
+	((_c)->vfs_sb ? (_c)->vfs_sb->s_id : (_c)->uuid), ##__VA_ARGS__
+
+#define __bch_cache_set_error(c, fmt, ...)				\
+	printk(__bch_err_fmt(c, fmt, ##__VA_ARGS__))
+
+#define __bch_cache_error(ca, fmt, ...)					\
+do {									\
+	char _buf[BDEVNAME_SIZE];					\
+	__bch_cache_set_error((ca)->set, "%s: " fmt,			\
+			      bdevname((ca)->disk_sb.bdev, _buf),	\
+			      ##__VA_ARGS__);				\
+} while (0)
+
+/*
+ * Very fatal logic/inconsistency errors: these indicate that we've majorly
+ * screwed up at runtime, i.e. it's not likely that it was just caused by the
+ * data on disk being inconsistent. These BUG():
+ *
+ * XXX: audit and convert to inconsistent() checks
+ */
+
+#define cache_set_bug(c, ...)						\
+do {									\
+	__bch_cache_set_error(c, __VA_ARGS__);				\
+	BUG();								\
+} while (0)
+
+#define cache_set_bug_on(cond, c, ...)					\
+do {									\
+	if (cond)							\
+		cache_set_bug(c, __VA_ARGS__);				\
+} while (0)
+
+/*
+ * Inconsistency errors: The on disk data is inconsistent. If these occur during
+ * initial recovery, they don't indicate a bug in the running code - we walk all
+ * the metadata before modifying anything. If they occur at runtime, they
+ * indicate either a bug in the running code or (less likely) data is being
+ * silently corrupted under us.
+ *
+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in
+ * BCH_ON_ERROR_CONTINUE mode
+ */
+
+void bch_inconsistent_error(struct cache_set *);
+
+#define cache_set_inconsistent(c, ...)					\
+do {									\
+	__bch_cache_set_error(c, __VA_ARGS__);				\
+	bch_inconsistent_error(c);					\
+} while (0)
+
+#define cache_set_inconsistent_on(cond, c, ...)				\
+({									\
+	int _ret = !!(cond);						\
+									\
+	if (_ret)							\
+		cache_set_inconsistent(c, __VA_ARGS__);			\
+	_ret;								\
+})
+
+/*
+ * Later we might want to mark only the particular device inconsistent, not the
+ * entire cache set:
+ */
+
+#define cache_inconsistent(ca, ...)					\
+do {									\
+	__bch_cache_error(ca, __VA_ARGS__);				\
+	bch_inconsistent_error((ca)->set);				\
+} while (0)
+
+#define cache_inconsistent_on(cond, ca, ...)				\
+({									\
+	int _ret = !!(cond);						\
+									\
+	if (_ret)							\
+		cache_inconsistent(ca, __VA_ARGS__);			\
+	_ret;								\
+})
+
+/*
+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW
+ * mode - pretty much just due to metadata IO errors:
+ */
+
+void bch_fatal_error(struct cache_set *);
+
+#define cache_set_fatal_error(c, ...)					\
+do {									\
+	__bch_cache_set_error(c, __VA_ARGS__);				\
+	bch_fatal_error(c);						\
+} while (0)
+
+#define cache_set_fatal_err_on(cond, c, ...)				\
+({									\
+	int _ret = !!(cond);						\
+									\
+	if (_ret)							\
+		cache_set_fatal_error(c, __VA_ARGS__);			\
+	_ret;								\
+})
+
+#define cache_fatal_error(ca, ...)					\
+do {									\
+	__bch_cache_error(ca, __VA_ARGS__);				\
+	bch_fatal_error((ca)->set);					\
+} while (0)
+
+#define cache_fatal_io_error(ca, fmt, ...)				\
+do {									\
+	char _buf[BDEVNAME_SIZE];					\
+									\
+	printk_ratelimited(__bch_err_fmt((ca)->set, "fatal IO error on %s for " fmt,\
+					 bdevname((ca)->disk_sb.bdev, _buf),\
+					 ##__VA_ARGS__));		\
+	bch_fatal_error((ca)->set);					\
+} while (0)
+
+#define cache_fatal_io_err_on(cond, ca, ...)				\
+({									\
+	int _ret = !!(cond);						\
+									\
+	if (_ret)							\
+		cache_fatal_io_error(ca, __VA_ARGS__);			\
+	_ret;								\
+})
+
+/*
+ * Nonfatal IO errors: either recoverable metadata IO (because we have
+ * replicas), or data IO - we need to log it and print out a message, but we
+ * don't (necessarily) want to shut down the fs:
+ */
+
+void bch_account_io_completion(struct cache *);
+void bch_account_bbio_completion(struct bbio *);
+
+void bch_nonfatal_io_error_work(struct work_struct *);
+
+/* Does the error handling without logging a message */
+void bch_nonfatal_io_error(struct cache *);
+
+#if 0
+#define cache_set_nonfatal_io_error(c, ...)				\
+do {									\
+	__bch_cache_set_error(c, __VA_ARGS__);				\
+	bch_nonfatal_io_error(c);					\
+} while (0)
+#endif
+
+/* Logs message and handles the error: */
+#define cache_nonfatal_io_error(ca, fmt, ...)				\
+do {									\
+	char _buf[BDEVNAME_SIZE];					\
+									\
+	printk_ratelimited(__bch_err_fmt((ca)->set, "IO error on %s for " fmt,\
+					 bdevname((ca)->disk_sb.bdev, _buf),\
+					 ##__VA_ARGS__));		\
+	bch_nonfatal_io_error(ca);					\
+} while (0)
+
+#define cache_nonfatal_io_err_on(cond, ca, ...)				\
+({									\
+	int _ret = !!(cond);						\
+									\
+	if (_ret)							\
+		cache_nonfatal_io_error(ca, __VA_ARGS__);		\
+	_ret;								\
+})
+
+/* kill? */
+
+#define __bcache_io_error(c, fmt, ...)					\
+	printk_ratelimited(__bch_err_fmt(c, "IO error: " fmt, ##__VA_ARGS__))
+
+#define bcache_io_error(c, bio, fmt, ...)				\
+do {									\
+	__bcache_io_error(c, fmt, ##__VA_ARGS__);			\
+	(bio)->bi_error = -EIO;						\
+} while (0)
+
+#endif /* _BCACHE_ERROR_H */
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 4d5889d6d107..25a63aefb6a8 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -9,6 +9,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "dirent.h"
+#include "error.h"
 #include "extents.h"
 #include "gc.h"
 #include "inode.h"
@@ -340,8 +341,8 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf,
 
 /* Btree ptrs */
 
-static const char *bch_btree_ptr_invalid_reason(const struct cache_set *c,
-						struct bkey_s_c k)
+static const char *bch_btree_ptr_invalid(const struct cache_set *c,
+					 struct bkey_s_c k)
 {
 	if (bkey_extent_is_cached(k.k))
 		return "cached";
@@ -383,11 +384,6 @@ static const char *bch_btree_ptr_invalid_reason(const struct cache_set *c,
 	}
 }
 
-static bool bch_btree_ptr_invalid(const struct cache_set *c, struct bkey_s_c k)
-{
-	return bch_btree_ptr_invalid_reason(c, k);
-}
-
 static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
 				 struct bkey_s_c k)
 {
@@ -401,11 +397,6 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
 	unsigned replicas = 0;
 	bool bad;
 
-	if (bkey_extent_is_cached(k.k)) {
-		btree_bug(b, "btree ptr marked as cached");
-		return;
-	}
-
 	rcu_read_lock();
 
 	extent_for_each_online_device(c, e, ptr, ca) {
@@ -444,11 +435,11 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
 	return;
 err:
 	bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
-	btree_bug(b, "%s btree pointer %s: bucket %zi prio %i "
-		  "gen %i last_gc %i mark %08x",
-		  err, buf, PTR_BUCKET_NR(ca, ptr),
-		  g->read_prio, PTR_BUCKET_GEN(ca, ptr),
-		  g->oldest_gen, g->mark.counter);
+	cache_set_bug(c, "%s btree pointer %s: bucket %zi prio %i "
+		      "gen %i last_gc %i mark %08x",
+		      err, buf, PTR_BUCKET_NR(ca, ptr),
+		      g->read_prio, PTR_BUCKET_GEN(ca, ptr),
+		      g->oldest_gen, g->mark.counter);
 	rcu_read_unlock();
 }
 
@@ -463,7 +454,7 @@ static void bch_btree_ptr_to_text(struct cache_set *c, char *buf,
 	if (bkey_extent_is_data(k.k))
 		out += extent_print_ptrs(c, buf, size, bkey_s_c_to_extent(k));
 
-	invalid = bch_btree_ptr_invalid_reason(c, k);
+	invalid = bch_btree_ptr_invalid(c, k);
 	if (invalid)
 		p(" invalid: %s", invalid);
 #undef p
@@ -480,23 +471,19 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
 	rcu_read_lock();
 
 	extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
-		if (crc) {
-			bch_cache_error(ca,
+		if (cache_set_inconsistent_on(crc, c,
 				"btree node pointer with crc at btree %u level %u/%u bucket %zu",
 				b->btree_id, b->level, btree_node_root(b)
 				? btree_node_root(b)->level : -1,
-				PTR_BUCKET_NR(ca, ptr));
+				PTR_BUCKET_NR(ca, ptr)))
 			break;
-		}
 
-		if (ptr_stale(ca, ptr)) {
-			bch_cache_error(ca,
+		if (cache_inconsistent_on(ptr_stale(ca, ptr), ca,
 				"stale btree node pointer at btree %u level %u/%u bucket %zu",
 				b->btree_id, b->level, btree_node_root(b)
 				? btree_node_root(b)->level : -1,
-				PTR_BUCKET_NR(ca, ptr));
+				PTR_BUCKET_NR(ca, ptr)))
 			continue;
-		}
 
 		percpu_ref_get(&ca->ref);
 		rcu_read_unlock();
@@ -1306,8 +1293,8 @@ out:
 	return inserted;
 }
 
-static const char *bch_extent_invalid_reason(const struct cache_set *c,
-					     struct bkey_s_c k)
+static const char *bch_extent_invalid(const struct cache_set *c,
+				      struct bkey_s_c k)
 {
 	if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
 		return "value too big";
@@ -1360,11 +1347,6 @@ invalid:
 	}
 }
 
-static bool bch_extent_invalid(const struct cache_set *c, struct bkey_s_c k)
-{
-	return bch_extent_invalid_reason(c, k);
-}
-
 static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
 				  struct bkey_s_c k)
 {
@@ -1379,6 +1361,15 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
 	unsigned ptrs_per_tier[CACHE_TIERS];
 	unsigned i, tier, replicas = 0;
 
+	/*
+	 * XXX: we should be doing most/all of these checks at startup time,
+	 * where we check bkey_invalid() in btree_node_read_done()
+	 *
+	 * But note that we can't check for stale pointers or incorrect gc marks
+	 * until after journal replay is done (it might be an extent that's
+	 * going to get overwritten during replay)
+	 */
+
 	memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
 
 	mi = cache_member_info_get(c);
@@ -1491,7 +1482,7 @@ static void bch_extent_to_text(struct cache_set *c, char *buf,
 	if (bkey_extent_is_data(k.k))
 		out += extent_print_ptrs(c, buf, size, bkey_s_c_to_extent(k));
 
-	invalid = bch_extent_invalid_reason(c, k);
+	invalid = bch_extent_invalid(c, k);
 	if (invalid)
 		p(" invalid: %s", invalid);
 #undef p
diff --git a/drivers/md/bcache/fs-gc.c b/drivers/md/bcache/fs-gc.c
index 3245e1063898..54bdf550ac93 100644
--- a/drivers/md/bcache/fs-gc.c
+++ b/drivers/md/bcache/fs-gc.c
@@ -2,6 +2,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "dirent.h"
+#include "error.h"
 #include "fs.h"
 #include "inode.h"
 #include "keylist.h"
@@ -74,13 +75,13 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
 	struct bkey_i_inode update;
 	int ret;
 
-	cache_set_err_on(inode.v->i_nlink < link.count, c,
+	cache_set_inconsistent_on(inode.v->i_nlink < link.count, c,
 			 "i_link too small (%u < %u, type %i)",
 			 inode.v->i_nlink, link.count + link.dir_count,
 			 mode_to_type(inode.v->i_mode));
 
 	if (!link.count) {
-		cache_set_err_on(S_ISDIR(inode.v->i_mode) &&
+		cache_set_inconsistent_on(S_ISDIR(inode.v->i_mode) &&
 			bch_empty_dir(c, inode.k->p.inode), c,
 			"non empty directory with link count 0,inode nlink %u, dir links found %u",
 			inode.v->i_nlink, link.dir_count);
@@ -139,7 +140,7 @@ static int bch_gc_walk_inodes(struct cache_set *c, u64 pos, struct nlink *links)
 			break;
 
 		while (i < k.k->p.inode - pos) {
-			cache_set_err_on(links[i].count, c,
+			cache_set_inconsistent_on(links[i].count, c,
 					 "missing inode %llu",
 					 pos + i);
 			i++;
@@ -157,7 +158,7 @@ static int bch_gc_walk_inodes(struct cache_set *c, u64 pos, struct nlink *links)
 
 			break;
 		default:
-			cache_set_err_on(links[i].count, c,
+			cache_set_inconsistent_on(links[i].count, c,
 					 "missing inode %llu",
 					 pos + i);
 			break;
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index e6c689974de1..bccc2b176533 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -2057,7 +2057,7 @@ static int bch_remount(struct super_block *sb, int *flags, char *data)
 		const char *err = NULL;
 
 		if (opts.read_only) {
-			bch_cache_set_read_only(c);
+			bch_cache_set_read_only_sync(c);
 
 			sb->s_flags |= MS_RDONLY;
 		} else {
@@ -2103,9 +2103,11 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
 	struct cache_set *c;
+	struct cache *ca;
 	struct super_block *sb;
 	struct inode *inode;
 	struct cache_set_opts opts;
+	unsigned i;
 	int ret;
 
 	if (!parse_options(&opts, flags, data))
@@ -2130,6 +2132,17 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
 	sb->s_magic		= BCACHE_STATFS_MAGIC;
 	sb->s_time_gran		= 1;
 	sb->s_fs_info		= c;
+	c->vfs_sb		= sb;
+
+	rcu_read_lock();
+	for_each_cache_rcu(ca, c, i) {
+		char b[BDEVNAME_SIZE];
+
+		strlcpy(sb->s_id, bdevname(ca->disk_sb.bdev, b),
+			sizeof(sb->s_id));
+		break;
+	}
+	rcu_read_unlock();
 
 	if (opts.posix_acl < 0)
 		sb->s_flags	|= MS_POSIXACL;
diff --git a/drivers/md/bcache/gc.c b/drivers/md/bcache/gc.c
index c08dd74a1015..74ac042c82b5 100644
--- a/drivers/md/bcache/gc.c
+++ b/drivers/md/bcache/gc.c
@@ -8,6 +8,7 @@
 #include "btree.h"
 #include "buckets.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "gc.h"
 #include "journal.h"
@@ -79,13 +80,6 @@ static inline bool btree_node_has_ptrs(struct btree *b)
 bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
 {
 	struct bkey_format *f = &b->keys.format;
-	struct bset_tree *t;
-
-	for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++)
-		btree_bug_on(t->size &&
-			     bset_written(&b->keys, t) &&
-			     bkey_cmp_packed(f, &b->key.k, &t->end) < 0,
-			     b, "found short btree key in gc");
 
 	if (btree_node_has_ptrs(b)) {
 		struct btree_node_iter iter;
@@ -128,8 +122,12 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id)
 
 	for_each_btree_node(&iter, c, btree_id, POS_MIN, b) {
 		if (!b->level) {
-			cache_set_bug_on(bkey_cmp(b->data->min_key, next_min),
-				c,
+			/*
+			 * XXX: this check should be elsewhere - also, we should
+			 * be checking all nodes, not just leaf nodes
+			 */
+			cache_set_inconsistent_on(bkey_cmp(b->data->min_key,
+							   next_min), c,
 				"btree node has incorrect min key: %llu:%llu != %llu:%llu",
 				b->data->min_key.inode,
 				b->data->min_key.offset,
diff --git a/drivers/md/bcache/inode.c b/drivers/md/bcache/inode.c
index 53b699920e1d..38189277ace3 100644
--- a/drivers/md/bcache/inode.c
+++ b/drivers/md/bcache/inode.c
@@ -51,30 +51,31 @@ ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k)
 	}
 }
 
-static bool bch_inode_invalid(const struct cache_set *c, struct bkey_s_c k)
+static const char *bch_inode_invalid(const struct cache_set *c,
+				     struct bkey_s_c k) /* NULL if valid, else the reason */
 {
 	if (k.k->p.offset)
-		return true;
+		return "nonzero offset";
 
 	switch (k.k->type) {
 	case BCH_INODE_FS:
 		if (bkey_val_bytes(k.k) != sizeof(struct bch_inode))
-			return true;
+			return "incorrect value size";
 
 		if (k.k->p.inode < BLOCKDEV_INODE_MAX)
-			return true;
+			return "fs inode in blockdev range";
 
-		return false;
+		return NULL; /* fs inode passed every check */
 	case BCH_INODE_BLOCKDEV:
 		if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
-			return true;
+			return "incorrect value size";
 
 		if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
-			return true;
+			return "blockdev inode in fs range";
 
-		return false;
+		return NULL; /* blockdev inode passed every check */
 	default:
-		return true;
+		return "invalid type";
 	}
 }
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 1a4736111b44..0d15ec0ed78e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -12,6 +12,7 @@
 #include "buckets.h"
 #include "clock.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "gc.h"
 #include "io.h"
@@ -195,106 +196,13 @@ static void bch_bbio_reset(struct bbio *b)
 
 /* IO errors */
 
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
-{
-	/*
-	 * The halflife of an error is:
-	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
-	 */
-
-	if (ca->set->error_decay) {
-		unsigned count = atomic_inc_return(&ca->io_count);
-
-		while (count > ca->set->error_decay) {
-			unsigned errors;
-			unsigned old = count;
-			unsigned new = count - ca->set->error_decay;
-
-			/*
-			 * First we subtract refresh from count; each time we
-			 * succesfully do so, we rescale the errors once:
-			 */
-
-			count = atomic_cmpxchg(&ca->io_count, old, new);
-
-			if (count == old) {
-				count = new;
-
-				errors = atomic_read(&ca->io_errors);
-				do {
-					old = errors;
-					new = ((uint64_t) errors * 127) / 128;
-					errors = atomic_cmpxchg(&ca->io_errors,
-								old, new);
-				} while (old != errors);
-			}
-		}
-	}
-
-	if (error) {
-		char buf[BDEVNAME_SIZE];
-
-		atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
-		queue_work(system_long_wq, &ca->io_error_work);
-		printk_ratelimited(KERN_ERR "%s: IO error on %s",
-		       bdevname(ca->disk_sb.bdev, buf), m);
-	}
-}
-
-void bch_cache_io_error_work(struct work_struct *work)
-{
-	struct cache *ca = container_of(work, struct cache, io_error_work);
-	unsigned errors = atomic_read(&ca->io_errors);
-	char buf[BDEVNAME_SIZE];
-
-	if (errors < ca->set->error_limit) {
-		bch_notify_cache_error(ca, false);
-	} else {
-		bch_notify_cache_error(ca, true);
-		printk_ratelimited(KERN_ERR "%s: too many IO errors, going RO",
-		       bdevname(ca->disk_sb.bdev, buf));
-		queue_work(system_long_wq, &ca->read_only_work);
-	}
-}
-
-void bch_bbio_count_io_errors(struct bbio *bio, int error, const char *m)
-{
-	struct cache_set *c;
-	unsigned threshold;
-
-	if (!bio->ca)
-		return;
-
-	c = bio->ca->set;
-	threshold = op_is_write(bio_op(&bio->bio))
-		? c->congested_write_threshold_us
-		: c->congested_read_threshold_us;
-
-	if (threshold && bio->submit_time_us) {
-		unsigned t = local_clock_us();
-
-		int us = t - bio->submit_time_us;
-		int congested = atomic_read(&c->congested);
-
-		if (us > (int) threshold) {
-			int ms = us / 1024;
-			c->congested_last_us = t;
-
-			ms = min(ms, CONGESTED_MAX + congested);
-			atomic_sub(ms, &c->congested);
-		} else if (congested < 0)
-			atomic_inc(&c->congested);
-	}
-
-	bch_count_io_errors(bio->ca, error, m);
-}
-
-void bch_bbio_endio(struct bbio *bio, int error, const char *m)
+void bch_bbio_endio(struct bbio *bio)
 {
 	struct closure *cl = bio->bio.bi_private;
 	struct cache *ca = bio->ca;
 
-	bch_bbio_count_io_errors(bio, error, m);
+	bch_account_bbio_completion(bio);
+
 	bio_put(&bio->bio);
 	if (ca)
 		percpu_ref_put(&ca->ref);
@@ -622,10 +530,10 @@ static void bch_write_endio(struct bio *bio)
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bch_write_bio *wbio = to_wbio(bio);
 
-	if (bio->bi_error) {
+	if (cache_nonfatal_io_err_on(bio->bi_error, wbio->bio.ca,
+				     "data write")) {
 		/* TODO: We could try to recover from this. */
 		if (!bkey_extent_is_cached(&op->insert_key.k)) {
-			__bcache_io_error(op->c, "IO error writing data");
 			op->error = bio->bi_error;
 		} else if (!op->replace)
 			set_closure_fn(cl, bch_write_error, op->c->wq);
@@ -638,7 +546,7 @@ static void bch_write_endio(struct bio *bio)
 	else if (wbio->bounce)
 		bch_bio_free_pages_pool(op->c, bio);
 
-	bch_bbio_endio(&wbio->bio, bio->bi_error, "writing data to cache");
+	bch_bbio_endio(&wbio->bio);
 }
 
 static const unsigned bch_crc_size[] = {
@@ -1445,11 +1353,7 @@ static int bio_checksum_uncompress(struct bch_read_bio *rbio)
 
 	if (rbio->csum_type != BCH_CSUM_NONE &&
 	    rbio->csum != checksum_bio(bio, rbio->csum_type)) {
-		/*
-		 * XXX: bch_bbio_count_io_errors() isn't counting checksum
-		 * errors
-		 */
-		__bcache_io_error(rbio->c, "checksum error");
+		cache_nonfatal_io_error(rbio->bio.ca, "checksum error");
 		return -EIO;
 	}
 
@@ -1537,7 +1441,10 @@ static void bch_read_endio(struct bio *bio)
 		ptr_stale(rbio->bio.ca, &rbio->bio.ptr);
 	int error = bio->bi_error;
 
-	bch_bbio_count_io_errors(&rbio->bio, error, "reading from cache");
+	bch_account_bbio_completion(&rbio->bio);
+
+	cache_nonfatal_io_err_on(error, rbio->bio.ca, "data read");
+
 	percpu_ref_put(&rbio->bio.ca->ref);
 
 	if (error)
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index 5c819c77a6ec..d424ddcf9bfd 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -140,10 +140,7 @@ enum bch_read_flags {
 
 int bch_read(struct cache_set *, struct bio *, u64);
 
-void bch_cache_io_error_work(struct work_struct *);
-void bch_count_io_errors(struct cache *, int, const char *);
-void bch_bbio_count_io_errors(struct bbio *, int, const char *);
-void bch_bbio_endio(struct bbio *, int, const char *);
+void bch_bbio_endio(struct bbio *);
 
 void bch_generic_make_request(struct bio *, struct cache_set *);
 void bch_bio_submit_work(struct work_struct *);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 3cfa378ecd6f..2625e8d4b550 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -8,6 +8,7 @@
 #include "buckets.h"
 #include "btree.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "gc.h"
 #include "io.h"
@@ -98,11 +99,11 @@ struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
 	k = jkeys->start;
 	*level = jkeys->level;
 
-	if (!jkeys->u64s || jkeys->u64s != k->k.u64s ||
-	    bkey_invalid(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(k))) {
-		bch_cache_set_error(c, "invalid btree root in journal");
+	if (cache_set_inconsistent_on(!jkeys->u64s ||
+			jkeys->u64s != k->k.u64s ||
+			bkey_invalid(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(k)),
+			c, "invalid btree root in journal"))
 		return NULL;
-	}
 
 	*level = jkeys->level;
 	return k;
@@ -217,7 +218,7 @@ static int __bch_journal_seq_blacklisted(struct cache_set *c, u64 seq,
 	if (seq <= j->seq)
 		return 0;
 
-	cache_set_err_on(seq > j->seq + 1, c,
+	cache_set_inconsistent_on(seq > j->seq + 1, c,
 			 "bset journal seq too far in the future: %llu > %llu",
 			 seq, j->seq);
 
@@ -357,38 +358,33 @@ static enum {
 
 	got = j->version;
 	expect = BCACHE_JSET_VERSION;
-	if (got != expect) {
-		__bch_cache_error(ca,
+
+	if (cache_inconsistent_on(got != expect, ca,
-			"bad journal version (got %llu expect %llu) sector %lluu",
+			"bad journal version (got %llu expect %llu) sector %llu",
-			got, expect, sector);
+			got, expect, sector))
 		return JOURNAL_ENTRY_BAD;
-	}
 
-	if (bytes > bucket_sectors_left << 9 ||
-	    bytes > PAGE_SIZE << JSET_BITS) {
-		__bch_cache_error(ca,
+	if (cache_inconsistent_on(bytes > bucket_sectors_left << 9 ||
+				  bytes > PAGE_SIZE << JSET_BITS, ca,
-			"journal entry too big (%zu bytes), sector %lluu",
+			"journal entry too big (%zu bytes), sector %llu",
-			bytes, sector);
+			bytes, sector))
 		return JOURNAL_ENTRY_BAD;
-	}
 
 	if (bytes > sectors_read << 9)
 		return JOURNAL_ENTRY_REREAD;
 
+	/* XXX: retry on checksum error */
+
 	got = j->csum;
 	expect = csum_set(j, JSET_CSUM_TYPE(j));
-	if (got != expect) {
-		__bch_cache_error(ca,
+	if (cache_inconsistent_on(got != expect, ca,
-			"journal checksum bad (got %llu expect %llu), sector %lluu",
+			"journal checksum bad (got %llu expect %llu), sector %llu",
-			got, expect, sector);
+			got, expect, sector))
 		return JOURNAL_ENTRY_BAD;
-	}
 
-	if (j->last_seq > j->seq) {
-		__bch_cache_error(ca,
-				  "invalid journal entry: last_seq > seq");
+	if (cache_inconsistent_on(j->last_seq > j->seq, ca,
+				  "invalid journal entry: last_seq > seq"))
 		return JOURNAL_ENTRY_BAD;
-	}
 
 	return JOURNAL_ENTRY_OK;
 }
@@ -427,12 +423,12 @@ reread:
 		bch_bio_map(bio, data);
 
 		ret = submit_bio_wait(bio);
-		if (bch_meta_read_fault("journal"))
+
+		if (cache_fatal_io_err_on(ret, ca,
+					  "journal read from sector %llu",
+					  sector + bucket_offset) ||
+		    bch_meta_read_fault("journal")) {
 			ret = -EIO;
-		if (ret) {
-			__bch_cache_error(ca,
-				"IO error %d reading journal from bucket_offset %llu",
-				ret, sector + bucket_offset);
 			goto err;
 		}
 
@@ -965,13 +961,13 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list)
 		       journal_seq_blacklist_find(j, cur_seq))
 			cur_seq++;
 
-		cache_set_err_on(journal_seq_blacklist_find(j, i->j.seq), c,
+		cache_set_inconsistent_on(journal_seq_blacklist_find(j, i->j.seq), c,
 				 "found blacklisted journal entry %llu",
 				 i->j.seq);
 
 		mutex_unlock(&j->blacklist_lock);
 
-		cache_set_err_on(i->j.seq != cur_seq, c,
+		cache_set_inconsistent_on(i->j.seq != cur_seq, c,
 			"journal entries %llu-%llu missing! (replaying %llu-%llu)",
 			cur_seq, i->j.seq - 1, last_seq(j), end_seq);
 
@@ -1353,13 +1349,10 @@ static void journal_write_endio(struct bio *bio)
 	struct journal_write *w = bio->bi_private;
 	struct journal *j = w->j;
 
-	if (bio->bi_error || bch_meta_write_fault("journal")) {
+	if (cache_fatal_io_err_on(bio->bi_error, ca, "journal write") ||
+	    bch_meta_write_fault("journal")) {
 		set_bit(JOURNAL_ERROR, &j->flags);
 		__journal_entry_close(j, JOURNAL_ENTRY_ERROR);
-
-		__bch_cache_error(ca, "IO error %d writing journal",
-				  bio->bi_error);
-		bch_cache_set_io_error(ca->set);
 	}
 
 	closure_put(&j->io);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 13e08ed237ce..2397fdc47500 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -28,6 +28,7 @@
 #include "btree.h"
 #include "clock.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "io.h"
 #include "journal.h"
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 20b79dbdbc7e..efa03f795bab 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -12,6 +12,7 @@
 #include "btree.h"
 #include "clock.h"
 #include "debug.h"
+#include "error.h"
 #include "fs-gc.h"
 #include "gc.h"
 #include "inode.h"
@@ -59,6 +60,7 @@ static int bch_chardev_major;
 static struct class *bch_chardev_class;
 static struct device *bch_chardev;
 static DEFINE_IDR(bch_chardev_minor);
+static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
 
 struct workqueue_struct *bcache_io_wq;
 
@@ -491,7 +493,12 @@ static void write_super_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+	/* XXX: return errors directly */
+
+	cache_fatal_io_err_on(bio->bi_error, ca, "superblock write");
+
+	bch_account_io_completion(ca);
+
 	closure_put(&ca->set->sb_write);
 	percpu_ref_put(&ca->ref);
 }
@@ -690,15 +697,41 @@ static void bch_writes_disabled(struct percpu_ref *writes)
 	complete(&c->write_disable_complete);
 }
 
-void bch_cache_set_read_only(struct cache_set *c)
+static void bch_cache_set_read_only_work(struct work_struct *work)
 {
-	lockdep_assert_held(&bch_register_lock);
+	struct cache_set *c =
+		container_of(work, struct cache_set, read_only_work);
+
+	init_completion(&c->write_disable_complete); /* see bch_writes_disabled() */
+	percpu_ref_put(&c->writes); /* pairs with get in bch_cache_set_read_only() */
+
+	del_timer_sync(&c->foreground_write_wakeup);
+	cancel_delayed_work_sync(&c->pd_controllers_update);
+
+	c->foreground_write_pd.rate.rate = UINT_MAX; /* presumably lifts throttling - confirm */
+	bch_wake_delayed_writes((unsigned long) c);
+
+	/* Wait for outstanding writes to complete: */
+	wait_for_completion(&c->write_disable_complete);
+
+	__bch_cache_set_read_only(c);
 
+	bch_notify_cache_set_read_only(c);
+	trace_bcache_cache_set_read_only_done(c);
+
+	set_bit(CACHE_SET_RO_COMPLETE, &c->flags);
+	wake_up(&bch_read_only_wait); /* wakes bch_cache_set_read_only_sync() */
+}
+
+bool bch_cache_set_read_only(struct cache_set *c)
+{
 	if (test_and_set_bit(CACHE_SET_RO, &c->flags))
-		return;
+		return false; /* CACHE_SET_RO was already set */
 
 	trace_bcache_cache_set_read_only(c);
 
+	percpu_ref_get(&c->writes); /* put by bch_cache_set_read_only_work() */
+
 	/*
 	 * Block new foreground-end write operations from starting - any new
 	 * writes will return -EROFS:
@@ -707,20 +740,18 @@ void bch_cache_set_read_only(struct cache_set *c)
 	 * allocated space can still happen until stopping the allocator in
 	 * bch_cache_allocator_stop()).
 	 */
-	init_completion(&c->write_disable_complete);
 	percpu_ref_kill(&c->writes);
 
-	bch_wake_delayed_writes((unsigned long) c);
-	del_timer_sync(&c->foreground_write_wakeup);
-	cancel_delayed_work_sync(&c->pd_controllers_update);
-
-	/* Wait for outstanding writes to complete: */
-	wait_for_completion(&c->write_disable_complete);
+	queue_work(system_unbound_wq, &c->read_only_work);
+	return true; /* work queued; it sets CACHE_SET_RO_COMPLETE when done */
+}
 
-	__bch_cache_set_read_only(c);
+void bch_cache_set_read_only_sync(struct cache_set *c)
+{
+	bch_cache_set_read_only(c); /* returns early if CACHE_SET_RO is already set */
 
-	bch_notify_cache_set_read_only(c);
-	trace_bcache_cache_set_read_only_done(c);
+	wait_event(bch_read_only_wait, /* woken by bch_cache_set_read_only_work() */
+		   test_bit(CACHE_SET_RO_COMPLETE, &c->flags));
 }
 
 static const char *__bch_cache_set_read_write(struct cache_set *c)
@@ -768,7 +799,7 @@ const char *bch_cache_set_read_write(struct cache_set *c)
 
 	lockdep_assert_held(&bch_register_lock);
 
-	if (!test_bit(CACHE_SET_RO, &c->flags))
+	if (!test_bit(CACHE_SET_RO_COMPLETE, &c->flags))
 		return NULL;
 
 	for_each_cache(ca, c, i)
@@ -783,6 +814,8 @@ const char *bch_cache_set_read_write(struct cache_set *c)
 		return err;
 
 	percpu_ref_reinit(&c->writes);
+
+	clear_bit(CACHE_SET_RO_COMPLETE, &c->flags);
 	clear_bit(CACHE_SET_RO, &c->flags);
 
 	return NULL;
@@ -791,38 +824,6 @@ err:
 	return err;
 }
 
-static void bch_cache_set_read_only_work(struct work_struct *work)
-{
-	struct cache_set *c =
-		container_of(work, struct cache_set, read_only_work);
-
-	mutex_lock(&bch_register_lock);
-	bch_cache_set_read_only(c);
-	mutex_unlock(&bch_register_lock);
-}
-
-void bch_cache_set_io_error(struct cache_set *c)
-{
-	pr_err("%pU going read only", c->sb.set_uuid.b);
-	schedule_work(&c->read_only_work);
-}
-
-void bch_cache_set_fail(struct cache_set *c)
-{
-	switch (c->opts.on_error_action) {
-	case BCH_ON_ERROR_CONTINUE:
-		break;
-	case BCH_ON_ERROR_RO:
-		pr_err("%pU going read only", c->sb.set_uuid.b);
-		schedule_work(&c->read_only_work);
-		break;
-	case BCH_ON_ERROR_PANIC:
-		panic("bcache: %pU panic after error\n",
-		      c->sb.set_uuid.b);
-		break;
-	}
-}
-
 /* Cache set startup/shutdown: */
 
 void bch_cache_set_release(struct kobject *kobj)
@@ -893,7 +894,7 @@ static void cache_set_flush(struct closure *cl)
 		device_unregister(c->chardev);
 
 	mutex_lock(&bch_register_lock);
-	bch_cache_set_read_only(c);
+	bch_cache_set_read_only_sync(c);
 
 	if (c->kobj.state_in_sysfs)
 		kobject_del(&c->kobj);
@@ -986,6 +987,8 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
 	if (cache_sb_to_cache_set(c, sb))
 		goto err;
 
+	scnprintf(c->uuid, sizeof(c->uuid), "%pU", &c->sb.user_uuid);
+
 	c->opts = (struct cache_set_opts) {
 		   .read_only = 0,
 		   .on_error_action = CACHE_ERROR_ACTION(&c->sb),
@@ -1145,7 +1148,7 @@ static const char *run_cache_set(struct cache_set *c)
 	lockdep_assert_held(&bch_register_lock);
 	BUG_ON(test_bit(CACHE_SET_RUNNING, &c->flags));
 
-	/* We don't want bch_cache_set_error() to free underneath us */
+	/* We don't want bch_fatal_error() to free underneath us */
 	closure_get(&c->caching);
 
 	/*
@@ -1307,7 +1310,7 @@ static const char *run_cache_set(struct cache_set *c)
 	bch_prio_timer_start(c, WRITE);
 
 	if (c->opts.read_only) {
-		bch_cache_set_read_only(c);
+		bch_cache_set_read_only_sync(c);
 	} else {
 		err = __bch_cache_set_read_write(c);
 		if (err)
@@ -1322,7 +1325,9 @@ static const char *run_cache_set(struct cache_set *c)
 
 	bcache_write_super(c);
 
-	bch_blockdev_volumes_start(c);
+	err = "can't bring up blockdev volumes";
+	if (bch_blockdev_volumes_start(c))
+		goto err;
 
 	bch_debug_init_cache_set(c);
 
@@ -1467,9 +1472,9 @@ void bch_cache_read_only(struct cache *ca)
 		return;
 
 	if (!cache_may_remove(ca)) {
-		pr_warning("Required member %s for %pU going RO, cache set going RO",
-			   buf, &c->sb.set_uuid);
-		bch_cache_set_read_only(c);
+		printk(__bch_err_fmt(c, "required member %s going RO, forcing fs RO",
+				     buf));
+		bch_cache_set_read_only_sync(c);
 	}
 
 	/*
@@ -1484,17 +1489,6 @@ void bch_cache_read_only(struct cache *ca)
 	bch_cache_member_info_update(ca);
 }
 
-static void bch_cache_read_only_work(struct work_struct *work)
-{
-	struct cache *ca = container_of(work, struct cache, read_only_work);
-
-	/* Going RO because of an error: */
-
-	mutex_lock(&bch_register_lock);
-	bch_cache_read_only(ca);
-	mutex_unlock(&bch_register_lock);
-}
-
 static const char *__bch_cache_read_write(struct cache *ca)
 {
 	const char *err;
@@ -1824,7 +1818,6 @@ static const char *cache_alloc(struct bcache_superblock *sb,
 	ca->self.devices[0] = ca;
 
 	INIT_WORK(&ca->free_work, bch_cache_free_work);
-	INIT_WORK(&ca->read_only_work, bch_cache_read_only_work);
 	INIT_WORK(&ca->remove_work, bch_cache_remove_work);
 	bio_init(&ca->journal.bio);
 	ca->journal.bio.bi_max_vecs = 8;
@@ -1837,7 +1830,7 @@ static const char *cache_alloc(struct bcache_superblock *sb,
 	ca->disk_sb.bdev->bd_holder = ca;
 	memset(sb, 0, sizeof(*sb));
 
-	INIT_WORK(&ca->io_error_work, bch_cache_io_error_work);
+	INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work);
 
 	err = "dynamic fault";
 	if (cache_set_init_fault("cache_alloc"))
@@ -2287,6 +2280,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
 		list_for_each_entry(c, &bch_cache_sets, list)
 			bch_cache_set_read_only(c);
 
+		list_for_each_entry(c, &bch_cache_sets, list)
+			bch_cache_set_read_only_sync(c);
+
 		mutex_unlock(&bch_register_lock);
 	}
 
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
index 258152a42014..15b03c6e54a2 100644
--- a/drivers/md/bcache/super.h
+++ b/drivers/md/bcache/super.h
@@ -152,9 +152,6 @@ const char *validate_super(struct bcache_superblock *, struct cache_sb *);
 
 void bch_cache_member_info_update(struct cache *);
 
-void bch_cache_set_io_error(struct cache_set *);
-void bch_cache_set_fail(struct cache_set *);
-
 void bch_cache_set_release(struct kobject *);
 void bch_cache_release(struct kobject *);
 
@@ -174,7 +171,8 @@ const char *bch_register_cache_set(char * const *, unsigned,
 				   struct cache_set_opts,
 				   struct cache_set **);
 
-void bch_cache_set_read_only(struct cache_set *);
+bool bch_cache_set_read_only(struct cache_set *);
+void bch_cache_set_read_only_sync(struct cache_set *);
 const char *bch_cache_set_read_write(struct cache_set *);
 
 void bch_cache_read_only(struct cache *);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 34bec1106f0e..892306f1b2e9 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "clock.h"
 #include "debug.h"
+#include "error.h"
 #include "extents.h"
 #include "io.h"
 #include "keybuf.h"
@@ -168,9 +169,12 @@ static void read_dirty_endio(struct bio *bio)
 {
 	struct dirty_io *io = container_of(bio, struct dirty_io, bio);
 
-	bch_count_io_errors(io->ca, bio->bi_error,
-			    "reading dirty data from cache");
+	cache_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read");
+
+	bch_account_io_completion(io->ca);
+
+	/* Completion accounting does not drop the device ref */
 	percpu_ref_put(&io->ca->ref);
 
 	if (ptr_stale(io->ca, &io->ptr))
 		bio->bi_error = -EINTR;
diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c
index 22e728c18121..2f004c3e69a7 100644
--- a/drivers/md/bcache/xattr.c
+++ b/drivers/md/bcache/xattr.c
@@ -75,21 +75,22 @@ static int xattr_cmp(const struct bch_xattr *xattr,
 		memcmp(xattr->x_name, q->name, q->len);
 }
 
-static bool bch_xattr_invalid(const struct cache_set *c, struct bkey_s_c k)
+static const char *bch_xattr_invalid(const struct cache_set *c,
+				     struct bkey_s_c k) /* NULL if valid, else the reason */
 {
 	switch (k.k->type) {
 	case BCH_XATTR:
-		if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
-			return true;
+		return bkey_val_bytes(k.k) < sizeof(struct bch_xattr)
+			? "value too small"
+			: NULL;
 
-		return false;
 	case BCH_XATTR_WHITEOUT:
-		if (bkey_val_bytes(k.k))
-			return true;
+		return bkey_val_bytes(k.k) != 0
+			? "value size should be zero"
+			: NULL;
 
-		return false;
 	default:
-		return true;
+		return "invalid type";
 	}
 }