summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/md/bcache/Makefile6
-rw-r--r--drivers/md/bcache/alloc.c40
-rw-r--r--drivers/md/bcache/bcache.h72
-rw-r--r--drivers/md/bcache/bkey_methods.c49
-rw-r--r--drivers/md/bcache/bkey_methods.h8
-rw-r--r--drivers/md/bcache/blockdev.c7
-rw-r--r--drivers/md/bcache/btree.c72
-rw-r--r--drivers/md/bcache/debug.c35
-rw-r--r--drivers/md/bcache/dirent.c17
-rw-r--r--drivers/md/bcache/error.c134
-rw-r--r--drivers/md/bcache/error.h202
-rw-r--r--drivers/md/bcache/extents.c59
-rw-r--r--drivers/md/bcache/fs-gc.c9
-rw-r--r--drivers/md/bcache/fs.c15
-rw-r--r--drivers/md/bcache/gc.c16
-rw-r--r--drivers/md/bcache/inode.c19
-rw-r--r--drivers/md/bcache/io.c117
-rw-r--r--drivers/md/bcache/io.h5
-rw-r--r--drivers/md/bcache/journal.c61
-rw-r--r--drivers/md/bcache/request.c1
-rw-r--r--drivers/md/bcache/super.c130
-rw-r--r--drivers/md/bcache/super.h6
-rw-r--r--drivers/md/bcache/writeback.c7
-rw-r--r--drivers/md/bcache/xattr.c17
24 files changed, 631 insertions, 473 deletions
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 650d6542eb5a..95142f1dc2dd 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -3,8 +3,8 @@ obj-$(CONFIG_BCACHE) += bcache.o
bcache-y := acl.o alloc.o bkey.o bkey_methods.o blockdev.o\
bset.o btree.o buckets.o chardev.o clock.o closure.o debug.o dirent.o\
- extents.o fs.o fs-gc.o gc.o inode.o io.o journal.o keybuf.o keylist.o\
- migrate.o move.o movinggc.o notify.o request.o siphash.o six.o stats.o\
- super.o sysfs.o tier.o trace.o util.o writeback.o xattr.o
+ error.o extents.o fs.o fs-gc.o gc.o inode.o io.o journal.o keybuf.o\
+ keylist.o migrate.o move.o movinggc.o notify.o request.o siphash.o\
+ six.o stats.o super.o sysfs.o tier.o trace.o util.o writeback.o xattr.o
ccflags-y := -Werror
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4b6930452935..d86133c79420 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -58,6 +58,7 @@
#include "btree.h"
#include "buckets.h"
#include "clock.h"
+#include "error.h"
#include "extents.h"
#include "io.h"
#include "journal.h"
@@ -281,15 +282,10 @@ static int bch_prio_write(struct cache *ca)
spin_unlock(&ca->prio_buckets_lock);
ret = prio_io(ca, r, REQ_OP_WRITE);
- if (bch_meta_write_fault("prio"))
- ret = -EIO;
- if (ret) {
- __bch_cache_error(ca,
- "IO error %d writing prios to bucket %lu",
- ret, r);
- bch_cache_set_io_error(c);
+ if (cache_fatal_io_err_on(ret, ca,
+ "prio write to bucket %lu", r) ||
+ bch_meta_write_fault("prio"))
return ret;
- }
}
spin_lock(&c->journal.lock);
@@ -300,11 +296,8 @@ static int bch_prio_write(struct cache *ca)
spin_unlock(&c->journal.lock);
ret = bch_journal_meta(&c->journal);
- if (ret) {
- __bch_cache_set_error(c,
- "IO error %d journalling new prios", ret);
+ if (cache_set_fatal_err_on(ret, c, "journalling new prios"))
return ret;
- }
/*
* Don't want the old priorities to get garbage collected until after we
@@ -347,7 +340,7 @@ int bch_prio_read(struct cache *ca)
if ((bucket < ca->mi.first_bucket && bucket >= ca->mi.nbuckets) ||
bch_meta_read_fault("prio")) {
- bch_cache_error(ca, "bad prio bucket %llu", bucket);
+ cache_inconsistent(ca, "bad prio bucket %llu", bucket);
return -EIO;
}
@@ -361,32 +354,27 @@ int bch_prio_read(struct cache *ca)
bucket_nr++;
ret = prio_io(ca, bucket, REQ_OP_READ);
- if (ret || bch_meta_read_fault("prio")) {
- bch_cache_error(ca,
- "IO error %d reading prios from bucket %llu",
- ret, bucket);
+ if (cache_fatal_io_err_on(ret, ca,
+ "prio read from bucket %llu",
+ bucket) ||
+ bch_meta_read_fault("prio"))
return -EIO;
- }
got = p->magic;
expect = pset_magic(&c->sb);
- if (got != expect) {
- bch_cache_error(ca,
+ if (cache_inconsistent_on(got != expect, ca,
"bad magic (got %llu expect %llu) while reading prios from bucket %llu",
- got, expect, bucket);
+ got, expect, bucket))
return -EIO;
- }
got = p->csum;
expect = bch_checksum(PSET_CSUM_TYPE(p),
&p->magic,
bucket_bytes(ca) - 8);
- if (got != expect) {
- bch_cache_error(ca,
+ if (cache_inconsistent_on(got != expect, ca,
"bad checksum (got %llu expect %llu) while reading prios from bucket %llu",
- got, expect, bucket);
+ got, expect, bucket))
return -EIO;
- }
bucket = p->next_bucket;
d = p->data;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 224858445b5e..d1b0b61de590 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -237,7 +237,6 @@ struct cache {
struct percpu_ref ref;
struct rcu_head free_rcu;
struct work_struct free_work;
- struct work_struct read_only_work;
struct work_struct remove_work;
unsigned long flags;
@@ -376,6 +375,7 @@ enum {
CACHE_SET_STOPPING,
CACHE_SET_RUNNING,
CACHE_SET_RO,
+ CACHE_SET_RO_COMPLETE,
CACHE_SET_GC_STOPPING,
CACHE_SET_GC_FAILURE,
CACHE_SET_BDEV_MOUNTED,
@@ -404,6 +404,8 @@ struct cache_set {
int minor;
struct device *chardev;
+ struct super_block *vfs_sb;
+ char uuid[40];
/* Counts outstanding writes, for clean transition to read-only */
struct percpu_ref writes;
@@ -687,74 +689,6 @@ static inline unsigned bucket_bytes(const struct cache *ca)
#define prio_buckets(ca) \
DIV_ROUND_UP((size_t) (ca)->mi.nbuckets, prios_per_bucket(ca))
-/* Error handling macros */
-
-/* The underscore versions merely log an error, they don't fail the cache set */
-#define __bch_cache_set_error(c, fmt, ...) \
- printk(KERN_ERR "bcache: error on %pU: " fmt "\n", \
- (c)->sb.set_uuid.b, ##__VA_ARGS__)
-
-#define __bch_cache_error(ca, fmt, ...) \
-do { \
- char _buf[BDEVNAME_SIZE]; \
- __bch_cache_set_error((ca)->set, "%s: " fmt, \
- bdevname((ca)->disk_sb.bdev, _buf), \
- ##__VA_ARGS__); \
-} while (0)
-
-/* These do fail the cache set */
-#define bch_cache_set_error(c, ...) \
-do { \
- __bch_cache_set_error(c, __VA_ARGS__); \
- bch_cache_set_fail(c); \
-} while (0)
-
-#define bch_cache_error(ca, ...) \
-do { \
- __bch_cache_error(ca, __VA_ARGS__); \
- bch_cache_set_fail((ca)->set); \
-} while (0)
-
-#define btree_bug(b, ...) \
-do { \
- __bch_cache_set_error((b)->c, __VA_ARGS__); \
- BUG(); \
-} while (0)
-
-#define cache_set_bug(c, ...) \
-do { \
- __bch_cache_set_error(c, __VA_ARGS__); \
- BUG(); \
-} while (0)
-
-#define btree_bug_on(cond, b, ...) \
-do { \
- if (cond) \
- btree_bug(b, __VA_ARGS__); \
-} while (0)
-
-#define cache_set_bug_on(cond, c, ...) \
-do { \
- if (cond) \
- cache_set_bug(c, __VA_ARGS__); \
-} while (0)
-
-#define cache_set_err_on(cond, c, ...) \
-do { \
- if (cond) \
- bch_cache_set_error(c, __VA_ARGS__); \
-} while (0)
-
-#define __bcache_io_error(c, fmt, ...) \
- printk_ratelimited(KERN_ERR "bcache: IO error on %pU: " fmt "\n",\
- (c)->sb.set_uuid.b, ##__VA_ARGS__)
-
-#define bcache_io_error(c, bio, fmt, ...) \
-do { \
- __bcache_io_error(c, fmt, ##__VA_ARGS__); \
- (bio)->bi_error = -EIO; \
-} while (0)
-
/* Forward declarations */
long bch_chardev_ioctl(struct file *, unsigned, unsigned long);
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index fc0ca2631921..9544f8696d26 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -3,6 +3,7 @@
#include "bkey_methods.h"
#include "btree.h"
#include "dirent.h"
+#include "error.h"
#include "extents.h"
#include "inode.h"
#include "xattr.h"
@@ -15,58 +16,68 @@ static const struct bkey_ops *bch_bkey_ops[] = {
[BKEY_TYPE_BTREE] = &bch_bkey_btree_ops,
};
-bool bkey_invalid(struct cache_set *c,
- enum bkey_type type,
- struct bkey_s_c k)
+/* Returns string indicating reason for being invalid, or NULL if valid: */
+const char *bkey_invalid(struct cache_set *c, enum bkey_type type,
+ struct bkey_s_c k)
{
const struct bkey_ops *ops = bch_bkey_ops[type];
if (k.k->u64s < BKEY_U64s)
- return true;
+ return "u64s too small";
if (k.k->size &&
(bkey_deleted(k.k) || !ops->is_extents))
- return true;
+ return "nonzero size field";
switch (k.k->type) {
case KEY_TYPE_DELETED:
- return false;
+ return NULL;
case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
- return bkey_val_bytes(k.k) != 0;
+ return bkey_val_bytes(k.k) != 0
+ ? "value size should be zero"
+ : NULL;
case KEY_TYPE_COOKIE:
- return (bkey_val_bytes(k.k) != sizeof(struct bch_cookie));
+ return bkey_val_bytes(k.k) != sizeof(struct bch_cookie)
+ ? "incorrect value size"
+ : NULL;
default:
if (k.k->type < KEY_TYPE_GENERIC_NR)
- return true;
+ return "invalid type";
return ops->key_invalid(c, k);
}
}
+/*
+ * Like bkey_invalid(), but first checks that the key lies within the range
+ * of btree node @b. Returns a string describing why @k is invalid, or
+ * whatever bkey_invalid() returns for the node's key type if the range
+ * checks pass.
+ */
+const char *btree_bkey_invalid(struct cache_set *c, struct btree *b,
+ struct bkey_s_c k)
+{
+ /* Start of the key must not be before the node's min_key: */
+ if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0)
+ return "key before start of btree node";
+
+ /* Position of the key must not be past the node's max_key: */
+ if (bkey_cmp(k.k->p, b->data->max_key) > 0)
+ return "key past end of btree node";
+
+ return bkey_invalid(c, btree_node_type(b), k);
+}
+
void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k)
{
enum bkey_type type = btree_node_type(b);
const struct bkey_ops *ops = bch_bkey_ops[type];
+ const char *invalid;
BUG_ON(!k.k->u64s);
- cache_set_bug_on(bkey_cmp(bkey_start_pos(k.k),
- b->data->min_key) < 0,
- c, "key before start of btree node");
-
- cache_set_bug_on(bkey_cmp(k.k->p,
- b->data->max_key) > 0,
- c, "key past end of btree node");
-
- if (bkey_invalid(c, type, k)) {
+ invalid = btree_bkey_invalid(c, b, k);
+ if (invalid) {
char buf[160];
bch_bkey_val_to_text(c, type, buf, sizeof(buf), k);
- cache_set_bug(c, "invalid bkey %s", buf);
+ cache_set_bug(c, "invalid bkey %s: %s", buf, invalid);
return;
}
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index a8a5e802160e..a21325b87208 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -15,7 +15,8 @@ struct btree;
struct bkey;
struct bkey_ops {
- bool (*key_invalid)(const struct cache_set *,
+ /* Returns reason for being invalid if invalid, else NULL: */
+ const char * (*key_invalid)(const struct cache_set *,
struct bkey_s_c);
void (*key_debugcheck)(struct cache_set *, struct btree *,
struct bkey_s_c);
@@ -25,7 +26,10 @@ struct bkey_ops {
bool is_extents;
};
-bool bkey_invalid(struct cache_set *, enum bkey_type, struct bkey_s_c);
+const char *bkey_invalid(struct cache_set *, enum bkey_type, struct bkey_s_c);
+const char *btree_bkey_invalid(struct cache_set *, struct btree *,
+ struct bkey_s_c);
+
void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c);
void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
char *, size_t, struct bkey_s_c);
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c
index 1a788f2fd7c5..20cc8dd39a4f 100644
--- a/drivers/md/bcache/blockdev.c
+++ b/drivers/md/bcache/blockdev.c
@@ -2,6 +2,7 @@
#include "bcache.h"
#include "blockdev.h"
#include "btree.h"
+#include "error.h"
#include "inode.h"
#include "request.h"
#include "super.h"
@@ -719,12 +720,8 @@ int bch_blockdev_volumes_start(struct cache_set *c)
inode = bkey_s_c_to_inode_blockdev(k);
ret = blockdev_volume_run(c, inode);
- if (ret) {
- bch_cache_set_error(c,
- "can't bring up blockdev volumes: %i",
- ret);
+ if (ret)
break;
- }
}
bch_btree_iter_unlock(&iter);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 6edbb06d46f8..3bda6f09e8e1 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -24,6 +24,7 @@
#include "alloc.h"
#include "btree.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "gc.h"
#include "io.h"
@@ -352,8 +353,8 @@ static void bch_btree_init_next(struct cache_set *c, struct btree *b,
_end - _data) ^ 0xffffffffffffffffULL; \
})
-#define btree_node_error(b, ca, ptr, fmt, ...) \
- bch_cache_error(ca, \
+#define btree_node_error(b, c, ptr, fmt, ...) \
+ cache_set_inconsistent(c, \
"btree node error at btree %u level %u/%u bucket %zu block %u u64s %u: " fmt,\
(b)->btree_id, (b)->level, btree_node_root(b) \
? btree_node_root(b)->level : -1, \
@@ -375,14 +376,16 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
return "bset past end of btree node";
if (i != &b->data->keys && !i->u64s)
- btree_node_error(b, ca, ptr, "empty set");
+ btree_node_error(b, c, ptr, "empty set");
for (k = i->start;
k != bset_bkey_last(i);) {
struct bkey_tup tup;
+ struct bkey_s_c u;
+ const char *invalid;
if (!k->u64s) {
- btree_node_error(b, ca, ptr,
+ btree_node_error(b, c, ptr,
"KEY_U64s 0: %zu bytes of metadata lost",
(void *) bset_bkey_last(i) - (void *) k);
@@ -391,7 +394,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
}
if (bkey_next(k) > bset_bkey_last(i)) {
- btree_node_error(b, ca, ptr,
+ btree_node_error(b, c, ptr,
"key extends past end of bset");
i->u64s = (u64 *) k - i->_data;
@@ -399,16 +402,15 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
}
bkey_disassemble(&tup, f, k);
+ u = bkey_tup_to_s_c(&tup);
- if (bkey_invalid(c, btree_node_type(b),
- bkey_tup_to_s_c(&tup))) {
+ invalid = btree_bkey_invalid(c, b, u);
+ if (invalid) {
char buf[160];
- bkey_disassemble(&tup, f, k);
bch_bkey_val_to_text(c, btree_node_type(b),
- buf, sizeof(buf),
- bkey_tup_to_s_c(&tup));
- btree_node_error(b, ca, ptr,
+ buf, sizeof(buf), u);
+ btree_node_error(b, c, ptr,
"invalid bkey %s", buf);
i->u64s -= k->u64s;
@@ -474,6 +476,8 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
if (BSET_CSUM_TYPE(i) >= BCH_CSUM_NR)
goto err;
+ /* XXX: retry checksum errors */
+
err = "bad checksum";
if (b->data->csum != btree_csum_set(b, b->data))
goto err;
@@ -542,13 +546,13 @@ out:
return;
err:
set_btree_node_read_error(b);
- btree_node_error(b, ca, ptr, "%s", err);
+ btree_node_error(b, c, ptr, "%s", err);
goto out;
}
static void btree_node_read_endio(struct bio *bio)
{
- bch_bbio_endio(to_bbio(bio), bio->bi_error, "reading btree");
+ bch_bbio_endio(to_bbio(bio));
}
static void bch_btree_node_read(struct cache_set *c, struct btree *b)
@@ -563,9 +567,10 @@ static void bch_btree_node_read(struct cache_set *c, struct btree *b)
closure_init_stack(&cl);
pick = bch_btree_pick_ptr(c, b);
- if (!pick.ca) {
+ if (cache_set_fatal_err_on(!pick.ca, c,
+ "no cache device for btree node")) {
set_btree_node_read_error(b);
- goto missing;
+ return;
}
percpu_ref_get(&pick.ca->ref);
@@ -583,29 +588,18 @@ static void bch_btree_node_read(struct cache_set *c, struct btree *b)
closure_sync(&cl);
- if (bio->bi_error ||
- bch_meta_read_fault("btree"))
+ if (cache_fatal_io_err_on(bio->bi_error,
+ pick.ca, "IO error reading bucket %zu",
+ PTR_BUCKET_NR(pick.ca, &pick.ptr)) ||
+ bch_meta_read_fault("btree")) {
set_btree_node_read_error(b);
-
- bio_put(bio);
-
- if (btree_node_read_error(b))
- goto err;
+ goto out;
+ }
bch_btree_node_read_done(c, b, pick.ca, &pick.ptr);
bch_time_stats_update(&c->btree_read_time, start_time);
-
- percpu_ref_put(&pick.ca->ref);
- return;
-
-missing:
- bch_cache_set_error(c, "no cache device for btree node");
- percpu_ref_put(&pick.ca->ref);
- return;
-
-err:
- bch_cache_error(pick.ca, "IO error reading bucket %zu",
- PTR_BUCKET_NR(pick.ca, &pick.ptr));
+out:
+ bio_put(bio);
percpu_ref_put(&pick.ca->ref);
}
@@ -646,20 +640,16 @@ static void btree_node_write_endio(struct bio *bio)
struct btree *b = container_of(cl, struct btree, io);
struct bch_write_bio *wbio = to_wbio(bio);
- if (bio->bi_error || bch_meta_write_fault("btree")) {
+ if (cache_fatal_io_err_on(bio->bi_error, wbio->bio.ca, "btree write") ||
+ bch_meta_write_fault("btree"))
set_btree_node_write_error(b);
- __bch_cache_error(wbio->bio.ca, "IO error %d writing btree",
- bio->bi_error);
- bch_cache_set_io_error(wbio->bio.ca->set);
- }
-
if (wbio->orig)
bio_endio(wbio->orig);
else if (wbio->bounce)
bch_bio_free_pages(bio);
- bch_bbio_endio(to_bbio(bio), bio->bi_error, "writing btree");
+ bch_bbio_endio(to_bbio(bio));
}
static void do_btree_node_write(struct closure *cl)
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 99d4657c4f4a..c0fb3cb19086 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -9,6 +9,7 @@
#include "btree.h"
#include "buckets.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "inode.h"
#include "io.h"
@@ -166,13 +167,12 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
void *p1 = kmap_atomic(bv.bv_page);
void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page);
- cache_set_err_on(memcmp(p1 + bv.bv_offset,
- p2 + bv.bv_offset,
- bv.bv_len),
- dc->disk.c,
- "verify failed at dev %s sector %llu",
- bdevname(dc->disk_sb.bdev, name),
- (uint64_t) bio->bi_iter.bi_sector);
+ if (memcmp(p1 + bv.bv_offset,
+ p2 + bv.bv_offset,
+ bv.bv_len))
+ panic("verify failed at dev %s sector %llu\n",
+ bdevname(dc->disk_sb.bdev, name),
+ (uint64_t) bio->bi_iter.bi_sector);
kunmap_atomic(p1);
}
@@ -199,7 +199,7 @@ void bch_verify_inode_refs(struct cache_set *c)
bch_inode_find_by_inum(c, k.k->p.inode, &inode)) {
bch_bkey_val_to_text(c, BTREE_ID_EXTENTS, buf,
sizeof(buf), k);
- bch_cache_set_error(c,
+ cache_set_inconsistent(c,
"extent for missing inode %llu\n%s",
k.k->p.inode, buf);
bch_btree_iter_unlock(&iter);
@@ -210,7 +210,7 @@ void bch_verify_inode_refs(struct cache_set *c)
if (!S_ISREG(inode.v.i_mode) &&
!S_ISLNK(inode.v.i_mode))
- bch_cache_set_error(c,
+ cache_set_inconsistent(c,
"extent for non regular file, inode %llu mode %u",
k.k->p.inode, inode.v.i_mode);
@@ -219,7 +219,7 @@ void bch_verify_inode_refs(struct cache_set *c)
if (k.k->p.offset > round_up(inode.v.i_size, PAGE_SIZE) >> 9) {
bch_bkey_val_to_text(c, BTREE_ID_EXTENTS, buf,
sizeof(buf), k);
- bch_cache_set_error(c,
+ cache_set_inconsistent(c,
"extent past end of inode %llu: i_size %llu extent\n%s",
k.k->p.inode, inode.v.i_size, buf);
}
@@ -234,7 +234,7 @@ void bch_verify_inode_refs(struct cache_set *c)
if (k.k->p.inode != cur_inum &&
bch_inode_find_by_inum(c, k.k->p.inode, &inode)) {
- bch_cache_set_error(c, "dirent for missing inode %llu",
+ cache_set_inconsistent(c, "dirent for missing inode %llu",
k.k->p.inode);
bch_btree_iter_unlock(&iter);
return;
@@ -243,7 +243,7 @@ void bch_verify_inode_refs(struct cache_set *c)
cur_inum = k.k->p.inode;
if (!S_ISDIR(inode.v.i_mode))
- bch_cache_set_error(c,
+ cache_set_inconsistent(c,
"dirent for non directory, inode %llu mode %u",
k.k->p.inode, inode.v.i_mode);
}
@@ -253,7 +253,7 @@ void bch_verify_inode_refs(struct cache_set *c)
POS(BCACHE_ROOT_INO, 0), k) {
if (k.k->p.inode != cur_inum &&
bch_inode_find_by_inum(c, k.k->p.inode, &inode)) {
- bch_cache_set_error(c,
+ cache_set_inconsistent(c,
"xattr for missing inode %llu",
k.k->p.inode);
bch_btree_iter_unlock(&iter);
@@ -262,11 +262,10 @@ void bch_verify_inode_refs(struct cache_set *c)
cur_inum = k.k->p.inode;
- if (!S_ISREG(inode.v.i_mode) &&
- !S_ISDIR(inode.v.i_mode))
- bch_cache_set_error(c,
- "xattr for non file/directory, inode %llu mode %u",
- k.k->p.inode, inode.v.i_mode);
+ cache_set_inconsistent_on(!S_ISREG(inode.v.i_mode) &&
+ !S_ISDIR(inode.v.i_mode), c,
+ "xattr for non file/directory, inode %llu mode %u",
+ k.k->p.inode, inode.v.i_mode);
}
bch_btree_iter_unlock(&iter);
}
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index c007b7b0d39d..b7554e87ac55 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -78,21 +78,22 @@ static int dirent_cmp(struct bkey_s_c_dirent d,
return len - q->len ?: memcmp(d.v->d_name, q->name, len);
}
-static bool bch_dirent_invalid(const struct cache_set *c, struct bkey_s_c k)
+static const char *bch_dirent_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_DIRENT:
- if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
- return true;
+ return bkey_val_bytes(k.k) < sizeof(struct bch_dirent)
+ ? "value too small"
+ : NULL;
- return false;
case BCH_DIRENT_WHITEOUT:
- if (bkey_val_bytes(k.k))
- return true;
+ return bkey_val_bytes(k.k) != 0
+ ? "value size should be zero"
+ : NULL;
- return false;
default:
- return true;
+ return "invalid type";
}
}
diff --git a/drivers/md/bcache/error.c b/drivers/md/bcache/error.c
new file mode 100644
index 000000000000..a0ca5ecbba7b
--- /dev/null
+++ b/drivers/md/bcache/error.c
@@ -0,0 +1,134 @@
+#include "bcache.h"
+#include "error.h"
+#include "io.h"
+#include "notify.h"
+#include "super.h"
+
+/*
+ * React to an inconsistency error on cache set @c according to the
+ * on_error_action option: keep going, go read only, or panic.
+ */
+void bch_inconsistent_error(struct cache_set *c)
+{
+	switch (c->opts.on_error_action) {
+	case BCH_ON_ERROR_PANIC:
+		panic("bcache: (%s) panic after error\n",
+		      c->vfs_sb ? c->vfs_sb->s_id : c->uuid);
+		break;
+	case BCH_ON_ERROR_RO:
+		if (test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)) {
+			/* Only log when the read only call returns nonzero: */
+			if (bch_cache_set_read_only(c))
+				__bch_cache_set_error(c, "emergency read only");
+		} else {
+			/*
+			 * Initial GC has not finished yet, so stop the cache
+			 * set instead of going read only.
+			 *
+			 * XXX do something better here?
+			 */
+			bch_cache_set_stop(c);
+		}
+		break;
+	case BCH_ON_ERROR_CONTINUE:
+		/* Nothing to do, the caller already logged the error. */
+		break;
+	}
+}
+
+/*
+ * Handle a fatal error on cache set @c: try to switch it to read only, and
+ * log "emergency read only" when bch_cache_set_read_only() returns nonzero.
+ *
+ * The message goes through __bch_cache_set_error() so that it carries the
+ * same "bcache (<name>): " prefix as bch_inconsistent_error() and as the
+ * error message the calling macro printed just before.
+ */
+void bch_fatal_error(struct cache_set *c)
+{
+	if (bch_cache_set_read_only(c))
+		__bch_cache_set_error(c, "emergency read only");
+}
+
+/* Nonfatal IO errors, IO error/latency accounting: */
+
+/*
+ * Just does IO error accounting:
+ *
+ * Counts one completed IO on @ca. Every time the IO count exceeds
+ * ca->set->error_decay, error_decay is subtracted from the count and the
+ * device's io_errors value is scaled by 127/128, so recorded errors decay
+ * as more IO completes. Does nothing when error_decay is zero.
+ */
+void bch_account_io_completion(struct cache *ca)
+{
+ /*
+ * The halflife of an error is:
+ * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
+ */
+
+ if (ca->set->error_decay) {
+ unsigned count = atomic_inc_return(&ca->io_count);
+
+ while (count > ca->set->error_decay) {
+ unsigned errors;
+ unsigned old = count;
+ unsigned new = count - ca->set->error_decay;
+
+ /*
+ * First we subtract error_decay (the "refresh" interval
+ * above) from count; each time we successfully do so, we
+ * rescale the errors once:
+ */
+
+ /*
+ * If the cmpxchg fails, count now holds the value that was
+ * actually found in io_count and the loop condition is
+ * re-evaluated against it.
+ */
+ count = atomic_cmpxchg(&ca->io_count, old, new);
+
+ if (count == old) {
+ count = new;
+
+ /* cmpxchg loop: retry until io_errors is scaled */
+ errors = atomic_read(&ca->io_errors);
+ do {
+ old = errors;
+ new = ((uint64_t) errors * 127) / 128;
+ errors = atomic_cmpxchg(&ca->io_errors,
+ old, new);
+ } while (old != errors);
+ }
+ }
+ }
+}
+
+/*
+ * IO error accounting and latency accounting:
+ *
+ * Called for a completed bbio. Returns immediately if the bbio has no cache
+ * device attached. Otherwise updates the cache set's congestion counter from
+ * the IO's latency (when a threshold is configured and a submit time was
+ * recorded), then does the per-device accounting in
+ * bch_account_io_completion().
+ */
+void bch_account_bbio_completion(struct bbio *bio)
+{
+ struct cache_set *c;
+ unsigned threshold;
+
+ if (!bio->ca)
+ return;
+
+ c = bio->ca->set;
+ /* Reads and writes have separate latency thresholds: */
+ threshold = op_is_write(bio_op(&bio->bio))
+ ? c->congested_write_threshold_us
+ : c->congested_read_threshold_us;
+
+ if (threshold && bio->submit_time_us) {
+ unsigned t = local_clock_us();
+
+ /* Elapsed time since submit_time_us, as a signed value: */
+ int us = t - bio->submit_time_us;
+ int congested = atomic_read(&c->congested);
+
+ if (us > (int) threshold) {
+ /* Dividing by 1024 only approximates milliseconds */
+ int ms = us / 1024;
+ c->congested_last_us = t;
+
+ /* Limit how much a single IO can subtract: */
+ ms = min(ms, CONGESTED_MAX + congested);
+ atomic_sub(ms, &c->congested);
+ } else if (congested < 0)
+ /* IO was under the threshold: move back up by one */
+ atomic_inc(&c->congested);
+ }
+
+ bch_account_io_completion(bio->ca);
+}
+
+/*
+ * Work function for a cache device's io_error_work: sends an error
+ * notification for the device and, once its io_errors value has reached the
+ * cache set's error_limit, sets the device read only if it is still active.
+ */
+void bch_nonfatal_io_error_work(struct work_struct *work)
+{
+	struct cache *ca = container_of(work, struct cache, io_error_work);
+	unsigned nr_errors = atomic_read(&ca->io_errors);
+	bool over_limit = nr_errors >= ca->set->error_limit;
+	char name[BDEVNAME_SIZE];
+
+	bch_notify_cache_error(ca, over_limit);
+
+	/* Below the limit there is nothing more to do: */
+	if (!over_limit)
+		return;
+
+	mutex_lock(&bch_register_lock);
+	if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE) {
+		printk(KERN_ERR "bcache: too many IO errors on %s, going RO\n",
+		       bdevname(ca->disk_sb.bdev, name));
+		bch_cache_read_only(ca);
+	}
+	mutex_unlock(&bch_register_lock);
+}
+
+/*
+ * Record one nonfatal IO error against @ca and queue the device's
+ * io_error_work on system_long_wq; does not log anything itself.
+ */
+void bch_nonfatal_io_error(struct cache *ca)
+{
+	/* Each error adds 1 << IO_ERROR_SHIFT to the io_errors value: */
+	const int one_error = 1 << IO_ERROR_SHIFT;
+
+	atomic_add(one_error, &ca->io_errors);
+	queue_work(system_long_wq, &ca->io_error_work);
+}
diff --git a/drivers/md/bcache/error.h b/drivers/md/bcache/error.h
new file mode 100644
index 000000000000..ea67bb92aaec
--- /dev/null
+++ b/drivers/md/bcache/error.h
@@ -0,0 +1,202 @@
+#ifndef _BCACHE_ERROR_H
+#define _BCACHE_ERROR_H
+
+#include <linux/printk.h>
+
+struct cache;
+struct cache_set;
+struct bbio;
+
+/*
+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag
+ * superblock as such
+ */
+
+/* Error messages: */
+
+/* should clean this up */
+
+/*
+ * Expands to a printk argument list: a KERN_ERR format string prefixed with
+ * "bcache (<name>): " and terminated by a newline, followed by <name> (the
+ * VFS superblock's s_id when vfs_sb is set, otherwise the cache set's uuid
+ * string) and then the caller's arguments. fmt must be a string literal,
+ * since it is concatenated with the literals around it.
+ */
+#define __bch_err_fmt(_c, fmt, ...) \
+ KERN_ERR "bcache (%s): " fmt "\n", \
+ ((_c)->vfs_sb ? (_c)->vfs_sb->s_id : (_c)->uuid), ##__VA_ARGS__
+
+/* Print an error message for cache set c; this only prints the message. */
+#define __bch_cache_set_error(c, fmt, ...) \
+ printk(__bch_err_fmt(c, fmt, ##__VA_ARGS__))
+
+/*
+ * Print an error message for cache device ca: same as
+ * __bch_cache_set_error() on ca->set, with the device name from bdevname()
+ * put in front of the message.
+ */
+#define __bch_cache_error(ca, fmt, ...) \
+do { \
+ char _buf[BDEVNAME_SIZE]; \
+ __bch_cache_set_error((ca)->set, "%s: " fmt, \
+ bdevname((ca)->disk_sb.bdev, _buf), \
+ ##__VA_ARGS__); \
+} while (0)
+
+/*
+ * Very fatal logic/inconsistency errors: these indicate that we've majorly
+ * screwed up at runtime, i.e. it's not likely that it was just caused by the
+ * data on disk being inconsistent. These BUG():
+ *
+ * XXX: audit and convert to inconsistent() checks
+ */
+
+/* Print the error message for cache set c, then BUG(). */
+#define cache_set_bug(c, ...) \
+do { \
+ __bch_cache_set_error(c, __VA_ARGS__); \
+ BUG(); \
+} while (0)
+
+/* cache_set_bug() if cond is true; cond is evaluated exactly once. */
+#define cache_set_bug_on(cond, c, ...) \
+do { \
+ if (cond) \
+ cache_set_bug(c, __VA_ARGS__); \
+} while (0)
+
+/*
+ * Inconsistency errors: The on disk data is inconsistent. If these occur during
+ * initial recovery, they don't indicate a bug in the running code - we walk all
+ * the metadata before modifying anything. If they occur at runtime, they
+ * indicate either a bug in the running code or (less likely) data is being
+ * silently corrupted under us.
+ *
+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in
+ * BCH_ON_ERROR_CONTINUE mode
+ */
+
+void bch_inconsistent_error(struct cache_set *);
+
+/* Print the error message for cache set c, then bch_inconsistent_error(c). */
+#define cache_set_inconsistent(c, ...) \
+do { \
+ __bch_cache_set_error(c, __VA_ARGS__); \
+ bch_inconsistent_error(c); \
+} while (0)
+
+/*
+ * If cond is true, report it with cache_set_inconsistent(). Evaluates cond
+ * once and yields 1 if it was true, else 0, so the macro can be used
+ * directly as an if () condition.
+ */
+#define cache_set_inconsistent_on(cond, c, ...) \
+({ \
+ int _ret = !!(cond); \
+ \
+ if (_ret) \
+ cache_set_inconsistent(c, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * Later we might want to mark only the particular device inconsistent, not the
+ * entire cache set:
+ */
+
+/*
+ * Print the error message for cache device ca (device name included), then
+ * bch_inconsistent_error() on the cache set the device belongs to.
+ */
+#define cache_inconsistent(ca, ...) \
+do { \
+ __bch_cache_error(ca, __VA_ARGS__); \
+ bch_inconsistent_error((ca)->set); \
+} while (0)
+
+/*
+ * If cond is true, report it with cache_inconsistent(). Evaluates cond once
+ * and yields 1 if it was true, else 0.
+ */
+#define cache_inconsistent_on(cond, ca, ...) \
+({ \
+ int _ret = !!(cond); \
+ \
+ if (_ret) \
+ cache_inconsistent(ca, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW
+ * mode - pretty much just due to metadata IO errors:
+ */
+
+void bch_fatal_error(struct cache_set *);
+
+/* Print the error message for cache set c, then bch_fatal_error(c). */
+#define cache_set_fatal_error(c, ...) \
+do { \
+ __bch_cache_set_error(c, __VA_ARGS__); \
+ bch_fatal_error(c); \
+} while (0)
+
+/*
+ * If cond is true, report it with cache_set_fatal_error(). Evaluates cond
+ * once and yields 1 if it was true, else 0.
+ */
+#define cache_set_fatal_err_on(cond, c, ...) \
+({ \
+ int _ret = !!(cond); \
+ \
+ if (_ret) \
+ cache_set_fatal_error(c, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * Print the error message for cache device ca (device name included), then
+ * bch_fatal_error() on the cache set the device belongs to. The cache set
+ * is taken from the ca argument rather than from a variable named "c" at
+ * the call site.
+ */
+#define cache_fatal_error(ca, ...) \
+do { \
+ __bch_cache_error(ca, __VA_ARGS__); \
+ bch_fatal_error((ca)->set); \
+} while (0)
+
+/*
+ * Print a ratelimited "fatal IO error on <device> for <message>" error for
+ * cache device ca, then bch_fatal_error() on its cache set. The
+ * bch_fatal_error() call is outside printk_ratelimited(), so it is made on
+ * every invocation.
+ */
+#define cache_fatal_io_error(ca, fmt, ...) \
+do { \
+ char _buf[BDEVNAME_SIZE]; \
+ \
+ printk_ratelimited(__bch_err_fmt((ca)->set, "fatal IO error on %s for " fmt,\
+ bdevname((ca)->disk_sb.bdev, _buf),\
+ ##__VA_ARGS__)); \
+ bch_fatal_error((ca)->set); \
+} while (0)
+
+/*
+ * If cond is true, report it with cache_fatal_io_error(). Evaluates cond
+ * once and yields 1 if it was true, else 0.
+ */
+#define cache_fatal_io_err_on(cond, ca, ...) \
+({ \
+ int _ret = !!(cond); \
+ \
+ if (_ret) \
+ cache_fatal_io_error(ca, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * Nonfatal IO errors: either recoverable metadata IO (because we have
+ * replicas), or data IO - we need to log it and print out a message, but we
+ * don't (necessarily) want to shut down the fs:
+ */
+
+void bch_account_io_completion(struct cache *);
+void bch_account_bbio_completion(struct bbio *);
+
+void bch_nonfatal_io_error_work(struct work_struct *);
+
+/* Does the error handling without logging a message */
+void bch_nonfatal_io_error(struct cache *);
+
+#if 0
+#define cache_set_nonfatal_io_error(c, ...) \
+do { \
+ __bch_cache_set_error(c, __VA_ARGS__); \
+ bch_nonfatal_io_error(c); \
+} while (0)
+#endif
+
+/*
+ * Logs message and handles the error:
+ *
+ * Prints a ratelimited "IO error on <device> for <message>" error for cache
+ * device ca, then calls bch_nonfatal_io_error(ca). That call is outside
+ * printk_ratelimited(), so it is made on every invocation.
+ */
+#define cache_nonfatal_io_error(ca, fmt, ...) \
+do { \
+ char _buf[BDEVNAME_SIZE]; \
+ \
+ printk_ratelimited(__bch_err_fmt((ca)->set, "IO error on %s for " fmt,\
+ bdevname((ca)->disk_sb.bdev, _buf),\
+ ##__VA_ARGS__)); \
+ bch_nonfatal_io_error(ca); \
+} while (0)
+
+/*
+ * If cond is true, report it with cache_nonfatal_io_error(). Evaluates cond
+ * once and yields 1 if it was true, else 0.
+ */
+#define cache_nonfatal_io_err_on(cond, ca, ...) \
+({ \
+ int _ret = !!(cond); \
+ \
+ if (_ret) \
+ cache_nonfatal_io_error(ca, __VA_ARGS__); \
+ _ret; \
+})
+
+/* kill? */
+
+/* Print a ratelimited "IO error: <message>" error for cache set c. */
+#define __bcache_io_error(c, fmt, ...) \
+ printk_ratelimited(__bch_err_fmt(c, "IO error: " fmt, ##__VA_ARGS__))
+
+/* Print the IO error for cache set c and set bio->bi_error to -EIO. */
+#define bcache_io_error(c, bio, fmt, ...) \
+do { \
+ __bcache_io_error(c, fmt, ##__VA_ARGS__); \
+ (bio)->bi_error = -EIO; \
+} while (0)
+
+#endif /* _BCACHE_ERROR_H */
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 4d5889d6d107..25a63aefb6a8 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -9,6 +9,7 @@
#include "btree.h"
#include "debug.h"
#include "dirent.h"
+#include "error.h"
#include "extents.h"
#include "gc.h"
#include "inode.h"
@@ -340,8 +341,8 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf,
/* Btree ptrs */
-static const char *bch_btree_ptr_invalid_reason(const struct cache_set *c,
- struct bkey_s_c k)
+static const char *bch_btree_ptr_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
{
if (bkey_extent_is_cached(k.k))
return "cached";
@@ -383,11 +384,6 @@ static const char *bch_btree_ptr_invalid_reason(const struct cache_set *c,
}
}
-static bool bch_btree_ptr_invalid(const struct cache_set *c, struct bkey_s_c k)
-{
- return bch_btree_ptr_invalid_reason(c, k);
-}
-
static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
struct bkey_s_c k)
{
@@ -401,11 +397,6 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
unsigned replicas = 0;
bool bad;
- if (bkey_extent_is_cached(k.k)) {
- btree_bug(b, "btree ptr marked as cached");
- return;
- }
-
rcu_read_lock();
extent_for_each_online_device(c, e, ptr, ca) {
@@ -444,11 +435,11 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
return;
err:
bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
- btree_bug(b, "%s btree pointer %s: bucket %zi prio %i "
- "gen %i last_gc %i mark %08x",
- err, buf, PTR_BUCKET_NR(ca, ptr),
- g->read_prio, PTR_BUCKET_GEN(ca, ptr),
- g->oldest_gen, g->mark.counter);
+ cache_set_bug(c, "%s btree pointer %s: bucket %zi prio %i "
+ "gen %i last_gc %i mark %08x",
+ err, buf, PTR_BUCKET_NR(ca, ptr),
+ g->read_prio, PTR_BUCKET_GEN(ca, ptr),
+ g->oldest_gen, g->mark.counter);
rcu_read_unlock();
}
@@ -463,7 +454,7 @@ static void bch_btree_ptr_to_text(struct cache_set *c, char *buf,
if (bkey_extent_is_data(k.k))
out += extent_print_ptrs(c, buf, size, bkey_s_c_to_extent(k));
- invalid = bch_btree_ptr_invalid_reason(c, k);
+ invalid = bch_btree_ptr_invalid(c, k);
if (invalid)
p(" invalid: %s", invalid);
#undef p
@@ -480,23 +471,19 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
rcu_read_lock();
extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
- if (crc) {
- bch_cache_error(ca,
+ if (cache_set_inconsistent_on(crc, c,
"btree node pointer with crc at btree %u level %u/%u bucket %zu",
b->btree_id, b->level, btree_node_root(b)
? btree_node_root(b)->level : -1,
- PTR_BUCKET_NR(ca, ptr));
+ PTR_BUCKET_NR(ca, ptr)))
break;
- }
- if (ptr_stale(ca, ptr)) {
- bch_cache_error(ca,
+ if (cache_inconsistent_on(ptr_stale(ca, ptr), ca,
"stale btree node pointer at btree %u level %u/%u bucket %zu",
b->btree_id, b->level, btree_node_root(b)
? btree_node_root(b)->level : -1,
- PTR_BUCKET_NR(ca, ptr));
+ PTR_BUCKET_NR(ca, ptr)))
continue;
- }
percpu_ref_get(&ca->ref);
rcu_read_unlock();
@@ -1306,8 +1293,8 @@ out:
return inserted;
}
-static const char *bch_extent_invalid_reason(const struct cache_set *c,
- struct bkey_s_c k)
+static const char *bch_extent_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
{
if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
return "value too big";
@@ -1360,11 +1347,6 @@ invalid:
}
}
-static bool bch_extent_invalid(const struct cache_set *c, struct bkey_s_c k)
-{
- return bch_extent_invalid_reason(c, k);
-}
-
static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
struct bkey_s_c k)
{
@@ -1379,6 +1361,15 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
unsigned ptrs_per_tier[CACHE_TIERS];
unsigned i, tier, replicas = 0;
+ /*
+ * XXX: we should be doing most/all of these checks at startup time,
+ * where we check bkey_invalid() in btree_node_read_done()
+ *
+ * But note that we can't check for stale pointers or incorrect gc marks
+ * until after journal replay is done (it might be an extent that's
+ * going to get overwritten during replay)
+ */
+
memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
mi = cache_member_info_get(c);
@@ -1491,7 +1482,7 @@ static void bch_extent_to_text(struct cache_set *c, char *buf,
if (bkey_extent_is_data(k.k))
out += extent_print_ptrs(c, buf, size, bkey_s_c_to_extent(k));
- invalid = bch_extent_invalid_reason(c, k);
+ invalid = bch_extent_invalid(c, k);
if (invalid)
p(" invalid: %s", invalid);
#undef p
diff --git a/drivers/md/bcache/fs-gc.c b/drivers/md/bcache/fs-gc.c
index 3245e1063898..54bdf550ac93 100644
--- a/drivers/md/bcache/fs-gc.c
+++ b/drivers/md/bcache/fs-gc.c
@@ -2,6 +2,7 @@
#include "bcache.h"
#include "btree.h"
#include "dirent.h"
+#include "error.h"
#include "fs.h"
#include "inode.h"
#include "keylist.h"
@@ -74,13 +75,13 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
struct bkey_i_inode update;
int ret;
- cache_set_err_on(inode.v->i_nlink < link.count, c,
+ cache_set_inconsistent_on(inode.v->i_nlink < link.count, c,
"i_link too small (%u < %u, type %i)",
inode.v->i_nlink, link.count + link.dir_count,
mode_to_type(inode.v->i_mode));
if (!link.count) {
- cache_set_err_on(S_ISDIR(inode.v->i_mode) &&
+ cache_set_inconsistent_on(S_ISDIR(inode.v->i_mode) &&
bch_empty_dir(c, inode.k->p.inode), c,
"non empty directory with link count 0,inode nlink %u, dir links found %u",
inode.v->i_nlink, link.dir_count);
@@ -139,7 +140,7 @@ static int bch_gc_walk_inodes(struct cache_set *c, u64 pos, struct nlink *links)
break;
while (i < k.k->p.inode - pos) {
- cache_set_err_on(links[i].count, c,
+ cache_set_inconsistent_on(links[i].count, c,
"missing inode %llu",
pos + i);
i++;
@@ -157,7 +158,7 @@ static int bch_gc_walk_inodes(struct cache_set *c, u64 pos, struct nlink *links)
break;
default:
- cache_set_err_on(links[i].count, c,
+ cache_set_inconsistent_on(links[i].count, c,
"missing inode %llu",
pos + i);
break;
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index e6c689974de1..bccc2b176533 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -2057,7 +2057,7 @@ static int bch_remount(struct super_block *sb, int *flags, char *data)
const char *err = NULL;
if (opts.read_only) {
- bch_cache_set_read_only(c);
+ bch_cache_set_read_only_sync(c);
sb->s_flags |= MS_RDONLY;
} else {
@@ -2103,9 +2103,11 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
struct cache_set *c;
+ struct cache *ca;
struct super_block *sb;
struct inode *inode;
struct cache_set_opts opts;
+ unsigned i;
int ret;
if (!parse_options(&opts, flags, data))
@@ -2130,6 +2132,17 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
sb->s_magic = BCACHE_STATFS_MAGIC;
sb->s_time_gran = 1;
sb->s_fs_info = c;
+ c->vfs_sb = sb;
+
+ rcu_read_lock();
+ for_each_cache_rcu(ca, c, i) {
+ char b[BDEVNAME_SIZE];
+
+ strlcpy(sb->s_id, bdevname(ca->disk_sb.bdev, b),
+ sizeof(sb->s_id));
+ break;
+ }
+ rcu_read_unlock();
if (opts.posix_acl < 0)
sb->s_flags |= MS_POSIXACL;
diff --git a/drivers/md/bcache/gc.c b/drivers/md/bcache/gc.c
index c08dd74a1015..74ac042c82b5 100644
--- a/drivers/md/bcache/gc.c
+++ b/drivers/md/bcache/gc.c
@@ -8,6 +8,7 @@
#include "btree.h"
#include "buckets.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "gc.h"
#include "journal.h"
@@ -79,13 +80,6 @@ static inline bool btree_node_has_ptrs(struct btree *b)
bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
{
struct bkey_format *f = &b->keys.format;
- struct bset_tree *t;
-
- for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++)
- btree_bug_on(t->size &&
- bset_written(&b->keys, t) &&
- bkey_cmp_packed(f, &b->key.k, &t->end) < 0,
- b, "found short btree key in gc");
if (btree_node_has_ptrs(b)) {
struct btree_node_iter iter;
@@ -128,8 +122,12 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id)
for_each_btree_node(&iter, c, btree_id, POS_MIN, b) {
if (!b->level) {
- cache_set_bug_on(bkey_cmp(b->data->min_key, next_min),
- c,
+ /*
+ * XXX: this check should be elsewhere - also, we should
+ * be checking all nodes, not just leaf nodes
+ */
+ cache_set_inconsistent_on(bkey_cmp(b->data->min_key,
+ next_min), c,
"btree node has incorrect min key: %llu:%llu != %llu:%llu",
b->data->min_key.inode,
b->data->min_key.offset,
diff --git a/drivers/md/bcache/inode.c b/drivers/md/bcache/inode.c
index 53b699920e1d..38189277ace3 100644
--- a/drivers/md/bcache/inode.c
+++ b/drivers/md/bcache/inode.c
@@ -51,30 +51,31 @@ ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k)
}
}
-static bool bch_inode_invalid(const struct cache_set *c, struct bkey_s_c k)
+static const char *bch_inode_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
{
if (k.k->p.offset)
- return true;
+ return "nonzero offset";
switch (k.k->type) {
case BCH_INODE_FS:
if (bkey_val_bytes(k.k) != sizeof(struct bch_inode))
- return true;
+ return "incorrect value size";
if (k.k->p.inode < BLOCKDEV_INODE_MAX)
- return true;
+ return "fs inode in blockdev range";
- return false;
+ return NULL;
case BCH_INODE_BLOCKDEV:
if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
- return true;
+ return "incorrect value size";
if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
- return true;
+ return "blockdev inode in fs range";
- return false;
+ return NULL;
default:
- return true;
+ return "invalid type";
}
}
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 1a4736111b44..0d15ec0ed78e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -12,6 +12,7 @@
#include "buckets.h"
#include "clock.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "gc.h"
#include "io.h"
@@ -195,106 +196,13 @@ static void bch_bbio_reset(struct bbio *b)
/* IO errors */
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
-{
- /*
- * The halflife of an error is:
- * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
- */
-
- if (ca->set->error_decay) {
- unsigned count = atomic_inc_return(&ca->io_count);
-
- while (count > ca->set->error_decay) {
- unsigned errors;
- unsigned old = count;
- unsigned new = count - ca->set->error_decay;
-
- /*
- * First we subtract refresh from count; each time we
- * succesfully do so, we rescale the errors once:
- */
-
- count = atomic_cmpxchg(&ca->io_count, old, new);
-
- if (count == old) {
- count = new;
-
- errors = atomic_read(&ca->io_errors);
- do {
- old = errors;
- new = ((uint64_t) errors * 127) / 128;
- errors = atomic_cmpxchg(&ca->io_errors,
- old, new);
- } while (old != errors);
- }
- }
- }
-
- if (error) {
- char buf[BDEVNAME_SIZE];
-
- atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
- queue_work(system_long_wq, &ca->io_error_work);
- printk_ratelimited(KERN_ERR "%s: IO error on %s",
- bdevname(ca->disk_sb.bdev, buf), m);
- }
-}
-
-void bch_cache_io_error_work(struct work_struct *work)
-{
- struct cache *ca = container_of(work, struct cache, io_error_work);
- unsigned errors = atomic_read(&ca->io_errors);
- char buf[BDEVNAME_SIZE];
-
- if (errors < ca->set->error_limit) {
- bch_notify_cache_error(ca, false);
- } else {
- bch_notify_cache_error(ca, true);
- printk_ratelimited(KERN_ERR "%s: too many IO errors, going RO",
- bdevname(ca->disk_sb.bdev, buf));
- queue_work(system_long_wq, &ca->read_only_work);
- }
-}
-
-void bch_bbio_count_io_errors(struct bbio *bio, int error, const char *m)
-{
- struct cache_set *c;
- unsigned threshold;
-
- if (!bio->ca)
- return;
-
- c = bio->ca->set;
- threshold = op_is_write(bio_op(&bio->bio))
- ? c->congested_write_threshold_us
- : c->congested_read_threshold_us;
-
- if (threshold && bio->submit_time_us) {
- unsigned t = local_clock_us();
-
- int us = t - bio->submit_time_us;
- int congested = atomic_read(&c->congested);
-
- if (us > (int) threshold) {
- int ms = us / 1024;
- c->congested_last_us = t;
-
- ms = min(ms, CONGESTED_MAX + congested);
- atomic_sub(ms, &c->congested);
- } else if (congested < 0)
- atomic_inc(&c->congested);
- }
-
- bch_count_io_errors(bio->ca, error, m);
-}
-
-void bch_bbio_endio(struct bbio *bio, int error, const char *m)
+void bch_bbio_endio(struct bbio *bio)
{
struct closure *cl = bio->bio.bi_private;
struct cache *ca = bio->ca;
- bch_bbio_count_io_errors(bio, error, m);
+ bch_account_bbio_completion(bio);
+
bio_put(&bio->bio);
if (ca)
percpu_ref_put(&ca->ref);
@@ -622,10 +530,10 @@ static void bch_write_endio(struct bio *bio)
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bch_write_bio *wbio = to_wbio(bio);
- if (bio->bi_error) {
+ if (cache_nonfatal_io_err_on(bio->bi_error, wbio->bio.ca,
+ "data write")) {
/* TODO: We could try to recover from this. */
if (!bkey_extent_is_cached(&op->insert_key.k)) {
- __bcache_io_error(op->c, "IO error writing data");
op->error = bio->bi_error;
} else if (!op->replace)
set_closure_fn(cl, bch_write_error, op->c->wq);
@@ -638,7 +546,7 @@ static void bch_write_endio(struct bio *bio)
else if (wbio->bounce)
bch_bio_free_pages_pool(op->c, bio);
- bch_bbio_endio(&wbio->bio, bio->bi_error, "writing data to cache");
+ bch_bbio_endio(&wbio->bio);
}
static const unsigned bch_crc_size[] = {
@@ -1445,11 +1353,7 @@ static int bio_checksum_uncompress(struct bch_read_bio *rbio)
if (rbio->csum_type != BCH_CSUM_NONE &&
rbio->csum != checksum_bio(bio, rbio->csum_type)) {
- /*
- * XXX: bch_bbio_count_io_errors() isn't counting checksum
- * errors
- */
- __bcache_io_error(rbio->c, "checksum error");
+ cache_nonfatal_io_error(rbio->bio.ca, "checksum error");
return -EIO;
}
@@ -1537,7 +1441,10 @@ static void bch_read_endio(struct bio *bio)
ptr_stale(rbio->bio.ca, &rbio->bio.ptr);
int error = bio->bi_error;
- bch_bbio_count_io_errors(&rbio->bio, error, "reading from cache");
+ bch_account_bbio_completion(&rbio->bio);
+
+ cache_nonfatal_io_err_on(error, rbio->bio.ca, "data read");
+
percpu_ref_put(&rbio->bio.ca->ref);
if (error)
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index 5c819c77a6ec..d424ddcf9bfd 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -140,10 +140,7 @@ enum bch_read_flags {
int bch_read(struct cache_set *, struct bio *, u64);
-void bch_cache_io_error_work(struct work_struct *);
-void bch_count_io_errors(struct cache *, int, const char *);
-void bch_bbio_count_io_errors(struct bbio *, int, const char *);
-void bch_bbio_endio(struct bbio *, int, const char *);
+void bch_bbio_endio(struct bbio *);
void bch_generic_make_request(struct bio *, struct cache_set *);
void bch_bio_submit_work(struct work_struct *);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 3cfa378ecd6f..2625e8d4b550 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -8,6 +8,7 @@
#include "buckets.h"
#include "btree.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "gc.h"
#include "io.h"
@@ -98,11 +99,11 @@ struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
k = jkeys->start;
*level = jkeys->level;
- if (!jkeys->u64s || jkeys->u64s != k->k.u64s ||
- bkey_invalid(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(k))) {
- bch_cache_set_error(c, "invalid btree root in journal");
+ if (cache_set_inconsistent_on(!jkeys->u64s ||
+ jkeys->u64s != k->k.u64s ||
+ bkey_invalid(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(k)),
+ c, "invalid btree root in journal"))
return NULL;
- }
*level = jkeys->level;
return k;
@@ -217,7 +218,7 @@ static int __bch_journal_seq_blacklisted(struct cache_set *c, u64 seq,
if (seq <= j->seq)
return 0;
- cache_set_err_on(seq > j->seq + 1, c,
+ cache_set_inconsistent_on(seq > j->seq + 1, c,
"bset journal seq too far in the future: %llu > %llu",
seq, j->seq);
@@ -357,38 +358,33 @@ static enum {
got = j->version;
expect = BCACHE_JSET_VERSION;
- if (got != expect) {
- __bch_cache_error(ca,
+
+ if (cache_inconsistent_on(got != expect, ca,
"bad journal version (got %llu expect %llu) sector %lluu",
- got, expect, sector);
+ got, expect, sector))
return JOURNAL_ENTRY_BAD;
- }
- if (bytes > bucket_sectors_left << 9 ||
- bytes > PAGE_SIZE << JSET_BITS) {
- __bch_cache_error(ca,
+ if (cache_inconsistent_on(bytes > bucket_sectors_left << 9 ||
+ bytes > PAGE_SIZE << JSET_BITS, ca,
"journal entry too big (%zu bytes), sector %lluu",
- bytes, sector);
+ bytes, sector))
return JOURNAL_ENTRY_BAD;
- }
if (bytes > sectors_read << 9)
return JOURNAL_ENTRY_REREAD;
+ /* XXX: retry on checksum error */
+
got = j->csum;
expect = csum_set(j, JSET_CSUM_TYPE(j));
- if (got != expect) {
- __bch_cache_error(ca,
+ if (cache_inconsistent_on(got != expect, ca,
"journal checksum bad (got %llu expect %llu), sector %lluu",
- got, expect, sector);
+ got, expect, sector))
return JOURNAL_ENTRY_BAD;
- }
- if (j->last_seq > j->seq) {
- __bch_cache_error(ca,
- "invalid journal entry: last_seq > seq");
+ if (cache_inconsistent_on(j->last_seq > j->seq, ca,
+ "invalid journal entry: last_seq > seq"))
return JOURNAL_ENTRY_BAD;
- }
return JOURNAL_ENTRY_OK;
}
@@ -427,12 +423,12 @@ reread:
bch_bio_map(bio, data);
ret = submit_bio_wait(bio);
- if (bch_meta_read_fault("journal"))
+
+ if (cache_fatal_io_err_on(ret, ca,
+ "journal read from sector %llu",
+ sector + bucket_offset) ||
+ bch_meta_read_fault("journal")) {
ret = -EIO;
- if (ret) {
- __bch_cache_error(ca,
- "IO error %d reading journal from bucket_offset %llu",
- ret, sector + bucket_offset);
goto err;
}
@@ -965,13 +961,13 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list)
journal_seq_blacklist_find(j, cur_seq))
cur_seq++;
- cache_set_err_on(journal_seq_blacklist_find(j, i->j.seq), c,
+ cache_set_inconsistent_on(journal_seq_blacklist_find(j, i->j.seq), c,
"found blacklisted journal entry %llu",
i->j.seq);
mutex_unlock(&j->blacklist_lock);
- cache_set_err_on(i->j.seq != cur_seq, c,
+ cache_set_inconsistent_on(i->j.seq != cur_seq, c,
"journal entries %llu-%llu missing! (replaying %llu-%llu)",
cur_seq, i->j.seq - 1, last_seq(j), end_seq);
@@ -1353,13 +1349,10 @@ static void journal_write_endio(struct bio *bio)
struct journal_write *w = bio->bi_private;
struct journal *j = w->j;
- if (bio->bi_error || bch_meta_write_fault("journal")) {
+ if (cache_fatal_io_err_on(bio->bi_error, ca, "journal write") ||
+ bch_meta_write_fault("journal")) {
set_bit(JOURNAL_ERROR, &j->flags);
__journal_entry_close(j, JOURNAL_ENTRY_ERROR);
-
- __bch_cache_error(ca, "IO error %d writing journal",
- bio->bi_error);
- bch_cache_set_io_error(ca->set);
}
closure_put(&j->io);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 13e08ed237ce..2397fdc47500 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -28,6 +28,7 @@
#include "btree.h"
#include "clock.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "io.h"
#include "journal.h"
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 20b79dbdbc7e..efa03f795bab 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -12,6 +12,7 @@
#include "btree.h"
#include "clock.h"
#include "debug.h"
+#include "error.h"
#include "fs-gc.h"
#include "gc.h"
#include "inode.h"
@@ -59,6 +60,7 @@ static int bch_chardev_major;
static struct class *bch_chardev_class;
static struct device *bch_chardev;
static DEFINE_IDR(bch_chardev_minor);
+static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
struct workqueue_struct *bcache_io_wq;
@@ -491,7 +493,12 @@ static void write_super_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+ /* XXX: return errors directly */
+
+ cache_fatal_io_err_on(bio->bi_error, ca, "superblock write");
+
+ bch_account_io_completion(ca);
+
closure_put(&ca->set->sb_write);
percpu_ref_put(&ca->ref);
}
@@ -690,15 +697,41 @@ static void bch_writes_disabled(struct percpu_ref *writes)
complete(&c->write_disable_complete);
}
-void bch_cache_set_read_only(struct cache_set *c)
+static void bch_cache_set_read_only_work(struct work_struct *work)
{
- lockdep_assert_held(&bch_register_lock);
+ struct cache_set *c =
+ container_of(work, struct cache_set, read_only_work);
+
+ init_completion(&c->write_disable_complete);
+ percpu_ref_put(&c->writes);
+
+ del_timer_sync(&c->foreground_write_wakeup);
+ cancel_delayed_work_sync(&c->pd_controllers_update);
+
+ c->foreground_write_pd.rate.rate = UINT_MAX;
+ bch_wake_delayed_writes((unsigned long) c);
+
+ /* Wait for outstanding writes to complete: */
+ wait_for_completion(&c->write_disable_complete);
+
+ __bch_cache_set_read_only(c);
+ bch_notify_cache_set_read_only(c);
+ trace_bcache_cache_set_read_only_done(c);
+
+ set_bit(CACHE_SET_RO_COMPLETE, &c->flags);
+ wake_up(&bch_read_only_wait);
+}
+
+bool bch_cache_set_read_only(struct cache_set *c)
+{
if (test_and_set_bit(CACHE_SET_RO, &c->flags))
- return;
+ return false;
trace_bcache_cache_set_read_only(c);
+ percpu_ref_get(&c->writes);
+
/*
* Block new foreground-end write operations from starting - any new
* writes will return -EROFS:
@@ -707,20 +740,18 @@ void bch_cache_set_read_only(struct cache_set *c)
* allocated space can still happen until stopping the allocator in
* bch_cache_allocator_stop()).
*/
- init_completion(&c->write_disable_complete);
percpu_ref_kill(&c->writes);
- bch_wake_delayed_writes((unsigned long) c);
- del_timer_sync(&c->foreground_write_wakeup);
- cancel_delayed_work_sync(&c->pd_controllers_update);
-
- /* Wait for outstanding writes to complete: */
- wait_for_completion(&c->write_disable_complete);
+ queue_work(system_unbound_wq, &c->read_only_work);
+ return true;
+}
- __bch_cache_set_read_only(c);
+void bch_cache_set_read_only_sync(struct cache_set *c)
+{
+ bch_cache_set_read_only(c);
- bch_notify_cache_set_read_only(c);
- trace_bcache_cache_set_read_only_done(c);
+ wait_event(bch_read_only_wait,
+ test_bit(CACHE_SET_RO_COMPLETE, &c->flags));
}
static const char *__bch_cache_set_read_write(struct cache_set *c)
@@ -768,7 +799,7 @@ const char *bch_cache_set_read_write(struct cache_set *c)
lockdep_assert_held(&bch_register_lock);
- if (!test_bit(CACHE_SET_RO, &c->flags))
+ if (!test_bit(CACHE_SET_RO_COMPLETE, &c->flags))
return NULL;
for_each_cache(ca, c, i)
@@ -783,6 +814,8 @@ const char *bch_cache_set_read_write(struct cache_set *c)
return err;
percpu_ref_reinit(&c->writes);
+
+ clear_bit(CACHE_SET_RO_COMPLETE, &c->flags);
clear_bit(CACHE_SET_RO, &c->flags);
return NULL;
@@ -791,38 +824,6 @@ err:
return err;
}
-static void bch_cache_set_read_only_work(struct work_struct *work)
-{
- struct cache_set *c =
- container_of(work, struct cache_set, read_only_work);
-
- mutex_lock(&bch_register_lock);
- bch_cache_set_read_only(c);
- mutex_unlock(&bch_register_lock);
-}
-
-void bch_cache_set_io_error(struct cache_set *c)
-{
- pr_err("%pU going read only", c->sb.set_uuid.b);
- schedule_work(&c->read_only_work);
-}
-
-void bch_cache_set_fail(struct cache_set *c)
-{
- switch (c->opts.on_error_action) {
- case BCH_ON_ERROR_CONTINUE:
- break;
- case BCH_ON_ERROR_RO:
- pr_err("%pU going read only", c->sb.set_uuid.b);
- schedule_work(&c->read_only_work);
- break;
- case BCH_ON_ERROR_PANIC:
- panic("bcache: %pU panic after error\n",
- c->sb.set_uuid.b);
- break;
- }
-}
-
/* Cache set startup/shutdown: */
void bch_cache_set_release(struct kobject *kobj)
@@ -893,7 +894,7 @@ static void cache_set_flush(struct closure *cl)
device_unregister(c->chardev);
mutex_lock(&bch_register_lock);
- bch_cache_set_read_only(c);
+ bch_cache_set_read_only_sync(c);
if (c->kobj.state_in_sysfs)
kobject_del(&c->kobj);
@@ -986,6 +987,8 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
if (cache_sb_to_cache_set(c, sb))
goto err;
+ scnprintf(c->uuid, sizeof(c->uuid), "%pU", &c->sb.user_uuid);
+
c->opts = (struct cache_set_opts) {
.read_only = 0,
.on_error_action = CACHE_ERROR_ACTION(&c->sb),
@@ -1145,7 +1148,7 @@ static const char *run_cache_set(struct cache_set *c)
lockdep_assert_held(&bch_register_lock);
BUG_ON(test_bit(CACHE_SET_RUNNING, &c->flags));
- /* We don't want bch_cache_set_error() to free underneath us */
+ /* We don't want bch_fatal_error() to free underneath us */
closure_get(&c->caching);
/*
@@ -1307,7 +1310,7 @@ static const char *run_cache_set(struct cache_set *c)
bch_prio_timer_start(c, WRITE);
if (c->opts.read_only) {
- bch_cache_set_read_only(c);
+ bch_cache_set_read_only_sync(c);
} else {
err = __bch_cache_set_read_write(c);
if (err)
@@ -1322,7 +1325,9 @@ static const char *run_cache_set(struct cache_set *c)
bcache_write_super(c);
- bch_blockdev_volumes_start(c);
+ err = "can't bring up blockdev volumes";
+ if (bch_blockdev_volumes_start(c))
+ goto err;
bch_debug_init_cache_set(c);
@@ -1467,9 +1472,9 @@ void bch_cache_read_only(struct cache *ca)
return;
if (!cache_may_remove(ca)) {
- pr_warning("Required member %s for %pU going RO, cache set going RO",
- buf, &c->sb.set_uuid);
- bch_cache_set_read_only(c);
+ printk(__bch_err_fmt(c, "required member %s going RO, forcing fs RO",
+ buf));
+ bch_cache_set_read_only_sync(c);
}
/*
@@ -1484,17 +1489,6 @@ void bch_cache_read_only(struct cache *ca)
bch_cache_member_info_update(ca);
}
-static void bch_cache_read_only_work(struct work_struct *work)
-{
- struct cache *ca = container_of(work, struct cache, read_only_work);
-
- /* Going RO because of an error: */
-
- mutex_lock(&bch_register_lock);
- bch_cache_read_only(ca);
- mutex_unlock(&bch_register_lock);
-}
-
static const char *__bch_cache_read_write(struct cache *ca)
{
const char *err;
@@ -1824,7 +1818,6 @@ static const char *cache_alloc(struct bcache_superblock *sb,
ca->self.devices[0] = ca;
INIT_WORK(&ca->free_work, bch_cache_free_work);
- INIT_WORK(&ca->read_only_work, bch_cache_read_only_work);
INIT_WORK(&ca->remove_work, bch_cache_remove_work);
bio_init(&ca->journal.bio);
ca->journal.bio.bi_max_vecs = 8;
@@ -1837,7 +1830,7 @@ static const char *cache_alloc(struct bcache_superblock *sb,
ca->disk_sb.bdev->bd_holder = ca;
memset(sb, 0, sizeof(*sb));
- INIT_WORK(&ca->io_error_work, bch_cache_io_error_work);
+ INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work);
err = "dynamic fault";
if (cache_set_init_fault("cache_alloc"))
@@ -2287,6 +2280,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
list_for_each_entry(c, &bch_cache_sets, list)
bch_cache_set_read_only(c);
+ list_for_each_entry(c, &bch_cache_sets, list)
+ bch_cache_set_read_only_sync(c);
+
mutex_unlock(&bch_register_lock);
}
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
index 258152a42014..15b03c6e54a2 100644
--- a/drivers/md/bcache/super.h
+++ b/drivers/md/bcache/super.h
@@ -152,9 +152,6 @@ const char *validate_super(struct bcache_superblock *, struct cache_sb *);
void bch_cache_member_info_update(struct cache *);
-void bch_cache_set_io_error(struct cache_set *);
-void bch_cache_set_fail(struct cache_set *);
-
void bch_cache_set_release(struct kobject *);
void bch_cache_release(struct kobject *);
@@ -174,7 +171,8 @@ const char *bch_register_cache_set(char * const *, unsigned,
struct cache_set_opts,
struct cache_set **);
-void bch_cache_set_read_only(struct cache_set *);
+bool bch_cache_set_read_only(struct cache_set *);
+void bch_cache_set_read_only_sync(struct cache_set *);
const char *bch_cache_set_read_write(struct cache_set *);
void bch_cache_read_only(struct cache *);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 34bec1106f0e..892306f1b2e9 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -10,6 +10,7 @@
#include "btree.h"
#include "clock.h"
#include "debug.h"
+#include "error.h"
#include "extents.h"
#include "io.h"
#include "keybuf.h"
@@ -168,9 +169,9 @@ static void read_dirty_endio(struct bio *bio)
{
struct dirty_io *io = container_of(bio, struct dirty_io, bio);
- bch_count_io_errors(io->ca, bio->bi_error,
- "reading dirty data from cache");
- percpu_ref_put(&io->ca->ref);
+ cache_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read");
+
+ bch_account_io_completion(io->ca);
if (ptr_stale(io->ca, &io->ptr))
bio->bi_error = -EINTR;
diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c
index 22e728c18121..2f004c3e69a7 100644
--- a/drivers/md/bcache/xattr.c
+++ b/drivers/md/bcache/xattr.c
@@ -75,21 +75,22 @@ static int xattr_cmp(const struct bch_xattr *xattr,
memcmp(xattr->x_name, q->name, q->len);
}
-static bool bch_xattr_invalid(const struct cache_set *c, struct bkey_s_c k)
+static const char *bch_xattr_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_XATTR:
- if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
- return true;
+ return bkey_val_bytes(k.k) < sizeof(struct bch_xattr)
+ ? "value too small"
+ : NULL;
- return false;
case BCH_XATTR_WHITEOUT:
- if (bkey_val_bytes(k.k))
- return true;
+ return bkey_val_bytes(k.k) != 0
+ ? "value size should be zero"
+ : NULL;
- return false;
default:
- return true;
+ return "invalid type";
}
}