summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-01-20 02:46:01 -0900
committerKent Overstreet <kent.overstreet@gmail.com>2016-10-07 12:35:20 -0800
commit1756202b161ed0f70220b123c2d5d1c90aca044d (patch)
tree1a45a200bd63ea16c24f3411986b1b5f4472f141
parent02a1f97a5588b1635a1cabd82976b625966d429d (diff)
bcache: bkey swabbing
Bkeys have to be stored in native endianness; thus, for endian portability, we need a flag to indicate whether a given btree node entry/journal entry was written in big or little endian, and we need to swab when reading an entry in non-native endianness.
-rw-r--r--drivers/md/bcache/bkey.c27
-rw-r--r--drivers/md/bcache/bkey.h3
-rw-r--r--drivers/md/bcache/bkey_methods.c12
-rw-r--r--drivers/md/bcache/bkey_methods.h4
-rw-r--r--drivers/md/bcache/bset.c2
-rw-r--r--drivers/md/bcache/btree_io.c57
-rw-r--r--drivers/md/bcache/extents.c11
-rw-r--r--drivers/md/bcache/journal.c25
-rw-r--r--drivers/md/bcache/util.h6
-rw-r--r--include/uapi/linux/bcache.h49
10 files changed, 150 insertions, 46 deletions
diff --git a/drivers/md/bcache/bkey.c b/drivers/md/bcache/bkey.c
index 0920ace0803a..53d21ebf667d 100644
--- a/drivers/md/bcache/bkey.c
+++ b/drivers/md/bcache/bkey.c
@@ -313,7 +313,7 @@ bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
}
/*
- * Alternate implementations using bkey_format_transform() - unfortunately, too
+ * Alternate implementations using bch_bkey_transform_key() - unfortunately, too
* slow
*/
#if 0
@@ -822,6 +822,31 @@ int __bkey_cmp_left_packed(const struct bkey_format *format,
return bkey_cmp(__bkey_unpack_pos(format, l), r);
}
+void bch_bpos_swab(struct bpos *p)
+{
+ u8 *l = (u8 *) p;
+ u8 *h = ((u8 *) &p[1]) - 1;
+
+ while (l < h) {
+ swap(*l, *h);
+ l++;
+ --h;
+ }
+}
+
+void bch_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
+{
+ const struct bkey_format *f = bkey_packed(k) ? _f : &bch_bkey_format_current;
+ u8 *l = k->key_start;
+ u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
+
+ while (l < h) {
+ swap(*l, *h);
+ l++;
+ --h;
+ }
+}
+
#ifdef CONFIG_BCACHE_DEBUG
void bkey_pack_test(void)
{
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h
index 408f98abd294..3f9796287cbb 100644
--- a/drivers/md/bcache/bkey.h
+++ b/drivers/md/bcache/bkey.h
@@ -126,6 +126,9 @@ static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
int bkey_cmp(struct bpos l, struct bpos r);
#endif
+void bch_bpos_swab(struct bpos *);
+void bch_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
+
#define bkey_packed(_k) \
({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \
(_k)->format != KEY_FORMAT_CURRENT; })
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index b470bbca5c3b..5c0943f00872 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -100,3 +100,15 @@ void bch_bkey_val_to_text(struct cache_set *c, enum bkey_type type,
ops->val_to_text(c, out, end - out, k);
}
}
+
+void bch_bkey_swab(enum bkey_type type,
+ const struct bkey_format *f,
+ struct bkey_packed *k)
+{
+ const struct bkey_ops *ops = bch_bkey_ops[type];
+
+ bch_bkey_swab_key(f, k);
+
+ if (ops->swab)
+ ops->swab(f, k);
+}
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index a01de6d45be3..03ca92e28a29 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -37,6 +37,7 @@ struct bkey_ops {
struct bkey_s_c);
void (*val_to_text)(struct cache_set *, char *,
size_t, struct bkey_s_c);
+ void (*swab)(const struct bkey_format *, struct bkey_packed *);
bool is_extents;
};
@@ -49,6 +50,9 @@ void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c);
void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
char *, size_t, struct bkey_s_c);
+void bch_bkey_swab(enum bkey_type, const struct bkey_format *,
+ struct bkey_packed *);
+
#undef DEF_BTREE_ID
#endif /* _BCACHE_BKEY_METHODS_H */
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e163b7becd9c..016066899954 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -618,6 +618,7 @@ void bch_bset_init_first(struct btree_keys *b, struct bset *i)
b->set[0].data = i;
memset(i, 0, sizeof(*i));
get_random_bytes(&i->seq, sizeof(i->seq));
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
bch_bset_build_unwritten_tree(b);
}
@@ -628,6 +629,7 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i)
b->set[++b->nsets].data = i;
memset(i, 0, sizeof(*i));
i->seq = b->set->data->seq;
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
bch_bset_build_unwritten_tree(b);
}
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index fa287da2ffde..6d08f8e1b7df 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -219,6 +219,9 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
break;
}
+ if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
+ bch_bkey_swab(btree_node_type(b), &b->keys.format, k);
+
bkey_disassemble(&tup, f, k);
u = bkey_tup_to_s_c(&tup);
@@ -240,6 +243,8 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
k = bkey_next(k);
}
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
b->written += blocks;
return NULL;
}
@@ -261,29 +266,6 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
if (bch_meta_read_fault("btree"))
goto err;
- err = "bad magic";
- if (le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb))
- goto err;
-
- err = "bad btree header";
- if (!b->data->keys.seq)
- goto err;
-
- err = "incorrect max key";
- if (bkey_cmp(b->data->max_key, b->key.k.p))
- goto err;
-
- err = "incorrect level";
- if (BSET_BTREE_LEVEL(i) != b->level)
- goto err;
-
- err = bch_bkey_format_validate(&b->data->format);
- if (err)
- goto err;
-
- b->keys.format = b->data->format;
- b->keys.set->data = &b->data->keys;
-
while (b->written < btree_blocks(c)) {
unsigned blocks;
@@ -304,6 +286,34 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
blocks = __set_blocks(b->data,
le16_to_cpu(b->data->keys.u64s),
block_bytes(c));
+
+ err = "bad magic";
+ if (le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb))
+ goto err;
+
+ err = "bad btree header";
+ if (!b->data->keys.seq)
+ goto err;
+
+ if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
+ bch_bpos_swab(&b->data->min_key);
+ bch_bpos_swab(&b->data->max_key);
+ }
+
+ err = "incorrect max key";
+ if (bkey_cmp(b->data->max_key, b->key.k.p))
+ goto err;
+
+ err = "incorrect level";
+ if (BSET_BTREE_LEVEL(i) != b->level)
+ goto err;
+
+ err = bch_bkey_format_validate(&b->data->format);
+ if (err)
+ goto err;
+
+ b->keys.format = b->data->format;
+ b->keys.set->data = &b->data->keys;
} else {
bne = write_block(c, b);
i = &bne->keys;
@@ -523,6 +533,7 @@ static void do_btree_node_write(struct closure *cl)
BUG_ON(b->written >= btree_blocks(c));
BUG_ON(b->written && !i->u64s);
BUG_ON(btree_bset_first(b)->seq != i->seq);
+ BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
cancel_delayed_work(&b->work);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 9a3df3d1aaa2..63acaa625df8 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -268,6 +268,15 @@ static bool bch_ptr_normalize(struct btree_keys *bk, struct bkey_s k)
return bch_extent_normalize(b->c, k);
}
+static void bch_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
+{
+ u64 *d = (u64 *) bkeyp_val(f, k);
+ unsigned i;
+
+ for (i = 0; i < bkeyp_val_u64s(f, k); i++)
+ d[i] = swab64(d[i]);
+}
+
static const char *extent_ptr_invalid(const struct cache_member_rcu *mi,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk)
@@ -504,6 +513,7 @@ const struct bkey_ops bch_bkey_btree_ops = {
.key_invalid = bch_btree_ptr_invalid,
.key_debugcheck = btree_ptr_debugcheck,
.val_to_text = bch_btree_ptr_to_text,
+ .swab = bch_ptr_swab,
};
/* Extents */
@@ -1999,6 +2009,7 @@ const struct bkey_ops bch_bkey_extent_ops = {
.key_invalid = bch_extent_invalid,
.key_debugcheck = bch_extent_debugcheck,
.val_to_text = bch_extent_to_text,
+ .swab = bch_ptr_swab,
.is_extents = true,
};
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index da7b391fce59..2c4675e7b01f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -703,6 +703,29 @@ const char *bch_journal_read(struct cache_set *c, struct list_head *list)
if (list_empty(list))
return "no journal entries found";
+ /* Swabbing: */
+ list_for_each_entry(i, list, list) {
+ struct jset_entry *jkeys;
+ struct bkey_i *k;
+
+ if (JSET_BIG_ENDIAN(&i->j) != CPU_BIG_ENDIAN)
+ for_each_jset_jkeys(jkeys, &i->j)
+ switch (JKEYS_TYPE(jkeys)) {
+ case JKEYS_BTREE_KEYS:
+ for (k = jkeys->start;
+ k < bkey_idx(jkeys, le16_to_cpu(jkeys->u64s));
+ k = bkey_next(k))
+ bch_bkey_swab(bkey_type(jkeys->level,
+ jkeys->btree_id),
+ NULL, bkey_to_packed(k));
+ break;
+ case JKEYS_BTREE_ROOT:
+ bch_bkey_swab(BKEY_TYPE_BTREE,
+ NULL, bkey_to_packed(jkeys->start));
+ break;
+ }
+ }
+
j = &list_entry(list->prev, struct journal_replay, list)->j;
if (le64_to_cpu(j->seq) -
@@ -1466,6 +1489,8 @@ static void journal_write_locked(struct closure *cl)
w->data->version = cpu_to_le32(BCACHE_JSET_VERSION);
w->data->last_seq = cpu_to_le64(last_seq(j));
+ SET_JSET_BIG_ENDIAN(w->data, CPU_BIG_ENDIAN);
+
SET_JSET_CSUM_TYPE(w->data, c->opts.metadata_checksum);
w->data->csum = cpu_to_le64(__csum_set(w->data,
le32_to_cpu(w->data->u64s),
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 828032b725c5..b37d7d97d262 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -54,6 +54,12 @@ struct closure;
#define __flatten
#endif
+#ifdef __LITTLE_ENDIAN
+#define CPU_BIG_ENDIAN 0
+#else
+#define CPU_BIG_ENDIAN 1
+#endif
+
#define DECLARE_HEAP(type, name) \
struct { \
size_t size, used; \
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 516c1facb033..d783a3792c03 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -102,30 +102,34 @@ struct bch_val {
__u64 __nothing[0];
};
-struct bkey_packed {
+struct bkey {
__u64 _data[0];
/* Size of combined key and value, in u64s */
__u8 u64s;
- /* Format of key (0 for format local to btree node */
+ /* Format of key (0 for format local to btree node) */
__u8 format;
/* Type of the value */
__u8 type;
- __u8 key_start[0];
- /*
- * We copy bkeys with struct assignment in various places, and while
- * that shouldn't be done with packed bkeys we can't disallow it in C,
- * and it's legal to cast a bkey to a bkey_packed - so padding it out
- * to the same size as struct bkey should hopefully be safest.
- */
- __u8 pad[5];
- __u64 pad2[4];
+#if defined(__LITTLE_ENDIAN)
+ __u8 pad[1];
+
+ __u32 version;
+ __u32 size; /* extent size, in sectors */
+ struct bpos p;
+#elif defined(__BIG_ENDIAN)
+ struct bpos p;
+ __u32 size; /* extent size, in sectors */
+ __u32 version;
+
+ __u8 pad[1];
+#endif
} __attribute__((packed)) __attribute__((aligned(8)));
-struct bkey {
+struct bkey_packed {
__u64 _data[0];
/* Size of combined key and value, in u64s */
@@ -136,17 +140,15 @@ struct bkey {
/* Type of the value */
__u8 type;
+ __u8 key_start[0];
- __u8 pad[1];
-#if defined(__LITTLE_ENDIAN)
- __u32 version;
- __u32 size; /* extent size, in sectors */
- struct bpos p;
-#elif defined(__BIG_ENDIAN)
- struct bpos p;
- __u32 size; /* extent size, in sectors */
- __u32 version;
-#endif
+ /*
+ * We copy bkeys with struct assignment in various places, and while
+ * that shouldn't be done with packed bkeys we can't disallow it in C,
+ * and it's legal to cast a bkey to a bkey_packed - so padding it out
+ * to the same size as struct bkey should hopefully be safest.
+ */
+ __u8 pad[sizeof(struct bkey) - 3];
} __attribute__((packed)) __attribute__((aligned(8)));
#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
@@ -960,6 +962,7 @@ struct jset {
};
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
/* Bucket prios/gens */
@@ -1041,6 +1044,8 @@ LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
/* Only used in first bset */
LE32_BITMASK(BSET_BTREE_LEVEL, struct bset, flags, 4, 8);
+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 8, 9);
+
struct btree_node {
__le64 csum;
__le64 magic;