summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2016-11-28 17:27:09 -0900
committer Kent Overstreet <kent.overstreet@gmail.com> 2016-12-06 07:54:24 -0900
commit 477d831f67be85fd1bf7e5b242bd01c74ffbe4d7 (patch)
tree 33f9ae0bc0982dfc9988de0e19244beaaaad60e2
parent 13d09a1fe152f640960a5b9441201bcb27e0ad67 (diff)
bcache: jit bkey_unpack_key()
-rw-r--r-- drivers/md/bcache/bkey.c 248
-rw-r--r-- drivers/md/bcache/bkey.h 16
-rw-r--r-- drivers/md/bcache/bset.c 6
-rw-r--r-- drivers/md/bcache/bset.h 34
-rw-r--r-- drivers/md/bcache/btree_io.c 6
-rw-r--r-- drivers/md/bcache/debug.c 5
6 files changed, 298 insertions, 17 deletions
diff --git a/drivers/md/bcache/bkey.c b/drivers/md/bcache/bkey.c
index 90cc2a5a10f5..03410c92f91d 100644
--- a/drivers/md/bcache/bkey.c
+++ b/drivers/md/bcache/bkey.c
@@ -283,6 +283,7 @@ struct bkey __bkey_unpack_key(const struct bkey_format *format,
return out;
}
+#ifndef HAVE_BCACHE_COMPILED_UNPACK
static struct bpos __bkey_unpack_pos(const struct bkey_format *format,
const struct bkey_packed *in)
{
@@ -299,6 +300,7 @@ static struct bpos __bkey_unpack_pos(const struct bkey_format *format,
return out;
}
+#endif
/**
* bkey_pack_key -- pack just the key, not the value
@@ -380,18 +382,6 @@ bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
#endif
/**
- * bkey_unpack_key -- unpack just the key, not the value
- */
-__flatten
-struct bkey bkey_unpack_key(const struct btree_keys *b,
- const struct bkey_packed *src)
-{
- return likely(bkey_packed(src))
- ? __bkey_unpack_key(&b->format, src)
- : *packed_to_bkey_c(src);
-}
-
-/**
* bkey_unpack -- unpack the key and the value
*/
void bkey_unpack(const struct btree_keys *b, struct bkey_i *dst,
@@ -758,6 +748,7 @@ unsigned bkey_ffs(const struct btree_keys *b,
}
#ifdef CONFIG_X86_64
+
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
unsigned nr_key_bits)
{
@@ -801,6 +792,231 @@ static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
return cmp;
}
+
+#define I(_x) (*(out)++ = (_x)) /* store one byte at *out and advance; relies on a local named out */
+#define I1(i0) I(i0) /* I1..I5: emit 1..5 bytes, in argument order */
+#define I2(i0, i1) (I1(i0), I(i1))
+#define I3(i0, i1, i2) (I2(i0, i1), I(i2))
+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3))
+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4))
+/* Emit x86-64 code unpacking @field from the packed key at [rsi] into [rdi + dst_offset]; returns the new end of @out. */
+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
+ enum bch_bkey_fields field,
+ unsigned dst_offset, unsigned dst_size,
+ bool *eax_zeroed)
+{
+ unsigned byte = format->key_u64s * sizeof(u64); /* packed key size in bytes; turned into the field's byte offset below */
+ unsigned bits = format->bits_per_field[field];
+ u64 offset = format->field_offset[field];
+ unsigned i, bit_offset = 0;
+ unsigned shl, shr;
+
+ if (!bits && !offset) { /* field is always zero: just store a zeroed eax/rax */
+ if (!*eax_zeroed) {
+ /* xor eax, eax (skipped when eax is already known to be zero) */
+ I2(0x31, 0xc0);
+ }
+
+ *eax_zeroed = true;
+ goto set_field;
+ }
+
+ if (!bits) {
+ /* zero-width field: the value is always the constant offset, store it as an immediate: */
+
+ switch (dst_size) {
+ case 8:
+ if (offset > S32_MAX) {
+ /* doesn't fit a sign extended imm32: mov DWORD PTR [rdi + dst_offset], low half of offset */
+ I3(0xc7, 0x47, dst_offset);
+ memcpy(out, &offset, 4);
+ out += 4;
+
+ I3(0xc7, 0x47, dst_offset + 4); /* mov DWORD PTR [rdi + dst_offset + 4], high half of offset */
+ memcpy(out, (void *) &offset + 4, 4);
+ out += 4;
+ } else {
+ /* mov QWORD PTR [rdi + dst_offset], imm32 */
+ /* imm32 is sign extended to 64 bits, hence the S32_MAX check */
+ I4(0x48, 0xc7, 0x47, dst_offset);
+ memcpy(out, &offset, 4);
+ out += 4;
+ }
+ break;
+ case 4:
+ /* mov DWORD PTR [rdi + dst_offset], imm32 */
+ I3(0xc7, 0x47, dst_offset);
+ memcpy(out, &offset, 4);
+ out += 4;
+ break;
+ default:
+ BUG();
+ }
+
+ return out;
+ }
+
+ for (i = 0; i <= field; i++)
+ bit_offset += format->bits_per_field[i]; /* total bits of fields 0..field */
+
+ byte -= DIV_ROUND_UP(bit_offset, 8); /* byte offset, counted back from the end of the key */
+ bit_offset = round_up(bit_offset, 8) - bit_offset; /* now: bits to skip below the field in that byte */
+
+ *eax_zeroed = false;
+
+ if (bit_offset == 0 && bits == 8) {
+ /* movzx eax, BYTE PTR [rsi + imm8] */
+ I4(0x0f, 0xb6, 0x46, byte);
+ } else if (bit_offset == 0 && bits == 16) {
+ /* movzx eax, WORD PTR [rsi + imm8] */
+ I4(0x0f, 0xb7, 0x46, byte);
+ } else if (bit_offset + bits <= 32) {
+ /* mov eax, [rsi + imm8] */
+ I3(0x8b, 0x46, byte);
+
+ if (bit_offset) {
+ /* shr eax, imm8 */
+ I3(0xc1, 0xe8, bit_offset);
+ }
+
+ if (bit_offset + bits < 32) {
+ unsigned mask = ~0U >> (32 - bits);
+
+ /* and eax, imm32 */
+ I1(0x25);
+ memcpy(out, &mask, 4);
+ out += 4;
+ }
+ } else if (bit_offset + bits <= 64) {
+ /* mov rax, [rsi + imm8] */
+ I4(0x48, 0x8b, 0x46, byte);
+
+ shl = 64 - bit_offset - bits; /* shift the field up to bit 63, then down to bit 0: clears both sides */
+ shr = bit_offset + shl;
+
+ if (shl) {
+ /* shl rax, imm8 */
+ I4(0x48, 0xc1, 0xe0, shl);
+ }
+
+ if (shr) {
+ /* shr rax, imm8 */
+ I4(0x48, 0xc1, 0xe8, shr);
+ }
+ } else {
+ /* mov rax, [rsi + byte] */
+ I4(0x48, 0x8b, 0x46, byte);
+
+ /* mov edx, [rsi + byte + 8] */
+ I3(0x8b, 0x56, byte + 8);
+
+ /* bits from next word: */
+ shr = bit_offset + bits - 64;
+ BUG_ON(shr > bit_offset);
+
+ /* shr rax, imm8: make room at the top for the bits from the next word */
+ I4(0x48, 0xc1, 0xe8, shr);
+
+ /* shl rdx, imm8: move the bits from the next word to the top */
+ I4(0x48, 0xc1, 0xe2, 64 - shr);
+
+ /* or rax, rdx */
+ I3(0x48, 0x09, 0xd0);
+
+ shr = bit_offset - shr;
+
+ if (shr) {
+ /* shr rax, imm8: remaining shift that brings the field down to bit 0 */
+ I4(0x48, 0xc1, 0xe8, shr);
+ }
+ }
+
+ /* rax += offset: */
+ if (offset > S32_MAX) {
+ /* mov rdx, imm64 */
+ I2(0x48, 0xba);
+ memcpy(out, &offset, 8);
+ out += 8;
+ /* add rax, rdx */
+ I3(0x48, 0x01, 0xd0);
+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) {
+ /* add rax, imm32 (sign extended; result may not fit in 32 bits) */
+ I2(0x48, 0x05);
+ memcpy(out, &offset, 4);
+ out += 4;
+ } else if (offset) {
+ /* add eax, imm32 */
+ I1(0x05);
+ memcpy(out, &offset, 4);
+ out += 4;
+ }
+set_field:
+ switch (dst_size) {
+ case 8:
+ /* mov [rdi + dst_offset], rax */
+ I4(0x48, 0x89, 0x47, dst_offset);
+ break;
+ case 4:
+ /* mov [rdi + dst_offset], eax */
+ I3(0x89, 0x47, dst_offset);
+ break;
+ default:
+ BUG();
+ }
+
+ return out;
+}
+
+int bch_compile_bkey_format(const struct bkey_format *format, void *_out)
+{
+ bool eax_zeroed = false;
+ u8 *out = _out;
+
+ /*
+ * Emits a function into _out that unpacks a key packed with @format:
+ * rdi: dst - unpacked key, rsi: src - packed key
+ */
+
+ /* k->u64s, k->format, k->type */
+
+ /* mov eax, [rsi] */
+ I2(0x8b, 0x06);
+
+ /* add eax, imm32: low byte BKEY_U64s - format->key_u64s, next byte KEY_FORMAT_CURRENT */
+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0);
+
+ /* and eax, imm32: mask out k->pad: */
+ I5(0x25, 0xff, 0xff, 0xff, 0);
+
+ /* mov [rdi], eax */
+ I2(0x89, 0x07);
+
+ out = compile_bkey_field(format, out, BKEY_FIELD_INODE,
+ offsetof(struct bkey, p.inode), 8,
+ &eax_zeroed);
+
+ out = compile_bkey_field(format, out, BKEY_FIELD_OFFSET,
+ offsetof(struct bkey, p.offset), 8,
+ &eax_zeroed);
+
+ out = compile_bkey_field(format, out, BKEY_FIELD_SNAPSHOT,
+ offsetof(struct bkey, p.snapshot), 4,
+ &eax_zeroed);
+
+ out = compile_bkey_field(format, out, BKEY_FIELD_SIZE,
+ offsetof(struct bkey, size), 4,
+ &eax_zeroed);
+
+ out = compile_bkey_field(format, out, BKEY_FIELD_VERSION,
+ offsetof(struct bkey, version), 4,
+ &eax_zeroed);
+
+ /* retq */
+ I1(0xc3);
+
+ return (void *) out - _out; /* number of bytes of code written to _out */
+}
+
#else
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
unsigned nr_key_bits)
@@ -899,9 +1115,11 @@ __flatten
int __bkey_cmp_left_packed(const struct btree_keys *b,
const struct bkey_packed *l, struct bpos r)
{
- const struct bkey_format *f = &b->format;
-
- return bkey_cmp(__bkey_unpack_pos(f, l), r);
+#ifdef HAVE_BCACHE_COMPILED_UNPACK
+ return bkey_cmp(bkey_unpack_key(b, l).p, r);
+#else
+ return bkey_cmp(__bkey_unpack_pos(&b->format, l), r);
+#endif
}
void bch_bpos_swab(struct bpos *p)
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h
index a7894f2115e9..fe25beaa56d7 100644
--- a/drivers/md/bcache/bkey.h
+++ b/drivers/md/bcache/bkey.h
@@ -276,8 +276,8 @@ bool bch_bkey_transform(const struct bkey_format *,
const struct bkey_format *,
const struct bkey_packed *);
-struct bkey bkey_unpack_key(const struct btree_keys *,
- const struct bkey_packed *);
+struct bkey __bkey_unpack_key(const struct bkey_format *,
+ const struct bkey_packed *);
bool bkey_pack_key(struct bkey_packed *, const struct bkey *,
const struct bkey_format *);
@@ -309,6 +309,18 @@ static inline u64 bkey_field_max(const struct bkey_format *f,
: U64_MAX;
}
+#ifdef CONFIG_X86_64
+#define HAVE_BCACHE_COMPILED_UNPACK 1
+
+int bch_compile_bkey_format(const struct bkey_format *, void *); /* returns the length of the generated code */
+
+#else /* !CONFIG_X86_64: stub writes nothing and reports a length of 0 */
+
+static inline int bch_compile_bkey_format(const struct bkey_format *format,
+ void *out) { return 0; }
+
+#endif /* CONFIG_X86_64 */
+
static inline void bkey_reassemble(struct bkey_i *dst,
struct bkey_s_c src)
{
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e65d754108ab..5b558a2e0bc6 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -355,6 +355,8 @@ void bch_btree_keys_free(struct btree_keys *b)
{
struct bset_tree *t = b->set;
+ vfree(b->unpack_fn);
+
if (bset_prev_bytes(b) < PAGE_SIZE)
kfree(t->prev);
else
@@ -392,6 +394,10 @@ int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
if (!t->prev)
goto err;
+ b->unpack_fn = vmalloc_exec(200);
+ if (!b->unpack_fn)
+ goto err;
+
return 0;
err:
bch_btree_keys_free(b);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index c88df03c908d..04503da380d6 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -223,6 +223,8 @@ struct btree_nr_keys {
u16 unpacked_keys;
};
+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
+
struct btree_keys {
u8 nsets;
u8 page_order;
@@ -243,13 +245,45 @@ struct btree_keys {
#ifdef CONFIG_BCACHE_DEBUG
bool *expensive_debug_checks;
#endif
+
+ compiled_unpack_fn unpack_fn;
};
static inline void btree_node_set_format(struct btree_keys *b,
struct bkey_format f)
{
+ int len;
+
b->format = f;
b->nr_key_bits = bkey_format_key_bits(&f);
+
+ len = bch_compile_bkey_format(&b->format, b->unpack_fn); /* regenerate the unpack code for the new format */
+ BUG_ON(len < 0 || len > 200); /* NOTE(review): 200 matches vmalloc_exec(200) in bset.c; only checked after the code was written */
+}
+
+/**
+ * bkey_unpack_key -- unpack just the key, not the value; a key that is not packed is returned as a plain copy
+ */
+static inline struct bkey bkey_unpack_key(const struct btree_keys *b,
+ const struct bkey_packed *src)
+{
+ struct bkey dst;
+
+ if (unlikely(!bkey_packed(src)))
+ return *packed_to_bkey_c(src);
+
+#ifdef HAVE_BCACHE_COMPILED_UNPACK
+ b->unpack_fn(&dst, src); /* code generated by bch_compile_bkey_format() for b->format */
+
+ if (IS_ENABLED(CONFIG_BCACHE_DEBUG)) { /* debug builds: cross-check against the C implementation */
+ struct bkey dst2 = __bkey_unpack_key(&b->format, src);
+
+ BUG_ON(memcmp(&dst, &dst2, sizeof(dst)));
+ }
+#else /* !HAVE_BCACHE_COMPILED_UNPACK */
+ dst = __bkey_unpack_key(&b->format, src);
+#endif /* HAVE_BCACHE_COMPILED_UNPACK */
+ return dst;
}
/* Disassembled bkeys */
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index 81880298f5d4..59245e75499e 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -503,6 +503,12 @@ static unsigned sort_keys(struct bkey_packed *dst,
!bkey_cmp_packed(iter->b, in, next)) {
BUG_ON(in->needs_whiteout &&
next->needs_whiteout);
+ /*
+ * XXX racy, called with read lock from write path
+ *
+ * leads to spurious BUG_ON() in bkey_unpack_key() in
+ * debug mode
+ */
next->needs_whiteout |= in->needs_whiteout;
continue;
}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index b4d14d24c9ca..b171f452de12 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -300,6 +300,9 @@ static int print_btree_node(struct dump_iter *i, struct btree *b)
{
const struct bkey_format *f = &b->keys.format;
struct bset_stats stats;
+ u8 unpack_fn[200];
+ int unpack_fn_len =
+ bch_compile_bkey_format(&b->keys.format, unpack_fn);
memset(&stats, 0, sizeof(stats));
@@ -308,6 +311,7 @@ static int print_btree_node(struct dump_iter *i, struct btree *b)
i->bytes = scnprintf(i->buf, sizeof(i->buf),
"l %u %llu:%llu - %llu:%llu:\n"
" format: u64s %u fields %u %u %u %u %u\n"
+ " unpack fn len: %u\n"
" bytes used %zu/%zu (%zu%% full)\n"
" sib u64s: %u, %u (merge threshold %zu)\n"
" nr packed keys %u\n"
@@ -327,6 +331,7 @@ static int print_btree_node(struct dump_iter *i, struct btree *b)
f->bits_per_field[2],
f->bits_per_field[3],
f->bits_per_field[4],
+ unpack_fn_len,
b->keys.nr.live_u64s * sizeof(u64),
btree_bytes(i->c) - sizeof(struct btree_node),
b->keys.nr.live_u64s * 100 / btree_max_u64s(i->c),