diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-11-28 17:27:09 -0900 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-12-06 07:54:24 -0900 |
commit | 477d831f67be85fd1bf7e5b242bd01c74ffbe4d7 (patch) | |
tree | 33f9ae0bc0982dfc9988de0e19244beaaaad60e2 | |
parent | 13d09a1fe152f640960a5b9441201bcb27e0ad67 (diff) |
bcache: jit bkey_unpack_key()
-rw-r--r-- | drivers/md/bcache/bkey.c | 248 | ||||
-rw-r--r-- | drivers/md/bcache/bkey.h | 16 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 34 | ||||
-rw-r--r-- | drivers/md/bcache/btree_io.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 5 |
6 files changed, 298 insertions, 17 deletions
diff --git a/drivers/md/bcache/bkey.c b/drivers/md/bcache/bkey.c index 90cc2a5a10f5..03410c92f91d 100644 --- a/drivers/md/bcache/bkey.c +++ b/drivers/md/bcache/bkey.c @@ -283,6 +283,7 @@ struct bkey __bkey_unpack_key(const struct bkey_format *format, return out; } +#ifndef HAVE_BCACHE_COMPILED_UNPACK static struct bpos __bkey_unpack_pos(const struct bkey_format *format, const struct bkey_packed *in) { @@ -299,6 +300,7 @@ static struct bpos __bkey_unpack_pos(const struct bkey_format *format, return out; } +#endif /** * bkey_pack_key -- pack just the key, not the value @@ -380,18 +382,6 @@ bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in, #endif /** - * bkey_unpack_key -- unpack just the key, not the value - */ -__flatten -struct bkey bkey_unpack_key(const struct btree_keys *b, - const struct bkey_packed *src) -{ - return likely(bkey_packed(src)) - ? __bkey_unpack_key(&b->format, src) - : *packed_to_bkey_c(src); -} - -/** * bkey_unpack -- unpack the key and the value */ void bkey_unpack(const struct btree_keys *b, struct bkey_i *dst, @@ -758,6 +748,7 @@ unsigned bkey_ffs(const struct btree_keys *b, } #ifdef CONFIG_X86_64 + static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, unsigned nr_key_bits) { @@ -801,6 +792,231 @@ static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, return cmp; } + +#define I(_x) (*(out)++ = (_x)) /* store one code byte at *out and advance out; I1..I5 emit 1..5 bytes in order */ +#define I1(i0) I(i0) +#define I2(i0, i1) (I1(i0), I(i1)) +#define I3(i0, i1, i2) (I2(i0, i1), I(i2)) +#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3)) +#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4)) + +static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out, + enum bch_bkey_fields field, + unsigned dst_offset, unsigned dst_size, + bool *eax_zeroed) +{ /* Emit code that loads one packed field through rsi, adds the field offset and stores the result at [rdi + dst_offset]; returns the advanced out pointer. */ + unsigned byte = format->key_u64s * sizeof(u64); /* packed key size in bytes; lowered below to the byte the field is loaded from */ + unsigned bits = format->bits_per_field[field]; + u64 offset = format->field_offset[field]; + unsigned i, bit_offset = 0; + unsigned shl, shr; + + if (!bits && !offset) { /* field is a constant zero: store zeroed eax/rax, skipping the xor when eax is already zero */ + if 
(!*eax_zeroed) { + /* xor eax, eax */ + I2(0x31, 0xc0); + } + + *eax_zeroed = true; + goto set_field; + } + + if (!bits) { + /* just return offset: */ + + switch (dst_size) { + case 8: + if (offset > S32_MAX) { + /* mov [rdi + dst_offset], offset */ + I3(0xc7, 0x47, dst_offset); + memcpy(out, &offset, 4); + out += 4; + + I3(0xc7, 0x47, dst_offset + 4); /* second 32-bit store: bytes 4..7 of offset */ + memcpy(out, (void *) &offset + 4, 4); + out += 4; + } else { + /* mov [rdi + dst_offset], offset */ + /* sign extended */ + I4(0x48, 0xc7, 0x47, dst_offset); + memcpy(out, &offset, 4); + out += 4; + } + break; + case 4: + /* mov [rdi + dst_offset], offset */ + I3(0xc7, 0x47, dst_offset); + memcpy(out, &offset, 4); + out += 4; + break; + default: + BUG(); + } + + return out; + } + + for (i = 0; i <= field; i++) + bit_offset += format->bits_per_field[i]; /* bits used by this field and every field before it */ + + byte -= DIV_ROUND_UP(bit_offset, 8); + bit_offset = round_up(bit_offset, 8) - bit_offset; /* now the count of low bits to discard after loading from [rsi + byte] */ + + *eax_zeroed = false; + + if (bit_offset == 0 && bits == 8) { + /* movzx eax, BYTE PTR [rsi + imm8] */ + I4(0x0f, 0xb6, 0x46, byte); + } else if (bit_offset == 0 && bits == 16) { + /* movzx eax, WORD PTR [rsi + imm8] */ + I4(0x0f, 0xb7, 0x46, byte); + } else if (bit_offset + bits <= 32) { + /* mov eax, [rsi + imm8] */ + I3(0x8b, 0x46, byte); + + if (bit_offset) { + /* shr eax, imm8 */ + I3(0xc1, 0xe8, bit_offset); + } + + if (bit_offset + bits < 32) { + unsigned mask = ~0U >> (32 - bits); + + /* and eax, imm32 */ + I1(0x25); + memcpy(out, &mask, 4); + out += 4; + } + } else if (bit_offset + bits <= 64) { + /* mov rax, [rsi + imm8] */ + I4(0x48, 0x8b, 0x46, byte); + + shl = 64 - bit_offset - bits; + shr = bit_offset + shl; /* shl then shr clears the bits above the field and drops the bits below it */ + + if (shl) { + /* shl rax, imm8 */ + I4(0x48, 0xc1, 0xe0, shl); + } + + if (shr) { + /* shr rax, imm8 */ + I4(0x48, 0xc1, 0xe8, shr); + } + } else { + /* mov rax, [rsi + byte] */ + I4(0x48, 0x8b, 0x46, byte); + + /* mov edx, [rsi + byte + 8] */ + I3(0x8b, 0x56, byte + 8); + + /* bits from next word: */ + shr = bit_offset + bits - 64; + BUG_ON(shr > 
bit_offset); + + /* shr rax, imm8 (imm8 = number of bits taken from the next word) */ + I4(0x48, 0xc1, 0xe8, shr); + + /* shl rdx, imm8 */ + I4(0x48, 0xc1, 0xe2, 64 - shr); + + /* or rax, rdx */ + I3(0x48, 0x09, 0xd0); + + shr = bit_offset - shr; /* remaining shift so the field ends up at bit 0 */ + + if (shr) { + /* shr rax, imm8 */ + I4(0x48, 0xc1, 0xe8, shr); + } + } + + /* rax += offset: */ + if (offset > S32_MAX) { + /* mov rdx, imm64 */ + I2(0x48, 0xba); + memcpy(out, &offset, 8); + out += 8; + /* add rax, rdx */ + I3(0x48, 0x01, 0xd0); + } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) { + /* add rax, imm32 */ + I2(0x48, 0x05); + memcpy(out, &offset, 4); + out += 4; + } else if (offset) { + /* add eax, imm32 */ + I1(0x05); + memcpy(out, &offset, 4); + out += 4; + } +set_field: + switch (dst_size) { + case 8: + /* mov [rdi + dst_offset], rax */ + I4(0x48, 0x89, 0x47, dst_offset); + break; + case 4: + /* mov [rdi + dst_offset], eax */ + I3(0x89, 0x47, dst_offset); + break; + default: + BUG(); + } + + return out; +} + +int bch_compile_bkey_format(const struct bkey_format *format, void *_out) +{ /* Write an unpack routine for format into _out; the return value is the number of bytes written. */ + bool eax_zeroed = false; + u8 *out = _out; + + /* + * rdi: dst - unpacked key + * rsi: src - packed key + */ + + /* k->u64s, k->format, k->type */ + + /* mov eax, [rsi] */ + I2(0x8b, 0x06); + + /* add eax, BKEY_U64s - format->key_u64s */ + I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0); /* imm32 bytes, low to high: u64s delta, KEY_FORMAT_CURRENT, 0, 0 */ + + /* and eax, imm32: mask out k->pad: */ + I5(0x25, 0xff, 0xff, 0xff, 0); + + /* mov [rdi], eax */ + I2(0x89, 0x07); + + out = compile_bkey_field(format, out, BKEY_FIELD_INODE, + offsetof(struct bkey, p.inode), 8, + &eax_zeroed); + + out = compile_bkey_field(format, out, BKEY_FIELD_OFFSET, + offsetof(struct bkey, p.offset), 8, + &eax_zeroed); + + out = compile_bkey_field(format, out, BKEY_FIELD_SNAPSHOT, + offsetof(struct bkey, p.snapshot), 4, + &eax_zeroed); + + out = compile_bkey_field(format, out, BKEY_FIELD_SIZE, + offsetof(struct bkey, size), 4, + &eax_zeroed); + + out = compile_bkey_field(format, out, BKEY_FIELD_VERSION, + offsetof(struct bkey, 
version), 4, + &eax_zeroed); + + /* retq */ + I1(0xc3); + + return (void *) out - _out; /* number of code bytes written */ +} + #else static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, unsigned nr_key_bits) @@ -899,9 +1115,11 @@ __flatten int __bkey_cmp_left_packed(const struct btree_keys *b, const struct bkey_packed *l, struct bpos r) { - const struct bkey_format *f = &b->format; - - return bkey_cmp(__bkey_unpack_pos(f, l), r); +#ifdef HAVE_BCACHE_COMPILED_UNPACK + return bkey_cmp(bkey_unpack_key(b, l).p, r); +#else + return bkey_cmp(__bkey_unpack_pos(&b->format, l), r); +#endif } void bch_bpos_swab(struct bpos *p) diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h index a7894f2115e9..fe25beaa56d7 100644 --- a/drivers/md/bcache/bkey.h +++ b/drivers/md/bcache/bkey.h @@ -276,8 +276,8 @@ bool bch_bkey_transform(const struct bkey_format *, const struct bkey_format *, const struct bkey_packed *); -struct bkey bkey_unpack_key(const struct btree_keys *, - const struct bkey_packed *); +struct bkey __bkey_unpack_key(const struct bkey_format *, + const struct bkey_packed *); bool bkey_pack_key(struct bkey_packed *, const struct bkey *, const struct bkey_format *); @@ -309,6 +309,18 @@ static inline u64 bkey_field_max(const struct bkey_format *f, : U64_MAX; } +#ifdef CONFIG_X86_64 +#define HAVE_BCACHE_COMPILED_UNPACK 1 /* selects the generated-code path in bkey_unpack_key() */ + +int bch_compile_bkey_format(const struct bkey_format *, void *); + +#else + +static inline int bch_compile_bkey_format(const struct bkey_format *format, + void *out) { return 0; } /* stub for builds without CONFIG_X86_64: emits nothing */ + +#endif + static inline void bkey_reassemble(struct bkey_i *dst, struct bkey_s_c src) { diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index e65d754108ab..5b558a2e0bc6 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -355,6 +355,8 @@ void bch_btree_keys_free(struct btree_keys *b) { struct bset_tree *t = b->set; + vfree(b->unpack_fn); + if (bset_prev_bytes(b) < PAGE_SIZE) kfree(t->prev); else @@ -392,6 +394,10 @@ int bch_btree_keys_alloc(struct 
btree_keys *b, unsigned page_order, gfp_t gfp) if (!t->prev) goto err; + b->unpack_fn = vmalloc_exec(200); /* 200 bytes is the same bound btree_node_set_format() checks */ + if (!b->unpack_fn) + goto err; + return 0; err: bch_btree_keys_free(b); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index c88df03c908d..04503da380d6 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -223,6 +223,8 @@ struct btree_nr_keys { u16 unpacked_keys; }; +typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *); /* (dst unpacked key, src packed key) */ + struct btree_keys { u8 nsets; u8 page_order; @@ -243,13 +245,45 @@ struct btree_keys { #ifdef CONFIG_BCACHE_DEBUG bool *expensive_debug_checks; #endif + + compiled_unpack_fn unpack_fn; }; static inline void btree_node_set_format(struct btree_keys *b, struct bkey_format f) { + int len; + b->format = f; b->nr_key_bits = bkey_format_key_bits(&f); + + len = bch_compile_bkey_format(&b->format, b->unpack_fn); + BUG_ON(len < 0 || len > 200); /* NOTE(review): this runs after the code has already been written into unpack_fn */ +} + +/** + * bkey_unpack_key -- unpack just the key, not the value + */ +static inline struct bkey bkey_unpack_key(const struct btree_keys *b, + const struct bkey_packed *src) +{ + struct bkey dst; + + if (unlikely(!bkey_packed(src))) + return *packed_to_bkey_c(src); + +#ifdef HAVE_BCACHE_COMPILED_UNPACK + b->unpack_fn(&dst, src); /* call the routine generated in btree_node_set_format() */ + + if (IS_ENABLED(CONFIG_BCACHE_DEBUG)) { /* compare against the C unpack path */ + struct bkey dst2 = __bkey_unpack_key(&b->format, src); + + BUG_ON(memcmp(&dst, &dst2, sizeof(dst))); + } +#else + dst = __bkey_unpack_key(&b->format, src); +#endif + return dst; } /* Disassembled bkeys */ diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c index 81880298f5d4..59245e75499e 100644 --- a/drivers/md/bcache/btree_io.c +++ b/drivers/md/bcache/btree_io.c @@ -503,6 +503,12 @@ static unsigned sort_keys(struct bkey_packed *dst, !bkey_cmp_packed(iter->b, in, next)) { BUG_ON(in->needs_whiteout && next->needs_whiteout); + /* + * XXX racy, called with read lock from write path + * + * leads to spurious BUG_ON() in bkey_unpack_key() in + * debug mode + */ 
next->needs_whiteout |= in->needs_whiteout; continue; } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index b4d14d24c9ca..b171f452de12 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -300,6 +300,9 @@ static int print_btree_node(struct dump_iter *i, struct btree *b) { const struct bkey_format *f = &b->keys.format; struct bset_stats stats; + u8 unpack_fn[200]; /* scratch buffer for the generated code */ + int unpack_fn_len = + bch_compile_bkey_format(&b->keys.format, unpack_fn); /* compiled here to obtain the code length printed below */ memset(&stats, 0, sizeof(stats)); @@ -308,6 +311,7 @@ static int print_btree_node(struct dump_iter *i, struct btree *b) i->bytes = scnprintf(i->buf, sizeof(i->buf), "l %u %llu:%llu - %llu:%llu:\n" " format: u64s %u fields %u %u %u %u %u\n" + " unpack fn len: %u\n" " bytes used %zu/%zu (%zu%% full)\n" " sib u64s: %u, %u (merge threshold %zu)\n" " nr packed keys %u\n" @@ -327,6 +331,7 @@ static int print_btree_node(struct dump_iter *i, struct btree *b) f->bits_per_field[2], f->bits_per_field[3], f->bits_per_field[4], + unpack_fn_len, /* NOTE(review): int passed for a %u conversion */ b->keys.nr.live_u64s * sizeof(u64), btree_bytes(i->c) - sizeof(struct btree_node), b->keys.nr.live_u64s * 100 / btree_max_u64s(i->c), |