 drivers/md/bcache/bset.c      | 218 ++++------------------
 drivers/md/bcache/eytzinger.h | 196 ++++++++++++++++++++
 2 files changed, 233 insertions(+), 181 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e1d6a0cc9c52..94600a9a8e42 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -7,6 +7,7 @@
 
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
+#include "eytzinger.h"
 #include "util.h"
 #include "bset.h"
 
@@ -466,161 +467,6 @@ void bch_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
 
 /* Binary tree stuff for auxiliary search trees */
 
-#ifdef CONFIG_X86_64
-/* we don't need the rex prefixes, or the rep prefix (lzcnt/tzcnt) */
-
-#define __fls(_x)						\
-({								\
-	unsigned _r;						\
-	asm("bsr %1,%0" : "=r" (_r) : "rm" (_x));		\
-	_r;							\
-})
-
-#define __ffs(_x)						\
-({								\
-	unsigned _r;						\
-	asm("bsf %1,%0" : "=r" (_r) : "rm" (_x));		\
-	_r;							\
-})
-
-#define ffz(_x)							\
-({								\
-	unsigned _r;						\
-	asm("bsf %1,%0" : "=r" (_r) : "rm" (~(_x)));		\
-	_r;							\
-})
-
-#endif
-
-static unsigned inorder_next(unsigned j, unsigned size)
-{
-	if (j * 2 + 1 < size) {
-		j = j * 2 + 1;
-
-		j <<= __fls(size) - __fls(j);
-		j >>= j >= size;
-	} else
-		j >>= ffz(j) + 1;
-
-	return j;
-}
-
-static unsigned inorder_prev(unsigned j, unsigned size)
-{
-	if (j * 2 < size) {
-		j = j * 2 + 1;
-
-		j <<= __fls(size) - __fls(j);
-		j -= 1;
-		j >>= j >= size;
-	} else
-		j >>= __ffs(j) + 1;
-
-	return j;
-}
-
-/* I have no idea why this code works... and I'm the one who wrote it
- *
- * However, I do know what it does:
- * Given a binary tree constructed in an array (i.e. how you normally implement
- * a heap), it converts a node in the tree - referenced by array index - to the
- * index it would have if you did an inorder traversal.
- *
- * Also tested for every j, size up to size somewhere around 6 million.
- *
- * The binary tree starts at array index 1, not 0
- * extra is a function of size:
- *	extra = (size - rounddown_pow_of_two(size - 1)) << 1;
- */
-static inline unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
-{
-	unsigned b = __fls(j);
-	unsigned shift = __fls(size - 1) - b;
-	int s;
-
-	j ^= 1U << b;
-	j <<= 1;
-	j |= 1;
-	j <<= shift;
-
-	/* sign bit trick: */
-#if 0
-	if (j > extra)
-		j -= (j - extra) >> 1;
-#else
-	s = extra - j;
-	j += (s >> 1) & (s >> 31);
-#endif
-
-	return j;
-}
-
-static inline unsigned to_inorder(unsigned j, const struct bset_tree *t)
-{
-	return __to_inorder(j, t->size, t->extra);
-}
-
-/* j in [1..size) */
-static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
-{
-	unsigned shift;
-	int s;
-
-	EBUG_ON(!j || j >= size);
-
-	s = extra - j;
-	j -= s & (s >> 31);
-
-	shift = __ffs(j);
-
-	j >>= shift + 1;
-	j |= 1U << (__fls(size - 1) - shift);
-
-	return j;
-}
-
-static unsigned inorder_to_tree(unsigned j, const struct bset_tree *t)
-{
-	return __inorder_to_tree(j, t->size, t->extra);
-}
-
-#if 0
-void inorder_test(void)
-{
-	unsigned long done = 0;
-	ktime_t start = ktime_get();
-
-	for (unsigned size = 2;
-	     size < 65536000;
-	     size++) {
-		unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
-		unsigned i = 1, j = rounddown_pow_of_two(size - 1);
-
-		if (!(size % 4096))
-			printk(KERN_NOTICE "loop %u, %llu per us\n", size,
-			       done / ktime_us_delta(ktime_get(), start));
-
-		while (1) {
-			if (__inorder_to_tree(i, size, extra) != j)
-				panic("size %10u j %10u i %10u", size, j, i);
-
-			if (__to_inorder(j, size, extra) != i)
-				panic("size %10u j %10u i %10u", size, j, i);
-
-			if (j == rounddown_pow_of_two(size) - 1)
-				break;
-
-			BUG_ON(inorder_prev(inorder_next(j, size), size) != j);
-
-			j = inorder_next(j, size);
-			i++;
-		}
-
-		done += size - 1;
-	}
-}
-#endif
-
 /*
  * Cacheline/offset <-> bkey pointer arithmetic:
  *
@@ -628,8 +474,9 @@ void inorder_test(void)
  * in one cacheline in t->set (BSET_CACHELINE bytes).
  *
  * This means we don't have to store the full index of the key that a node in
- * the binary tree points to; to_inorder() gives us the cacheline, and then
- * bkey_float->m gives us the offset within that cacheline, in units of 8 bytes.
+ * the binary tree points to; eytzinger_to_inorder() gives us the cacheline, and
+ * then bkey_float->m gives us the offset within that cacheline, in units of 8
+ * bytes.
  *
  * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
  * make this work.
@@ -687,8 +534,9 @@ static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
					       const struct bset_tree *t,
					       unsigned j)
 {
-	return cacheline_to_bkey(b, t, to_inorder(j, t),
-				 bkey_float(b, t, j)->key_offset);
+	return cacheline_to_bkey(b, t,
+				 __eytzinger_to_inorder(j, t->size, t->extra),
+				 bkey_float(b, t, j)->key_offset);
 }
 
 static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
@@ -1035,9 +883,7 @@ retry:
 	t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
 
 	/* First we figure out where the first key in each cacheline is */
-	for (j = inorder_next(0, t->size);
-	     j;
-	     j = inorder_next(j, t->size)) {
+	eytzinger_for_each(j, t->size) {
 		while (bkey_to_cacheline(b, t, k) < cacheline)
 			prev = k, k = bkey_next(k);
 
@@ -1060,9 +906,7 @@ retry:
 	t->max_key = bkey_unpack_pos(b, k);
 
 	/* Then we build the tree */
-	for (j = inorder_next(0, t->size);
-	     j;
-	     j = inorder_next(j, t->size))
+	eytzinger_for_each(j, t->size)
 		make_bfloat(b, t, j, &min_key, &max_key);
 }
 
@@ -1152,7 +996,9 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
 		j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k));
 
 		do {
-			p = j ? tree_to_bkey(b, t, inorder_to_tree(j--, t))
+			p = j ? tree_to_bkey(b, t,
+					__inorder_to_eytzinger(j--,
+							t->size, t->extra))
 			      : btree_bkey_first(b, t);
 		} while (p >= k);
 		break;
@@ -1241,23 +1087,33 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
 	inorder = bkey_to_cacheline(b, t, k);
 
 	if (inorder &&
-	    inorder < t->size &&
-	    k == tree_to_bkey(b, t, j = inorder_to_tree(inorder, t))) {
-		/* Fix the auxiliary search tree node this key corresponds to */
-		make_bfloat(b, t, j, &min_key, &max_key);
+	    inorder < t->size) {
+		j = __inorder_to_eytzinger(inorder, t->size, t->extra);
 
-		/* Children for which this key is the right side boundary */
-		for (j = j * 2; j < t->size; j = j * 2 + 1)
+		if (k == tree_to_bkey(b, t, j)) {
+			/* Fix the node this key corresponds to */
 			make_bfloat(b, t, j, &min_key, &max_key);
+
+			/* Children for which this key is the right boundary */
+			for (j = eytzinger_left_child(j);
+			     j < t->size;
+			     j = eytzinger_right_child(j))
+				make_bfloat(b, t, j, &min_key, &max_key);
+		}
 	}
 
-	if (inorder + 1 < t->size &&
-	    k == tree_to_prev_bkey(b, t, j = inorder_to_tree(inorder + 1, t))) {
-		make_bfloat(b, t, j, &min_key, &max_key);
+	if (inorder + 1 < t->size) {
+		j = __inorder_to_eytzinger(inorder + 1, t->size, t->extra);
 
-		/* Children for which this key is the left side boundary */
-		for (j = j * 2 + 1; j < t->size; j = j * 2)
+		if (k == tree_to_prev_bkey(b, t, j)) {
 			make_bfloat(b, t, j, &min_key, &max_key);
+
+			/* Children for which this key is the left boundary */
+			for (j = eytzinger_right_child(j);
+			     j < t->size;
+			     j = eytzinger_left_child(j))
+				make_bfloat(b, t, j, &min_key, &max_key);
+		}
 	}
 }
 
@@ -1476,7 +1332,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
 			p = bkey_float_get(base, n << 4);
 			prefetch(p);
 		} else if (n << 3 < t->size) {
-			inorder = to_inorder(n, t);
+			inorder = __eytzinger_to_inorder(n, t->size, t->extra);
 			p = bset_cacheline(b, t, inorder);
 #ifdef CONFIG_X86_64
 			asm(".intel_syntax noprefix;"
@@ -1507,7 +1363,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
 					 &search, packed_search, n);
 	} while (n < t->size);
 
-	inorder = to_inorder(n >> 1, t);
+	inorder = __eytzinger_to_inorder(n >> 1, t->size, t->extra);
 
 	/*
 	 * n would have been the node we recursed to - the low bit tells us if
@@ -1517,7 +1373,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
 		return cacheline_to_bkey(b, t, inorder, f->key_offset);
 	} else {
 		if (--inorder) {
-			n = inorder_prev(n >> 1, t->size);
+			n = eytzinger_prev(n >> 1, t->size);
 			f = bkey_float_get(base, n);
 			return cacheline_to_bkey(b, t, inorder, f->key_offset);
 		} else
@@ -1937,7 +1793,7 @@ int bch_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
 	if (!bset_has_ro_aux_tree(t))
 		goto out;
 
-	j = inorder_to_tree(bkey_to_cacheline(b, t, k), t);
+	j = __inorder_to_eytzinger(bkey_to_cacheline(b, t, k), t->size, t->extra);
 	if (j &&
 	    j < t->size &&
 	    k == tree_to_bkey(b, t, j))
diff --git a/drivers/md/bcache/eytzinger.h b/drivers/md/bcache/eytzinger.h
new file mode 100644
index 000000000000..13d54e5eb2fa
--- /dev/null
+++ b/drivers/md/bcache/eytzinger.h
@@ -0,0 +1,196 @@
+#ifndef _EYTZINGER_H
+#define _EYTZINGER_H
+
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "util.h"
+
+/*
+ * Traversal for trees in eytzinger layout - a full binary tree laid out in an
+ * array.
+ *
+ * We use one based indexing, not zero based: with one based indexing, each
+ * level of the tree starts at a power of two - leading to better alignment -
+ * and it's what you want for implementing next/prev and to/from inorder.
+ *
+ * To/from inorder also uses 1 based indexing.
+ *
+ * The size parameter is treated as if we were using 0 based indexing, however:
+ * valid nodes, and inorder indices, are in the range [1..size)
+ */
+
+static inline unsigned eytzinger_child(unsigned j, unsigned child)
+{
+	EBUG_ON(child > 1);
+
+	return (j << 1) + child;
+}
+
+static inline unsigned eytzinger_left_child(unsigned j)
+{
+	return eytzinger_child(j, 0);
+}
+
+static inline unsigned eytzinger_right_child(unsigned j)
+{
+	return eytzinger_child(j, 1);
+}
+
+static inline unsigned eytzinger_first(unsigned size)
+{
+	return rounddown_pow_of_two(size - 1);
+}
+
+static inline unsigned eytzinger_last(unsigned size)
+{
+	return rounddown_pow_of_two(size) - 1;
+}
+
+/*
+ * eytzinger_next() and eytzinger_prev() have the nice properties that
+ *
+ * eytzinger_next(0) == eytzinger_first()
+ * eytzinger_prev(0) == eytzinger_last()
+ *
+ * eytzinger_prev(eytzinger_first()) == 0
+ * eytzinger_next(eytzinger_last()) == 0
+ */
+
+static inline unsigned eytzinger_next(unsigned j, unsigned size)
+{
+	EBUG_ON(j >= size);
+
+	if (eytzinger_right_child(j) < size) {
+		j = eytzinger_right_child(j);
+
+		j <<= __fls(size) - __fls(j);
+		j >>= j >= size;
+	} else {
+		j >>= ffz(j) + 1;
+	}
+
+	return j;
+}
+
+static inline unsigned eytzinger_prev(unsigned j, unsigned size)
+{
+	EBUG_ON(j >= size);
+
+	if (eytzinger_left_child(j) < size) {
+		j = eytzinger_left_child(j) + 1;
+
+		j <<= __fls(size) - __fls(j);
+		j -= 1;
+		j >>= j >= size;
+	} else {
+		j >>= __ffs(j) + 1;
+	}
+
+	return j;
+}
+
+static inline unsigned eytzinger_extra(unsigned size)
+{
+	return (size - rounddown_pow_of_two(size - 1)) << 1;
+}
+
+static inline unsigned __eytzinger_to_inorder(unsigned j, unsigned size,
+					      unsigned extra)
+{
+	unsigned b = __fls(j);
+	unsigned shift = __fls(size - 1) - b;
+	int s;
+
+	EBUG_ON(!j || j >= size);
+
+	j ^= 1U << b;
+	j <<= 1;
+	j |= 1;
+	j <<= shift;
+
+	/*
+	 * sign bit trick:
+	 *
+	 * if (j > extra)
+	 *	j -= (j - extra) >> 1;
+	 */
+	s = extra - j;
+	j += (s >> 1) & (s >> 31);
+
+	return j;
+}
+
+static inline unsigned __inorder_to_eytzinger(unsigned j, unsigned size,
+					      unsigned extra)
+{
+	unsigned shift;
+	int s;
+
+	EBUG_ON(!j || j >= size);
+
+	/*
+	 * sign bit trick:
+	 *
+	 * if (j > extra)
+	 *	j += j - extra;
+	 */
+	s = extra - j;
+	j -= s & (s >> 31);
+
+	shift = __ffs(j);
+
+	j >>= shift + 1;
+	j |= 1U << (__fls(size - 1) - shift);
+
+	return j;
+}
+
+static inline unsigned eytzinger_to_inorder(unsigned j, unsigned size)
+{
+	return __eytzinger_to_inorder(j, size, eytzinger_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger(unsigned j, unsigned size)
+{
+	return __inorder_to_eytzinger(j, size, eytzinger_extra(size));
+}
+
+#define eytzinger_for_each(_i, _size)			\
+	for ((_i) = eytzinger_first((_size));		\
+	     (_i) != 0;					\
+	     (_i) = eytzinger_next((_i), (_size)))
+
+#if 0
+void eytzinger_test(void)
+{
+	unsigned i, j, size;
+
+	for (size = 2;
+	     size < 65536000;
+	     size++) {
+		if (!(size % 4096))
+			printk(KERN_INFO "tree size %u\n", size);
+
+		assert(eytzinger_prev(0, size) == eytzinger_last(size));
+		assert(eytzinger_next(0, size) == eytzinger_first(size));
+
+		assert(eytzinger_prev(eytzinger_first(size), size) == 0);
+		assert(eytzinger_next(eytzinger_last(size), size) == 0);
+
+		i = 1;
+		eytzinger_for_each(j, size) {
+			assert(inorder_to_eytzinger(i, size) == j);
+			assert(eytzinger_to_inorder(j, size) == i);
+			i++;
+
+			if (j != eytzinger_last(size)) {
+				unsigned next = eytzinger_next(j, size);
+
+				assert(eytzinger_prev(next, size) == j);
+			}
+		}
+	}
+}
+#endif
+
+#endif /* _EYTZINGER_H */
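
A note on what the new helpers compute, easiest to see on a small tree. The sketch below is not part of the patch: it is a standalone userspace rendition of the same bit tricks, with the kernel's __fls()/ffz() stood in for by compiler builtins and rounddown_pow_of_two() open-coded (the names fls_/ffz_ are illustrative only). It walks a ten-node tree (size = 11, valid nodes [1..size)) with eytzinger_next() and checks that eytzinger_to_inorder() numbers the nodes 1..10 in key order:

/* eytzinger_demo.c - standalone sketch, NOT part of the kernel patch */
#include <assert.h>
#include <stdio.h>

/* Stand-ins for the kernel helpers used above: */
static unsigned fls_(unsigned x) { return 31 - __builtin_clz(x); } /* __fls() */
static unsigned ffz_(unsigned x) { return __builtin_ctz(~x); }     /* ffz()  */
static unsigned rounddown_pow_of_two(unsigned x) { return 1U << fls_(x); }

static unsigned eytzinger_first(unsigned size)
{
	/* leftmost node: the largest power of two below size */
	return rounddown_pow_of_two(size - 1);
}

static unsigned eytzinger_next(unsigned j, unsigned size)
{
	if (2 * j + 1 < size) {
		/* right child exists: step right, then left as far as possible */
		j = 2 * j + 1;
		j <<= fls_(size) - fls_(j);
		j >>= j >= size;	/* overshot the partial last level: back up one */
	} else {
		/* ascend past every right-child edge, then one left-child edge */
		j >>= ffz_(j) + 1;
	}
	return j;
}

static unsigned eytzinger_to_inorder(unsigned j, unsigned size)
{
	unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
	unsigned b = fls_(j), shift = fls_(size - 1) - b;
	int s;

	j ^= 1U << b;		/* drop the leading 1 bit */
	j = (j << 1) | 1;	/* append a 1 */
	j <<= shift;		/* pad as if the bottom level were full */

	/* sign bit trick: if (j > extra) j -= (j - extra) >> 1; */
	s = extra - j;
	j += (s >> 1) & (s >> 31);

	return j;
}

int main(void)
{
	unsigned size = 11;	/* valid nodes and inorder indices: [1..size) */
	unsigned i = 1, j;

	for (j = eytzinger_first(size); j; j = eytzinger_next(j, size)) {
		printf("eytzinger %2u -> inorder %2u\n", j,
		       eytzinger_to_inorder(j, size));
		assert(eytzinger_to_inorder(j, size) == i++);
	}
	assert(i == size);	/* visited all size - 1 nodes, in key order */
	return 0;
}

For size = 11 the traversal visits nodes 8, 4, 9, 2, 10, 5, 1, 6, 3, 7 - exactly the inorder sequence of a 1-based heap-layout array of ten nodes. That is the property bset.c relies on: eytzinger_for_each() walks cachelines in key order, and __eytzinger_to_inorder() maps a search tree node straight to its cacheline.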