summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/md/bcache/bcache.h2
-rw-r--r--drivers/md/bcache/bkey_methods.c2
-rw-r--r--drivers/md/bcache/bkey_methods.h21
-rw-r--r--drivers/md/bcache/bset.c357
-rw-r--r--drivers/md/bcache/bset.h129
-rw-r--r--drivers/md/bcache/btree_cache.c6
-rw-r--r--drivers/md/bcache/btree_gc.c9
-rw-r--r--drivers/md/bcache/btree_io.c92
-rw-r--r--drivers/md/bcache/btree_iter.c5
-rw-r--r--drivers/md/bcache/btree_types.h14
-rw-r--r--drivers/md/bcache/btree_update.c61
-rw-r--r--drivers/md/bcache/btree_update.h4
-rw-r--r--drivers/md/bcache/debug.c6
-rw-r--r--drivers/md/bcache/dirent.c3
-rw-r--r--drivers/md/bcache/dirent.h1
-rw-r--r--drivers/md/bcache/extents.c44
-rw-r--r--drivers/md/bcache/extents.h11
-rw-r--r--drivers/md/bcache/inode.c3
-rw-r--r--drivers/md/bcache/inode.h1
-rw-r--r--drivers/md/bcache/super.c6
-rw-r--r--drivers/md/bcache/sysfs.c14
-rw-r--r--drivers/md/bcache/xattr.c3
-rw-r--r--drivers/md/bcache/xattr.h1
23 files changed, 370 insertions, 425 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 599896689bc1..3f604fcd8560 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -755,7 +755,7 @@ struct cache_set {
*/
mempool_t fill_iter;
- struct bset_sort_state sort;
+ mempool_t btree_sort_pool;
struct journal journal;
unsigned btree_flush_delay;
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index 47db7a2ba04a..9755cc60557f 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -8,7 +8,7 @@
#include "inode.h"
#include "xattr.h"
-static const struct bkey_ops *bch_bkey_ops[] = {
+const struct bkey_ops *bch_bkey_ops[] = {
[BKEY_TYPE_EXTENTS] = &bch_bkey_extent_ops,
[BKEY_TYPE_INODES] = &bch_bkey_inode_ops,
[BKEY_TYPE_DIRENTS] = &bch_bkey_dirent_ops,
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index 03ca92e28a29..f8c7f42aba7e 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -26,9 +26,25 @@ static inline bool btree_type_has_ptrs(enum bkey_type type)
}
struct cache_set;
+struct btree_keys;
struct btree;
struct bkey;
+enum merge_result {
+ BCH_MERGE_NOMERGE,
+
+ /*
+ * The keys were mergeable, but would have overflowed size - so instead
+ * l was changed to the maximum size, and both keys were modified:
+ */
+ BCH_MERGE_PARTIAL,
+ BCH_MERGE_MERGE,
+};
+
+typedef bool (*key_filter_fn)(struct btree_keys *, struct bkey_s);
+typedef enum merge_result (*key_merge_fn)(struct btree_keys *,
+ struct bkey_i *, struct bkey_i *);
+
struct bkey_ops {
/* Returns reason for being invalid if invalid, else NULL: */
const char * (*key_invalid)(const struct cache_set *,
@@ -38,7 +54,8 @@ struct bkey_ops {
void (*val_to_text)(struct cache_set *, char *,
size_t, struct bkey_s_c);
void (*swab)(const struct bkey_format *, struct bkey_packed *);
-
+ key_filter_fn key_normalize;
+ key_merge_fn key_merge;
bool is_extents;
};
@@ -53,6 +70,8 @@ void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
void bch_bkey_swab(enum bkey_type, const struct bkey_format *,
struct bkey_packed *);
+extern const struct bkey_ops *bch_bkey_ops[];
+
#undef DEF_BTREE_ID
#endif /* _BCACHE_BKEY_METHODS_H */
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 553f8c4f04ab..04a940487077 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -72,16 +72,25 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
n = bkey_unpack_key(f, _n);
- if (bkey_cmp(bkey_start_pos(&n), k.p) < 0)
+ if (bkey_cmp(bkey_start_pos(&n), k.p) < 0) {
printk(KERN_ERR "Key skipped backwards\n");
- else if (!b->ops->is_extents &&
- !bkey_deleted(&k) &&
- !bkey_cmp(n.p, k.p))
+ continue;
+ }
+
+ /*
+ * Weird check for duplicate non extent keys: extents are
+ * deleted iff they have 0 size, so if it has zero size and it's
+ * not deleted these aren't extents:
+ */
+ if (((!k.size && !bkey_deleted(&k)) ||
+ (!n.size && !bkey_deleted(&n))) &&
+ !bkey_deleted(&k) &&
+ !bkey_cmp(n.p, k.p))
printk(KERN_ERR "Duplicate keys\n");
}
}
-void bch_dump_bucket(struct btree_keys *b)
+void bch_dump_btree_node(struct btree_keys *b)
{
unsigned i;
@@ -157,7 +166,7 @@ static void bch_btree_node_iter_next_check(struct btree_node_iter *iter,
struct bkey nu = bkey_unpack_key(f, n);
char buf1[80], buf2[80];
- bch_dump_bucket(b);
+ bch_dump_btree_node(b);
bch_bkey_to_text(buf1, sizeof(buf1), &ku);
bch_bkey_to_text(buf2, sizeof(buf2), &nu);
panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2);
@@ -207,10 +216,10 @@ void bch_verify_key_order(struct btree_keys *b,
k = bkey_prev_all(t, where);
if (k &&
- keys_out_of_order(f, k, where, b->ops->is_extents)) {
+ keys_out_of_order(f, k, where, iter->is_extents)) {
char buf1[100], buf2[100];
- bch_dump_bucket(b);
+ bch_dump_btree_node(b);
uk = bkey_unpack_key(f, k);
bch_bkey_to_text(buf1, sizeof(buf1), &uk);
bch_bkey_to_text(buf2, sizeof(buf2), &uw);
@@ -220,7 +229,7 @@ void bch_verify_key_order(struct btree_keys *b,
k = bkey_next(where);
BUG_ON(k != bset_bkey_last(t->data) &&
- keys_out_of_order(f, where, k, b->ops->is_extents));
+ keys_out_of_order(f, where, k, iter->is_extents));
for (t = b->set; t <= b->set + b->nsets; t++) {
if (!t->data->u64s)
@@ -244,7 +253,7 @@ void bch_verify_key_order(struct btree_keys *b,
k = bkey_next(k)) {
uk = bkey_unpack_key(f, k);
- if (b->ops->is_extents) {
+ if (iter->is_extents) {
BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
} else {
@@ -392,8 +401,7 @@ err:
}
EXPORT_SYMBOL(bch_btree_keys_alloc);
-void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
- bool *expensive_debug_checks)
+void bch_btree_keys_init(struct btree_keys *b, bool *expensive_debug_checks)
{
struct bkey_format_state s;
unsigned i;
@@ -401,7 +409,6 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
bch_bkey_format_init(&s);
b->format = bch_bkey_format_done(&s);
- b->ops = ops;
b->nsets = 0;
memset(&b->nr, 0, sizeof(b->nr));
#ifdef CONFIG_BCACHE_DEBUG
@@ -410,7 +417,7 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
for (i = 0; i < MAX_BSETS; i++) {
b->set[i].data = NULL;
b->set[i].size = 0;
- b->set[i].extra = BSET_AUX_TREE_NONE_VAL;
+ b->set[i].extra = BSET_NO_AUX_TREE_VAL;
}
}
EXPORT_SYMBOL(bch_btree_keys_init);
@@ -742,7 +749,7 @@ static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
while (++t < b->set + MAX_BSETS) {
t->size = 0;
- t->extra = BSET_AUX_TREE_NONE_VAL;
+ t->extra = BSET_NO_AUX_TREE_VAL;
t->tree = NULL;
t->prev = NULL;
}
@@ -754,49 +761,72 @@ static unsigned bset_tree_capacity(struct btree_keys *b, struct bset_tree *t)
return b->set->tree + btree_keys_cachelines(b) - t->tree;
}
-static void bch_bset_build_unwritten_tree(struct btree_keys *b)
+static void bch_bset_lookup_table_add_entries(struct btree_keys *b,
+ struct bset_tree *t)
{
- struct bset_tree *t = bset_tree_last(b);
+ struct bkey_packed *k;
+
+ BUG_ON(!bset_has_rw_aux_tree(t));
+ BUG_ON(t->size > bset_tree_capacity(b, t));
+ for (k = table_to_bkey(t, t->size - 1);
+ k != bset_bkey_last(t->data);
+ k = bkey_next(k))
+ while (bkey_to_cacheline(t, k) >= t->size) {
+ if (t->size == bset_tree_capacity(b, t))
+ return;
+
+ t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
+ t->size++;
+ }
+}
+
+static void bch_bset_build_rw_aux_tree(struct btree_keys *b, struct bset_tree *t)
+{
bset_alloc_tree(b, t);
- if (bset_tree_capacity(b, t)) {
- t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
- t->size = 1;
- t->extra = BSET_UNWRITTEN_TABLE_VAL;
- } else {
- t->extra = BSET_AUX_TREE_NONE_VAL;
+ if (!bset_tree_capacity(b, t)) {
+ t->extra = BSET_NO_AUX_TREE_VAL;
+ return;
}
+
+ t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
+ t->size = 1;
+ t->extra = BSET_RW_AUX_TREE_VAL;
+
+ bch_bset_lookup_table_add_entries(b, t);
}
void bch_bset_init_first(struct btree_keys *b, struct bset *i)
{
+ struct bset_tree *t = &b->set[0];
+
BUG_ON(b->nsets);
- b->set[0].data = i;
+ t->data = i;
memset(i, 0, sizeof(*i));
get_random_bytes(&i->seq, sizeof(i->seq));
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
- bch_bset_build_unwritten_tree(b);
+ bch_bset_build_rw_aux_tree(b, t);
}
-EXPORT_SYMBOL(bch_bset_init_first);
void bch_bset_init_next(struct btree_keys *b, struct bset *i)
{
+ struct bset_tree *t;
BUG_ON(b->nsets + 1 == MAX_BSETS);
- b->set[++b->nsets].data = i;
+ t = &b->set[++b->nsets];
+ t->data = i;
memset(i, 0, sizeof(*i));
i->seq = b->set->data->seq;
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
- bch_bset_build_unwritten_tree(b);
+ bch_bset_build_rw_aux_tree(b, t);
}
-EXPORT_SYMBOL(bch_bset_init_next);
-void bch_bset_build_written_tree(struct btree_keys *b,
- struct bset_tree *t)
+void bch_bset_build_ro_aux_tree(struct btree_keys *b,
+ struct bset_tree *t)
{
struct bkey_packed *prev = NULL, *k = t->data->start;
unsigned j, cacheline = 1;
@@ -808,7 +838,7 @@ void bch_bset_build_written_tree(struct btree_keys *b,
retry:
if (t->size < 2) {
t->size = 0;
- t->extra = BSET_AUX_TREE_NONE_VAL;
+ t->extra = BSET_NO_AUX_TREE_VAL;
return;
}
@@ -844,7 +874,6 @@ retry:
j = inorder_next(j, t->size))
make_bfloat(&b->format, t, j);
}
-EXPORT_SYMBOL(bch_bset_build_written_tree);
static struct bkey_packed *__bkey_prev(struct bset_tree *t, struct bkey_packed *k)
{
@@ -865,9 +894,17 @@ static struct bkey_packed *__bkey_prev(struct bset_tree *t, struct bkey_packed *
}
- p = bset_has_aux_tree(t)
- ? tree_to_bkey(t, inorder_to_tree(j, t))
- : table_to_bkey(t, j);
+ switch (bset_aux_tree_type(t)) {
+ case BSET_NO_AUX_TREE:
+ p = t->data->start;
+ break;
+ case BSET_RO_AUX_TREE:
+ p = tree_to_bkey(t, inorder_to_tree(j, t));
+ break;
+ case BSET_RW_AUX_TREE:
+ p = table_to_bkey(t, j);
+ break;
+ }
} while (p >= k);
return p;
@@ -919,7 +956,7 @@ void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bset_tree *t,
{
unsigned inorder, j = 1;
- if (!bset_has_aux_tree(t))
+ if (bset_aux_tree_type(t) != BSET_RO_AUX_TREE)
return;
inorder = bkey_to_cacheline(t, k);
@@ -972,9 +1009,9 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
int shift = new_u64s - clobber_u64s;
unsigned j;
- BUG_ON(bset_has_aux_tree(t));
+ BUG_ON(bset_has_ro_aux_tree(t));
- if (bset_aux_tree_type(t) != BSET_HAS_UNWRITTEN_TABLE)
+ if (!bset_has_rw_aux_tree(t))
return;
/* Did we just truncate? */
@@ -1024,21 +1061,7 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
t->prev[j] = new_offset;
}
- BUG_ON(t->size > bset_tree_capacity(b, t));
-
- if (t->size == bset_tree_capacity(b, t))
- return;
-
- /* Possibly add a new entry to the end of the lookup table */
-
- for (k = table_to_bkey(t, t->size - 1);
- k != bset_bkey_last(t->data);
- k = bkey_next(k))
- if (t->size == bkey_to_cacheline(t, k)) {
- t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
- t->size++;
- return;
- }
+ bch_bset_lookup_table_add_entries(b, t);
}
static void bch_bset_verify_lookup_table(struct btree_keys *b,
@@ -1050,9 +1073,9 @@ static void bch_bset_verify_lookup_table(struct btree_keys *b,
if (!btree_keys_expensive_checks(b))
return;
- BUG_ON(bset_has_aux_tree(t));
+ BUG_ON(bset_has_ro_aux_tree(t));
- if (bset_aux_tree_type(t) != BSET_HAS_UNWRITTEN_TABLE)
+ if (!bset_has_rw_aux_tree(t))
return;
BUG_ON(t->size < 1);
@@ -1258,13 +1281,13 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b,
*/
switch (bset_aux_tree_type(t)) {
- case BSET_AUX_TREE_NONE:
+ case BSET_NO_AUX_TREE:
m = t->data->start;
break;
- case BSET_HAS_UNWRITTEN_TABLE:
+ case BSET_RW_AUX_TREE:
m = bset_search_write_set(f, t, search, lossy_packed_search);
break;
- case BSET_HAS_AUX_TREE:
+ case BSET_RO_AUX_TREE:
/*
* Each node in the auxiliary search tree covers a certain range
* of bits, and keys above and below the set it covers might
@@ -1373,7 +1396,7 @@ void bch_btree_node_iter_push(struct btree_node_iter *iter,
*/
void bch_btree_node_iter_init(struct btree_node_iter *iter,
struct btree_keys *b, struct bpos search,
- bool strictly_greater)
+ bool strictly_greater, bool is_extents)
{
struct bset_tree *t;
struct bkey_packed p, *packed_search, *lossy_packed_search;
@@ -1399,7 +1422,7 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter,
BUG();
}
- __bch_btree_node_iter_init(iter, b);
+ __bch_btree_node_iter_init(iter, is_extents);
for (t = b->set; t <= b->set + b->nsets; t++)
__bch_btree_node_iter_push(iter, b,
@@ -1413,11 +1436,12 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter,
EXPORT_SYMBOL(bch_btree_node_iter_init);
void bch_btree_node_iter_init_from_start(struct btree_node_iter *iter,
- struct btree_keys *b)
+ struct btree_keys *b,
+ bool is_extents)
{
struct bset_tree *t;
- __bch_btree_node_iter_init(iter, b);
+ __bch_btree_node_iter_init(iter, is_extents);
for (t = b->set; t <= b->set + b->nsets; t++)
__bch_btree_node_iter_push(iter, b,
@@ -1425,7 +1449,6 @@ void bch_btree_node_iter_init_from_start(struct btree_node_iter *iter,
bset_bkey_last(t->data));
bch_btree_node_iter_sort(iter, b);
}
-EXPORT_SYMBOL(bch_btree_node_iter_init_from_start);
struct bkey_packed *bch_btree_node_iter_bset_pos(struct btree_node_iter *iter,
struct btree_keys *b,
@@ -1585,70 +1608,42 @@ EXPORT_SYMBOL(bch_btree_node_iter_peek_unpack);
/* Mergesort */
-void bch_bset_sort_state_free(struct bset_sort_state *state)
-{
- mempool_exit(&state->pool);
-}
-
-int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order,
- struct time_stats *time)
-{
- state->page_order = page_order;
- state->crit_factor = int_sqrt(1 << page_order);
- state->time = time;
-
- if (mempool_init_page_pool(&state->pool, 1, page_order))
- return -ENOMEM;
-
- return 0;
-}
-EXPORT_SYMBOL(bch_bset_sort_state_init);
-
/* No repacking: */
-static struct btree_nr_keys btree_mergesort_simple(struct btree_keys *b,
- unsigned from,
- struct bset *bset,
- struct btree_node_iter *iter)
+static struct btree_nr_keys btree_mergesort_simple(struct bset *dst,
+ struct btree_keys *src,
+ struct btree_node_iter *src_iter)
{
- struct bkey_packed *in, *out = bset->start;
- struct btree_nr_keys ret = b->nr;
- unsigned i;
+ struct bkey_packed *in, *out = bset_bkey_last(dst);
+ struct btree_nr_keys nr;
+
+ memset(&nr, 0, sizeof(nr));
- while ((in = bch_btree_node_iter_next_all(iter, b))) {
- if (!bkey_packed_is_whiteout(b, in)) {
+ while ((in = bch_btree_node_iter_next_all(src_iter, src))) {
+ if (!bkey_packed_is_whiteout(src, in)) {
bkey_copy(out, in);
+ btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
}
}
- bset->u64s = cpu_to_le16((u64 *) out - bset->_data);
-
- for (i = from + 1; i < MAX_BSETS; i++) {
- ret.bset_u64s[from] += ret.bset_u64s[i];
- ret.bset_u64s[i] = 0;
- }
-
- return ret;
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+ return nr;
}
/* Sort + repack in a new format: */
-static struct btree_nr_keys btree_mergesort(struct btree_keys *dst,
- struct bset *dst_set,
+static struct btree_nr_keys btree_mergesort(struct bset *dst,
struct btree_keys *src,
- struct btree_node_iter *iter,
- ptr_filter_fn filter)
+ struct btree_node_iter *src_iter,
+ struct bkey_format *in_f,
+ struct bkey_format *out_f,
+ key_filter_fn filter)
{
- struct bkey_format *in_f = &src->format;
- struct bkey_format *out_f = &dst->format;
- struct bkey_packed *in, *out = bset_bkey_last(dst_set);
+ struct bkey_packed *in, *out = bset_bkey_last(dst);
struct btree_nr_keys nr;
- BUG_ON(filter);
- EBUG_ON(filter && !dst->ops->is_extents);
-
memset(&nr, 0, sizeof(nr));
- while ((in = bch_btree_node_iter_next_all(iter, src))) {
+ while ((in = bch_btree_node_iter_next_all(src_iter, src))) {
if (bkey_packed_is_whiteout(src, in))
continue;
@@ -1660,30 +1655,25 @@ static struct btree_nr_keys btree_mergesort(struct btree_keys *dst,
btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
-
- BUG_ON((void *) out >
- (void *) dst_set + (PAGE_SIZE << dst->page_order));
}
- dst_set->u64s = cpu_to_le16((u64 *) out - dst_set->_data);
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
-/* Sort, repack, and merge extents */
-static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst,
- struct bset *dst_set,
+/* Sort, repack, and merge: */
+static struct btree_nr_keys btree_mergesort_extents(struct bset *dst,
struct btree_keys *src,
struct btree_node_iter *iter,
- ptr_filter_fn filter)
+ struct bkey_format *in_f,
+ struct bkey_format *out_f,
+ key_filter_fn filter,
+ key_merge_fn merge)
{
- struct bkey_format *in_f = &src->format;
- struct bkey_format *out_f = &dst->format;
struct bkey_packed *k, *prev = NULL, *out;
struct btree_nr_keys nr;
BKEY_PADDED(k) tmp;
- EBUG_ON(!dst->ops->is_extents);
-
memset(&nr, 0, sizeof(nr));
while ((k = bch_btree_node_iter_next_all(iter, src))) {
@@ -1702,8 +1692,8 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst,
/* prev is always unpacked, for key merging: */
if (prev &&
- src->ops->key_merge &&
- bch_bkey_try_merge(src, (void *) prev, &tmp.k))
+ merge &&
+ merge(src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE)
continue;
/*
@@ -1716,13 +1706,10 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst,
btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
- prev = bset_bkey_last(dst_set);
+ prev = bset_bkey_last(dst);
}
bkey_copy(prev, &tmp.k);
-
- BUG_ON((void *) bkey_next(prev) >
- (void *) dst_set + (PAGE_SIZE << dst->page_order));
}
if (prev) {
@@ -1730,102 +1717,34 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst,
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
- out = bset_bkey_last(dst_set);
+ out = bset_bkey_last(dst);
}
- dst_set->u64s = cpu_to_le16((u64 *) out - dst_set->_data);
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
-struct btree_nr_keys bch_sort_bsets(struct bset *dst, struct btree_keys *b,
- unsigned from, struct btree_node_iter *iter,
- btree_keys_sort_fn sort,
- struct bset_sort_state *state)
+struct btree_nr_keys bch_sort_bsets(struct bset *dst,
+ struct btree_keys *src,
+ struct btree_node_iter *src_iter,
+ struct bkey_format *in_f,
+ struct bkey_format *out_f,
+ key_filter_fn filter,
+ key_merge_fn merge)
{
- u64 start_time = local_clock();
- struct btree_node_iter _iter;
- struct btree_nr_keys nr;
-
- if (!iter) {
- struct bset_tree *t;
-
- iter = &_iter;
- __bch_btree_node_iter_init(iter, b);
-
- for (t = b->set + from; t <= b->set + b->nsets; t++)
- __bch_btree_node_iter_push(iter, b,
- t->data->start,
- bset_bkey_last(t->data));
- bch_btree_node_iter_sort(iter, b);
- }
-
- dst->u64s = 0;
-
- /*
- * If we're only doing a partial sort (start != 0), then we can't merge
- * extents because that might produce extents that overlap with 0 size
- * extents in bsets we aren't sorting:
- */
- if (sort)
- nr = sort(b, dst, iter);
- else if (b->ops->is_extents && !from)
- nr = btree_mergesort_extents(b, dst, b, iter, NULL);
- else
- nr = btree_mergesort_simple(b, from, dst, iter);
-
- if (!from)
- bch_time_stats_update(state->time, start_time);
-
- return nr;
-}
-
-/**
- * bch_btree_sort_into - sort with a specified output, instead of allocating
- * temporary space
- *
- * does not create the auxiliary search tree
- */
-void bch_btree_sort_into(struct btree_keys *dst,
- struct btree_keys *src,
- ptr_filter_fn filter,
- struct bset_sort_state *state)
-{
- u64 start_time = local_clock();
- struct btree_node_iter iter;
- struct btree_nr_keys nr;
-
- bch_btree_node_iter_init_from_start(&iter, src);
-
- if (!dst->ops->is_extents)
- nr = btree_mergesort(dst, dst->set->data,
- src, &iter, filter);
+ if (merge)
+ return btree_mergesort_extents(dst, src, src_iter,
+ in_f, out_f, filter, merge);
+ else if (memcmp(in_f, out_f, sizeof(*in_f)))
+ return btree_mergesort(dst, src, src_iter, in_f, out_f, filter);
else
- nr = btree_mergesort_extents(dst, dst->set->data,
- src, &iter, filter);
-
- BUG_ON(__set_bytes(dst->set->data,
- le16_to_cpu(dst->set->data->u64s)) >
- (PAGE_SIZE << dst->page_order));
-
- bch_time_stats_update(state->time, start_time);
-
- dst->nr.live_u64s += nr.live_u64s;
- dst->nr.bset_u64s[0] += nr.bset_u64s[0];
- dst->nr.packed_keys += nr.packed_keys;
- dst->nr.unpacked_keys += nr.unpacked_keys;
-
- dst->nsets = 0;
- /* No auxiliary search tree yet */
- dst->set->size = 0;
- dst->set->extra = BSET_AUX_TREE_NONE_VAL;
-
- bch_verify_btree_nr_keys(dst);
+ return btree_mergesort_simple(dst, src, src_iter);
}
bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
{
struct bset_tree *t, *rebuild_from = NULL;
- bool last_set_has_aux_tree = bset_has_aux_tree(bset_tree_last(b));
+ bool last_set_aux_tree_ro = bset_has_ro_aux_tree(bset_tree_last(b));
unsigned idx;
for (idx = 0; idx <= b->nsets; idx++) {
@@ -1841,7 +1760,7 @@ bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
*
* XXX unless they're extents, if we fix assertions elsewhere
*/
- if (idx == b->nsets && !last_set_has_aux_tree)
+ if (idx == b->nsets && !last_set_aux_tree_ro)
break;
for (k = i->start; k != bset_bkey_last(i); k = n) {
@@ -1863,10 +1782,10 @@ bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
return false;
for (t = rebuild_from; t <= b->set + b->nsets; t++) {
- if (t == bset_tree_last(b) && !last_set_has_aux_tree)
- bch_bset_build_unwritten_tree(b);
+ if (t == bset_tree_last(b) && !last_set_aux_tree_ro)
+ bch_bset_build_rw_aux_tree(b, t);
else
- bch_bset_build_written_tree(b, t);
+ bch_bset_build_ro_aux_tree(b, t);
}
return true;
@@ -1884,7 +1803,7 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
stats->sets[type].nr++;
stats->sets[type].bytes += le16_to_cpu(t->data->u64s) * sizeof(u64);
- if (bset_has_aux_tree(t)) {
+ if (bset_has_ro_aux_tree(t)) {
stats->floats += t->size - 1;
for (j = 1; j < t->size; j++)
@@ -1915,7 +1834,7 @@ int bch_bkey_print_bfloat(struct btree_keys *b, struct bkey_packed *k,
if (!size)
return 0;
- if (!bset_has_aux_tree(t))
+ if (!bset_has_ro_aux_tree(t))
goto out;
j = inorder_to_tree(bkey_to_cacheline(t, k), t);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 6e33fe6b5b98..48bd4c84ef3a 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include "bkey.h"
+#include "bkey_methods.h"
#include "util.h" /* for time_stats */
/*
@@ -148,34 +149,6 @@ struct btree_node_iter;
struct btree_node_iter_set;
struct bkey_float;
-typedef bool (*ptr_filter_fn)(struct btree_keys *, struct bkey_s);
-
-typedef bool (*iter_cmp_fn)(struct btree_node_iter_set,
- struct btree_node_iter_set);
-
-enum merge_result {
- BCH_MERGE_NOMERGE,
-
- /*
- * The keys were mergeable, but would have overflowed size - so instead
- * l was changed to the maximum size, and both keys were modified:
- */
- BCH_MERGE_PARTIAL,
- BCH_MERGE_MERGE,
-};
-
-struct btree_keys_ops {
- ptr_filter_fn key_normalize;
- enum merge_result (*key_merge)(struct btree_keys *,
- struct bkey_i *, struct bkey_i *);
-
- /*
- * Only used for deciding whether to use bkey_start_pos(k) or just the
- * key itself in a couple places
- */
- bool is_extents;
-};
-
#define MAX_BSETS 3U
struct bset_tree {
@@ -211,27 +184,27 @@ struct bset_tree {
};
enum bset_aux_tree_type {
- BSET_AUX_TREE_NONE,
- BSET_HAS_AUX_TREE,
- BSET_HAS_UNWRITTEN_TABLE,
+ BSET_NO_AUX_TREE,
+ BSET_RO_AUX_TREE,
+ BSET_RW_AUX_TREE,
};
#define BSET_TREE_NR_TYPES 3
-#define BSET_AUX_TREE_NONE_VAL (UINT_MAX)
-#define BSET_UNWRITTEN_TABLE_VAL (UINT_MAX - 1)
+#define BSET_NO_AUX_TREE_VAL (UINT_MAX)
+#define BSET_RW_AUX_TREE_VAL (UINT_MAX - 1)
static inline enum bset_aux_tree_type bset_aux_tree_type(struct bset_tree *t)
{
switch (t->extra) {
- case BSET_AUX_TREE_NONE_VAL:
- return BSET_AUX_TREE_NONE;
- case BSET_UNWRITTEN_TABLE_VAL:
+ case BSET_NO_AUX_TREE_VAL:
+ return BSET_NO_AUX_TREE;
+ case BSET_RW_AUX_TREE_VAL:
EBUG_ON(!t->size);
- return BSET_HAS_UNWRITTEN_TABLE;
+ return BSET_RW_AUX_TREE;
default:
EBUG_ON(!t->size);
- return BSET_HAS_AUX_TREE;
+ return BSET_RO_AUX_TREE;
}
}
@@ -250,7 +223,6 @@ struct btree_nr_keys {
};
struct btree_keys {
- const struct btree_keys_ops *ops;
u8 nsets;
u8 page_order;
@@ -287,9 +259,14 @@ static inline struct bset_tree *bset_tree_last(struct btree_keys *b)
return b->set + b->nsets;
}
-static inline bool bset_has_aux_tree(struct bset_tree *t)
+static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
+{
+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
+}
+
+static inline bool bset_has_rw_aux_tree(struct bset_tree *t)
{
- return bset_aux_tree_type(t) == BSET_HAS_AUX_TREE;
+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE;
}
#define __set_bytes(_i, _u64s) (sizeof(*(_i)) + (_u64s) * sizeof(u64))
@@ -313,12 +290,11 @@ static inline struct bset *bset_next_set(struct btree_keys *b,
void bch_btree_keys_free(struct btree_keys *);
int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
-void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
- bool *);
+void bch_btree_keys_init(struct btree_keys *, bool *);
void bch_bset_init_first(struct btree_keys *, struct bset *);
void bch_bset_init_next(struct btree_keys *, struct bset *);
-void bch_bset_build_written_tree(struct btree_keys *, struct bset_tree *);
+void bch_bset_build_ro_aux_tree(struct btree_keys *, struct bset_tree *);
void bch_bset_fix_invalidated_key(struct btree_keys *, struct bset_tree *,
struct bkey_packed *);
@@ -384,20 +360,6 @@ struct bset_tree *bch_bkey_to_bset(struct btree_keys *, struct bkey_packed *);
struct bkey_packed *bkey_prev_all(struct bset_tree *, struct bkey_packed *);
struct bkey_packed *bkey_prev(struct bset_tree *, struct bkey_packed *);
-/*
- * Tries to merge l and r: l should be lower than r
- * Returns true if we were able to merge. If we did merge, l will be the merged
- * key, r will be untouched.
- */
-static inline bool bch_bkey_try_merge(struct btree_keys *b,
- struct bkey_i *l,
- struct bkey_i *r)
-{
- return b->ops->key_merge
- ? b->ops->key_merge(b, l, r) == BCH_MERGE_MERGE
- : false;
-}
-
enum bch_extent_overlap {
BCH_EXTENT_OVERLAP_FRONT,
BCH_EXTENT_OVERLAP_BACK,
@@ -436,19 +398,19 @@ struct btree_node_iter {
};
static inline void __bch_btree_node_iter_init(struct btree_node_iter *iter,
- struct btree_keys *b)
+ bool is_extents)
{
iter->used = 0;
- iter->is_extents = b->ops->is_extents;
+ iter->is_extents = is_extents;
}
void bch_btree_node_iter_push(struct btree_node_iter *, struct btree_keys *,
const struct bkey_packed *,
const struct bkey_packed *);
void bch_btree_node_iter_init(struct btree_node_iter *, struct btree_keys *,
- struct bpos, bool);
+ struct bpos, bool, bool);
void bch_btree_node_iter_init_from_start(struct btree_node_iter *,
- struct btree_keys *);
+ struct btree_keys *, bool);
struct bkey_packed *bch_btree_node_iter_bset_pos(struct btree_node_iter *,
struct btree_keys *,
struct bset *);
@@ -569,8 +531,8 @@ struct bkey_packed *bch_btree_node_iter_prev(struct btree_node_iter *,
* Iterates over all _live_ keys - skipping deleted (and potentially
* overlapping) keys
*/
-#define for_each_btree_node_key(b, k, iter) \
- for (bch_btree_node_iter_init_from_start((iter), (b)); \
+#define for_each_btree_node_key(b, k, iter, _is_extents) \
+ for (bch_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
((k) = bch_btree_node_iter_peek(iter, b)); \
bch_btree_node_iter_advance(iter, b))
@@ -578,8 +540,8 @@ struct bkey_s_c bch_btree_node_iter_peek_unpack(struct btree_node_iter *,
struct btree_keys *,
struct bkey *);
-#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
- for (bch_btree_node_iter_init_from_start((iter), (b)); \
+#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\
+ for (bch_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
(k = bch_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
bch_btree_node_iter_advance(iter, b))
@@ -620,31 +582,13 @@ static inline void btree_keys_account_key(struct btree_nr_keys *n,
/* Sorting */
-struct bset_sort_state {
- mempool_t pool;
-
- unsigned page_order;
- unsigned crit_factor;
-
- struct time_stats *time;
-};
-
-typedef struct btree_nr_keys (*btree_keys_sort_fn)(struct btree_keys *,
- struct bset *,
- struct btree_node_iter *);
-
-void bch_bset_sort_state_free(struct bset_sort_state *);
-int bch_bset_sort_state_init(struct bset_sort_state *, unsigned,
- struct time_stats *times);
-
-struct btree_nr_keys bch_sort_bsets(struct bset *, struct btree_keys *,
- unsigned, struct btree_node_iter *,
- btree_keys_sort_fn,
- struct bset_sort_state *);
-
-void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
-void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
- ptr_filter_fn, struct bset_sort_state *);
+struct btree_nr_keys bch_sort_bsets(struct bset *,
+ struct btree_keys *,
+ struct btree_node_iter *,
+ struct bkey_format *,
+ struct bkey_format *,
+ key_filter_fn,
+ key_merge_fn);
bool bch_maybe_compact_deleted_keys(struct btree_keys *);
@@ -666,7 +610,7 @@ int bch_bkey_print_bfloat(struct btree_keys *, struct bkey_packed *,
/* Debug stuff */
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
-void bch_dump_bucket(struct btree_keys *);
+void bch_dump_btree_node(struct btree_keys *);
void bch_dump_btree_node_iter(struct btree_keys *, struct btree_node_iter *);
#ifdef CONFIG_BCACHE_DEBUG
@@ -686,7 +630,6 @@ static inline void bch_verify_key_order(struct btree_keys *b,
struct bkey_packed *where) {}
#endif
-
static inline void bch_verify_btree_nr_keys(struct btree_keys *b)
{
if (btree_keys_expensive_checks(b))
diff --git a/drivers/md/bcache/btree_cache.c b/drivers/md/bcache/btree_cache.c
index 6959e4699592..06a9921afbcd 100644
--- a/drivers/md/bcache/btree_cache.c
+++ b/drivers/md/bcache/btree_cache.c
@@ -119,11 +119,6 @@ int mca_hash_insert(struct cache_set *c, struct btree *b,
b->level = level;
b->btree_id = id;
- bch_btree_keys_init(&b->keys, b->level
- ? &bch_btree_interior_node_ops
- : bch_btree_ops[id],
- &c->expensive_debug_checks);
-
ret = rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
if (ret)
@@ -527,6 +522,7 @@ out:
b->keys.set[0].data = NULL;
b->sib_u64s[0] = 0;
b->sib_u64s[1] = 0;
+ bch_btree_keys_init(&b->keys, &c->expensive_debug_checks);
bch_time_stats_update(&c->mca_alloc_time, start_time);
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index 1735ea39d397..f59563f5b2ef 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -148,7 +148,9 @@ static bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
struct bkey_s_c k;
u8 stale = 0;
- for_each_btree_node_key_unpack(&b->keys, k, &iter, &unpacked) {
+ for_each_btree_node_key_unpack(&b->keys, k, &iter,
+ btree_node_is_extents(b),
+ &unpacked) {
bkey_debugcheck(c, b, k);
stale = max(stale, btree_mark_key(c, b, k));
}
@@ -849,8 +851,9 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id)
struct bkey unpacked;
struct bkey_s_c k;
- for_each_btree_node_key_unpack(&b->keys, k,
- &node_iter, &unpacked)
+ for_each_btree_node_key_unpack(&b->keys, k, &node_iter,
+ btree_node_is_extents(b),
+ &unpacked)
btree_mark_key(c, b, k);
}
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index 035ff2a7caff..c82cf93a01f8 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -18,21 +18,39 @@
static void btree_node_sort(struct cache_set *c, struct btree *b,
struct btree_iter *iter, unsigned from,
- struct btree_node_iter *node_iter,
- btree_keys_sort_fn sort, bool is_write_locked)
+ struct btree_node_iter *sort_iter,
+ sort_fix_overlapping_fn sort_fn,
+ bool is_write_locked)
{
struct btree_node *out;
- bool used_mempool = false;
- unsigned order = b->keys.page_order;
struct btree_nr_keys nr;
+ struct btree_node_iter _sort_iter;
+ bool used_mempool = false;
+ u64 start_time;
+ unsigned order;
- if (from) {
+ if (!sort_iter) {
struct bset_tree *t;
- unsigned u64s = 0;
+
+ sort_iter = &_sort_iter;
+ __bch_btree_node_iter_init(sort_iter, btree_node_is_extents(b));
for (t = b->keys.set + from;
- t <= b->keys.set + b->keys.nsets; t++)
- u64s += le16_to_cpu(t->data->u64s);
+ t <= b->keys.set + b->keys.nsets;
+ t++)
+ bch_btree_node_iter_push(sort_iter, &b->keys,
+ t->data->start,
+ bset_bkey_last(t->data));
+ }
+
+ if (!from) {
+ order = b->keys.page_order;
+ } else {
+ struct btree_node_iter_set *set;
+ unsigned u64s = 0;
+
+ btree_node_iter_for_each(sort_iter, set)
+ u64s += set->end - set->k;
order = get_order(__set_bytes(b->data, u64s));
}
@@ -41,13 +59,42 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
if (!out) {
struct page *outp;
- outp = mempool_alloc(&c->sort.pool, GFP_NOIO);
+ outp = mempool_alloc(&c->btree_sort_pool, GFP_NOIO);
out = page_address(outp);
used_mempool = true;
}
- nr = bch_sort_bsets(&out->keys, &b->keys, from,
- node_iter, sort, &c->sort);
+ start_time = local_clock();
+
+ out->keys.u64s = 0;
+
+ /*
+ * The nr_keys accounting for number of packed/unpacked keys isn't
+ * broken out by bset, which means we can't merge extents unless we're
+ * sorting the entire node (we'd have to recalculate nr_keys for the
+ * entire node). Also, extent merging is problematic if we're not
+ * sorting the entire node, since we'd end up with extents overlapping
+ * with 0 length whiteouts in other bsets we didn't sort.
+ */
+ if (sort_fn)
+ nr = sort_fn(&out->keys, &b->keys, sort_iter);
+ else if (!from)
+ nr = bch_sort_bsets(&out->keys, &b->keys, sort_iter,
+ &b->keys.format,
+ &b->keys.format,
+ NULL,
+ btree_node_ops(b)->key_merge);
+ else
+ nr = bch_sort_bsets(&out->keys, &b->keys, sort_iter,
+ &b->keys.format,
+ &b->keys.format,
+ NULL, NULL);
+
+ BUG_ON((void *) bset_bkey_last(&out->keys) >
+ (void *) out + (PAGE_SIZE << order));
+
+ if (!from)
+ bch_time_stats_update(&c->btree_sort_time, start_time);
if (!is_write_locked)
__btree_node_lock_write(b, iter);
@@ -66,22 +113,30 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
out->keys.u64s = cpu_to_le16(u64s);
swap(out, b->data);
b->keys.set->data = &b->data->keys;
+ b->keys.nr = nr;
} else {
+ unsigned i;
+
b->keys.set[from].data->u64s = out->keys.u64s;
memcpy(b->keys.set[from].data->start, out->keys.start,
(void *) bset_bkey_last(&out->keys) -
(void *) out->keys.start);
+
+ for (i = from + 1; i < MAX_BSETS; i++) {
+ b->keys.nr.bset_u64s[from] +=
+ b->keys.nr.bset_u64s[i];
+ b->keys.nr.bset_u64s[i] = 0;
+ }
}
b->keys.nsets = from;
- b->keys.nr = nr;
- bch_bset_build_written_tree(&b->keys, &b->keys.set[from]);
+ bch_bset_build_ro_aux_tree(&b->keys, &b->keys.set[from]);
if (!is_write_locked)
__btree_node_unlock_write(b, iter);
if (used_mempool)
- mempool_free(virt_to_page(out), &c->sort.pool);
+ mempool_free(virt_to_page(out), &c->btree_sort_pool);
else
free_pages((unsigned long) out, order);
@@ -98,6 +153,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b,
struct btree_iter *iter)
{
unsigned crit = SORT_CRIT;
+ unsigned crit_factor = int_sqrt(btree_pages(c));
int i = 0;
/* Don't sort if nothing to do */
@@ -109,7 +165,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b,
goto sort;
for (i = b->keys.nsets - 1; i >= 0; --i) {
- crit *= c->sort.crit_factor;
+ crit *= crit_factor;
if (le16_to_cpu(b->keys.set[i].data->u64s) < crit)
goto sort;
@@ -123,7 +179,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b,
nosort:
__btree_node_lock_write(b, iter);
- bch_bset_build_written_tree(&b->keys, bset_tree_last(&b->keys));
+ bch_bset_build_ro_aux_tree(&b->keys, bset_tree_last(&b->keys));
__btree_node_unlock_write(b, iter);
return false;
sort:
@@ -278,7 +334,7 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
int ret;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
- __bch_btree_node_iter_init(iter, &b->keys);
+ __bch_btree_node_iter_init(iter, btree_node_is_extents(b));
err = "dynamic fault";
if (bch_meta_read_fault("btree"))
@@ -377,7 +433,7 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
goto err;
btree_node_sort(c, b, NULL, 0, iter,
- b->keys.ops->is_extents
+ btree_node_is_extents(b)
? bch_extent_sort_fix_overlapping
: bch_key_sort_fix_overlapping,
true);
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index 197778addb02..315beca5f51d 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -557,7 +557,8 @@ static inline void __btree_iter_init(struct btree_iter *iter,
struct btree *b)
{
bch_btree_node_iter_init(&iter->node_iters[b->level], &b->keys,
- iter->pos, iter->is_extents);
+ iter->pos, iter->is_extents,
+ btree_node_is_extents(b));
/* Skip to first non whiteout: */
if (b->level)
@@ -1106,7 +1107,7 @@ void __bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c,
unsigned locks_want, unsigned depth)
{
iter->level = depth;
- iter->is_extents = btree_id == BTREE_ID_EXTENTS;
+ iter->is_extents = bch_bkey_ops[btree_id]->is_extents;
iter->nodes_locked = 0;
iter->nodes_intent_locked = 0;
iter->locks_want = min(locks_want, BTREE_MAX_DEPTH);
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index 04ea88560af2..1274b3b8c7de 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -138,11 +138,21 @@ static inline enum bkey_type btree_node_type(struct btree *b)
return b->level ? BKEY_TYPE_BTREE : b->btree_id;
}
+static inline const struct bkey_ops *btree_node_ops(struct btree *b)
+{
+ return bch_bkey_ops[btree_node_type(b)];
+}
+
static inline bool btree_node_has_ptrs(struct btree *b)
{
return btree_type_has_ptrs(btree_node_type(b));
}
+static inline bool btree_node_is_extents(struct btree *b)
+{
+ return btree_node_type(b) == BKEY_TYPE_EXTENTS;
+}
+
/*
* Optional hook that will be called just prior to a btree node update, when
* we're holding the write lock and we know what key is about to be overwritten:
@@ -182,4 +192,8 @@ enum btree_gc_coalesce_fail_reason {
BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
};
+typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *,
+ struct btree_keys *,
+ struct btree_node_iter *);
+
#endif /* _BCACHE_BTREE_TYPES_H */
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index e352bb2f42e5..9e71e6ed3139 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -306,6 +306,37 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c,
return b;
}
+static void bch_btree_sort_into(struct cache_set *c,
+ struct btree *dst,
+ struct btree *src)
+{
+ struct btree_nr_keys nr;
+ struct btree_node_iter iter;
+ u64 start_time = local_clock();
+
+ BUG_ON(dst->keys.nsets);
+
+ dst->keys.set[0].extra = BSET_NO_AUX_TREE_VAL;
+
+ bch_btree_node_iter_init_from_start(&iter, &src->keys,
+ btree_node_is_extents(src));
+
+ nr = bch_sort_bsets(dst->keys.set->data,
+ &src->keys, &iter,
+ &src->keys.format,
+ &dst->keys.format,
+ btree_node_ops(src)->key_normalize,
+ btree_node_ops(src)->key_merge);
+ bch_time_stats_update(&c->btree_sort_time, start_time);
+
+ dst->keys.nr.live_u64s += nr.live_u64s;
+ dst->keys.nr.bset_u64s[0] += nr.bset_u64s[0];
+ dst->keys.nr.packed_keys += nr.packed_keys;
+ dst->keys.nr.unpacked_keys += nr.unpacked_keys;
+
+ bch_verify_btree_nr_keys(&dst->keys);
+}
+
struct btree *__btree_node_alloc_replacement(struct cache_set *c,
struct btree *b,
struct bkey_format format,
@@ -320,9 +351,8 @@ struct btree *__btree_node_alloc_replacement(struct cache_set *c,
n->data->format = format;
n->keys.format = format;
- bch_btree_sort_into(&n->keys, &b->keys,
- b->keys.ops->key_normalize,
- &c->sort);
+ bch_btree_sort_into(c, n, b);
+
btree_node_reset_sib_u64s(n);
n->key.k.p = b->key.k.p;
@@ -1071,7 +1101,7 @@ static void btree_node_interior_verify(struct btree *b)
BUG_ON(!b->level);
- bch_btree_node_iter_init(&iter, &b->keys, b->key.k.p, false);
+ bch_btree_node_iter_init(&iter, &b->keys, b->key.k.p, false, false);
#if 1
BUG_ON(!(k = bch_btree_node_iter_peek(&iter, &b->keys)) ||
bkey_cmp_left_packed(f, k, b->key.k.p));
@@ -1097,7 +1127,7 @@ static void btree_node_interior_verify(struct btree *b)
goto err;
return;
err:
- bch_dump_bucket(&b->keys);
+ bch_dump_btree_node(&b->keys);
printk(KERN_ERR "last key %llu:%llu %s\n", b->key.k.p.inode,
b->key.k.p.offset, msg);
BUG();
@@ -1238,9 +1268,9 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
le16_to_cpu(set2->u64s));
n1->keys.set->size = 0;
- n1->keys.set->extra = BSET_AUX_TREE_NONE_VAL;
+ n1->keys.set->extra = BSET_NO_AUX_TREE_VAL;
n2->keys.set->size = 0;
- n2->keys.set->extra = BSET_AUX_TREE_NONE_VAL;
+ n2->keys.set->extra = BSET_NO_AUX_TREE_VAL;
btree_node_reset_sib_u64s(n1);
btree_node_reset_sib_u64s(n2);
@@ -1276,10 +1306,9 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b,
struct bkey_packed *p;
struct bset *i;
- BUG_ON(!b->level);
- BUG_ON(b->keys.ops->is_extents);
+ BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
- bch_btree_node_iter_init(&node_iter, &b->keys, k->k.p, false);
+ bch_btree_node_iter_init(&node_iter, &b->keys, k->k.p, false, false);
while (!bch_keylist_empty(keys)) {
k = bch_keylist_front(keys);
@@ -1660,12 +1689,8 @@ retry:
n->keys.format = new_f;
n->key.k.p = next->key.k.p;
- bch_btree_sort_into(&n->keys, &prev->keys,
- b->keys.ops->key_normalize,
- &c->sort);
- bch_btree_sort_into(&n->keys, &next->keys,
- b->keys.ops->key_normalize,
- &c->sort);
+ bch_btree_sort_into(c, n, prev);
+ bch_btree_sort_into(c, n, next);
six_unlock_write(&n->lock);
bkey_init(&delete.k);
@@ -1722,7 +1747,7 @@ btree_insert_key(struct btree_insert *trans,
int old_live_u64s = b->keys.nr.live_u64s;
int live_u64s_added, u64s_added;
- ret = !b->keys.ops->is_extents
+ ret = !btree_node_is_extents(b)
? bch_insert_fixup_key(trans, insert, res)
: bch_insert_fixup_extent(trans, insert, res);
@@ -2132,7 +2157,7 @@ int bch_btree_delete_range(struct cache_set *c, enum btree_id id,
delete.k.p = iter.pos;
delete.k.version = version;
- if (iter.nodes[0]->keys.ops->is_extents) {
+ if (iter.is_extents) {
/*
* The extents btree is special - KEY_TYPE_DISCARD is
* used for deletions, not KEY_TYPE_DELETED. This is an
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index 277a99476d07..3bb6c0ca16b9 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -199,9 +199,9 @@ static inline size_t bch_btree_keys_u64s_remaining(struct cache_set *c,
* insert into could be written out from under us)
*/
static inline bool bch_btree_node_insert_fits(struct cache_set *c,
- struct btree *b, unsigned u64s)
+ struct btree *b, unsigned u64s)
{
- if (b->keys.ops->is_extents) {
+ if (btree_node_is_extents(b)) {
/* The insert key might split an existing key
* (bch_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case:
*/
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index b143f98bb3fb..93a5f00f92f3 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -59,11 +59,7 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b)
v->written = 0;
v->level = b->level;
v->btree_id = b->btree_id;
- v->keys.ops = b->keys.ops;
- bch_btree_keys_init(&v->keys, v->level
- ? &bch_btree_interior_node_ops
- : bch_btree_ops[v->btree_id],
- &v->c->expensive_debug_checks);
+ bch_btree_keys_init(&v->keys, &v->c->expensive_debug_checks);
pick = bch_btree_pick_ptr(c, b);
if (IS_ERR_OR_NULL(pick.ca))
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index 888a011a0d9c..b7fc5ca92f32 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -138,9 +138,6 @@ static void bch_dirent_to_text(struct cache_set *c, char *buf,
}
}
-const struct btree_keys_ops bch_dirent_ops = {
-};
-
const struct bkey_ops bch_bkey_dirent_ops = {
.key_invalid = bch_dirent_invalid,
.val_to_text = bch_dirent_to_text,
diff --git a/drivers/md/bcache/dirent.h b/drivers/md/bcache/dirent.h
index 63b4aa07f432..d6597da2949b 100644
--- a/drivers/md/bcache/dirent.h
+++ b/drivers/md/bcache/dirent.h
@@ -1,7 +1,6 @@
#ifndef _BCACHE_DIRENT_H
#define _BCACHE_DIRENT_H
-extern const struct btree_keys_ops bch_dirent_ops;
extern const struct bkey_ops bch_bkey_dirent_ops;
struct qstr;
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index a9a39ea67cf3..4be04a208a8c 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -22,6 +22,8 @@
#include <trace/events/bcache.h>
static bool __bch_extent_normalize(struct cache_set *, struct bkey_s, bool);
+static enum merge_result bch_extent_merge(struct btree_keys *,
+ struct bkey_i *, struct bkey_i *);
static void sort_key_next(struct btree_node_iter *iter,
struct btree_keys *b,
@@ -76,11 +78,11 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter,
__btree_node_offset_to_key(b, r->k));
}
-struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
- struct bset *bset,
+struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *dst,
+ struct btree_keys *b,
struct btree_node_iter *iter)
{
- struct bkey_packed *out = bset->start;
+ struct bkey_packed *out = dst->start;
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
@@ -101,7 +103,7 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
heap_sift(iter, 0, key_sort_cmp);
}
- bset->u64s = cpu_to_le16((u64 *) out - bset->_data);
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
@@ -590,9 +592,6 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
return (struct extent_pick_ptr) { .ca = NULL, };
}
-const struct btree_keys_ops bch_btree_interior_node_ops = {
-};
-
const struct bkey_ops bch_bkey_btree_ops = {
.key_invalid = bch_btree_ptr_invalid,
.key_debugcheck = btree_ptr_debugcheck,
@@ -769,7 +768,7 @@ static void extent_sort_append(struct btree_keys *b,
bkey_unpack(&tmp.k, f, k);
if (*prev &&
- bch_bkey_try_merge(b, (void *) *prev, &tmp.k))
+ bch_extent_merge(b, (void *) *prev, &tmp.k))
return;
if (*prev) {
@@ -784,8 +783,8 @@ static void extent_sort_append(struct btree_keys *b,
bkey_copy(*prev, &tmp.k);
}
-struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
- struct bset *bset,
+struct btree_nr_keys bch_extent_sort_fix_overlapping(struct bset *dst,
+ struct btree_keys *b,
struct btree_node_iter *iter)
{
struct bkey_format *f = &b->format;
@@ -803,7 +802,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
lk = __btree_node_offset_to_key(b, _l->k);
if (iter->used == 1) {
- extent_sort_append(b, &nr, bset->start, &prev, lk);
+ extent_sort_append(b, &nr, dst->start, &prev, lk);
extent_sort_next(iter, b, _l);
continue;
}
@@ -820,7 +819,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
/* If current key and next key don't overlap, just append */
if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
- extent_sort_append(b, &nr, bset->start, &prev, lk);
+ extent_sort_append(b, &nr, dst->start, &prev, lk);
extent_sort_next(iter, b, _l);
continue;
}
@@ -865,7 +864,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
extent_sort_sift(iter, b, 0);
- extent_sort_append(b, &nr, bset->start, &prev,
+ extent_sort_append(b, &nr, dst->start, &prev,
bkey_to_packed(&tmp.k));
} else {
bch_cut_back(bkey_start_pos(r.k), l.k);
@@ -878,10 +877,10 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
- out = bset->start;
+ out = dst->start;
}
- bset->u64s = cpu_to_le16((u64 *) out - bset->_data);
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
@@ -2385,23 +2384,12 @@ static bool bch_extent_merge_inline(struct btree_iter *iter,
}
}
-static const struct btree_keys_ops bch_extent_ops = {
- .key_normalize = bch_ptr_normalize,
- .key_merge = bch_extent_merge,
- .is_extents = true,
-};
-
const struct bkey_ops bch_bkey_extent_ops = {
.key_invalid = bch_extent_invalid,
.key_debugcheck = bch_extent_debugcheck,
.val_to_text = bch_extent_to_text,
.swab = bch_ptr_swab,
+ .key_normalize = bch_ptr_normalize,
+ .key_merge = bch_extent_merge,
.is_extents = true,
};
-
-const struct btree_keys_ops *bch_btree_ops[] = {
- [BTREE_ID_EXTENTS] = &bch_extent_ops,
- [BTREE_ID_INODES] = &bch_inode_ops,
- [BTREE_ID_DIRENTS] = &bch_dirent_ops,
- [BTREE_ID_XATTRS] = &bch_xattr_ops,
-};
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index ec8ff0078f08..f3eb2a73e167 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -11,11 +11,11 @@ struct btree_iter;
struct btree_insert;
struct btree_insert_entry;
-struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *,
- struct bset *,
+struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *,
+ struct btree_keys *,
struct btree_node_iter *);
-struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *,
- struct bset *,
+struct btree_nr_keys bch_extent_sort_fix_overlapping(struct bset *,
+ struct btree_keys *,
struct btree_node_iter *);
enum btree_insert_ret
@@ -26,9 +26,6 @@ bch_insert_fixup_key(struct btree_insert *,
extern const struct bkey_ops bch_bkey_btree_ops;
extern const struct bkey_ops bch_bkey_extent_ops;
-extern const struct btree_keys_ops bch_btree_interior_node_ops;
-extern const struct btree_keys_ops *bch_btree_ops[];
-
struct cache_set;
struct journal_res;
diff --git a/drivers/md/bcache/inode.c b/drivers/md/bcache/inode.c
index 9b2a4806805e..d36de43cdfc3 100644
--- a/drivers/md/bcache/inode.c
+++ b/drivers/md/bcache/inode.c
@@ -100,9 +100,6 @@ static void bch_inode_to_text(struct cache_set *c, char *buf,
}
}
-const struct btree_keys_ops bch_inode_ops = {
-};
-
const struct bkey_ops bch_bkey_inode_ops = {
.key_invalid = bch_inode_invalid,
.val_to_text = bch_inode_to_text,
diff --git a/drivers/md/bcache/inode.h b/drivers/md/bcache/inode.h
index a89064a37861..d8b28c78c277 100644
--- a/drivers/md/bcache/inode.h
+++ b/drivers/md/bcache/inode.h
@@ -1,7 +1,6 @@
#ifndef _BCACHE_INODE_H
#define _BCACHE_INODE_H
-extern const struct btree_keys_ops bch_inode_ops;
extern const struct bkey_ops bch_bkey_inode_ops;
ssize_t bch_inode_status(char *, size_t, const struct bkey *);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e6935106e980..c834c57b0274 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -888,7 +888,6 @@ static void cache_set_free(struct cache_set *c)
cancel_work_sync(&c->bio_submit_work);
cancel_work_sync(&c->read_retry_work);
- bch_bset_sort_state_free(&c->sort);
bch_btree_cache_free(c);
bch_journal_free(&c->journal);
bch_io_clock_exit(&c->io_clock[WRITE]);
@@ -897,6 +896,7 @@ static void cache_set_free(struct cache_set *c)
bdi_destroy(&c->bdi);
free_percpu(c->bucket_stats_lock.lock);
free_percpu(c->bucket_stats_percpu);
+ mempool_exit(&c->btree_sort_pool);
mempool_exit(&c->bio_bounce_pages);
bioset_exit(&c->bio_write);
bioset_exit(&c->bio_read_split);
@@ -1162,13 +1162,13 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
PAGE_SECTORS, 0) ||
!(c->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache_set)) ||
!(c->bucket_stats_lock.lock = alloc_percpu(*c->bucket_stats_lock.lock)) ||
+ mempool_init_page_pool(&c->btree_sort_pool, 1,
+ ilog2(btree_pages(c))) ||
bdi_setup_and_register(&c->bdi, "bcache") ||
bch_io_clock_init(&c->io_clock[READ]) ||
bch_io_clock_init(&c->io_clock[WRITE]) ||
bch_journal_alloc(&c->journal) ||
bch_btree_cache_alloc(c) ||
- bch_bset_sort_state_init(&c->sort, ilog2(btree_pages(c)),
- &c->btree_sort_time) ||
bch_compress_init(c))
goto err;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3c61e57caa46..1c5f9f6f9689 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -496,12 +496,12 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
"failed prev: %zu\n"
"failed overflow: %zu\n",
nodes,
- stats.sets[BSET_HAS_AUX_TREE].nr,
- stats.sets[BSET_HAS_AUX_TREE].bytes,
- stats.sets[BSET_HAS_UNWRITTEN_TABLE].nr,
- stats.sets[BSET_HAS_UNWRITTEN_TABLE].bytes,
- stats.sets[BSET_AUX_TREE_NONE].nr,
- stats.sets[BSET_AUX_TREE_NONE].bytes,
+ stats.sets[BSET_RO_AUX_TREE].nr,
+ stats.sets[BSET_RO_AUX_TREE].bytes,
+ stats.sets[BSET_RW_AUX_TREE].nr,
+ stats.sets[BSET_RW_AUX_TREE].bytes,
+ stats.sets[BSET_NO_AUX_TREE].nr,
+ stats.sets[BSET_NO_AUX_TREE].bytes,
stats.floats,
stats.failed_unpacked,
stats.failed_prev,
@@ -524,7 +524,7 @@ lock_root:
six_lock_read(&b->lock);
} while (b != c->btree_roots[BTREE_ID_EXTENTS].b);
- for_each_btree_node_key(&b->keys, k, &iter)
+ for_each_btree_node_key(&b->keys, k, &iter, btree_node_is_extents(b))
bytes += bkey_bytes(k);
six_unlock_read(&b->lock);
diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c
index b5686e472ab9..002994eae758 100644
--- a/drivers/md/bcache/xattr.c
+++ b/drivers/md/bcache/xattr.c
@@ -157,9 +157,6 @@ static void bch_xattr_to_text(struct cache_set *c, char *buf,
}
}
-const struct btree_keys_ops bch_xattr_ops = {
-};
-
const struct bkey_ops bch_bkey_xattr_ops = {
.key_invalid = bch_xattr_invalid,
.val_to_text = bch_xattr_to_text,
diff --git a/drivers/md/bcache/xattr.h b/drivers/md/bcache/xattr.h
index 675529d328d0..5e40f13fd11e 100644
--- a/drivers/md/bcache/xattr.h
+++ b/drivers/md/bcache/xattr.h
@@ -1,7 +1,6 @@
#ifndef _BCACHE_XATTR_H
#define _BCACHE_XATTR_H
-extern const struct btree_keys_ops bch_xattr_ops;
extern const struct bkey_ops bch_bkey_xattr_ops;
struct dentry;