-rw-r--r--  drivers/md/bcache/bset.c          155
-rw-r--r--  drivers/md/bcache/bset.h           15
-rw-r--r--  drivers/md/bcache/btree_gc.c        2
-rw-r--r--  drivers/md/bcache/btree_io.c       32
-rw-r--r--  drivers/md/bcache/btree_io.h        1
-rw-r--r--  drivers/md/bcache/btree_iter.c      4
-rw-r--r--  drivers/md/bcache/btree_update.c   23
-rw-r--r--  drivers/md/bcache/btree_update.h   27
8 files changed, 153 insertions(+), 106 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8b828b7ff145..1006a8d63cd3 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -414,11 +414,10 @@ void bch_btree_keys_init(struct btree_keys *b, bool *expensive_debug_checks)
#ifdef CONFIG_BCACHE_DEBUG
b->expensive_debug_checks = expensive_debug_checks;
#endif
- for (i = 0; i < MAX_BSETS; i++) {
+ for (i = 0; i < MAX_BSETS; i++)
b->set[i].data = NULL;
- b->set[i].size = 0;
- b->set[i].extra = BSET_NO_AUX_TREE_VAL;
- }
+
+ bch_bset_set_no_aux_tree(b, b->set);
}
EXPORT_SYMBOL(bch_btree_keys_init);
@@ -733,28 +732,6 @@ static void make_bfloat(struct bkey_format *format,
}
}
-static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
-{
- if (t != b->set) {
- unsigned j = round_up(t[-1].size,
- 64 / sizeof(struct bkey_float));
-
- t->tree = t[-1].tree + j;
- t->prev = t[-1].prev + j;
-
- BUG_ON(t->tree > b->set->tree + btree_keys_cachelines(b));
- }
-
- t->size = 0;
-
- while (++t < b->set + MAX_BSETS) {
- t->size = 0;
- t->extra = BSET_NO_AUX_TREE_VAL;
- t->tree = NULL;
- t->prev = NULL;
- }
-}
-
/* Only valid for the last bset: */
static unsigned bset_tree_capacity(struct btree_keys *b, struct bset_tree *t)
{
@@ -781,15 +758,8 @@ static void bch_bset_lookup_table_add_entries(struct btree_keys *b,
}
}
-static void bch_bset_build_rw_aux_tree(struct btree_keys *b, struct bset_tree *t)
+static void __build_rw_aux_tree(struct btree_keys *b, struct bset_tree *t)
{
- bset_alloc_tree(b, t);
-
- if (!bset_tree_capacity(b, t)) {
- t->extra = BSET_NO_AUX_TREE_VAL;
- return;
- }
-
t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
t->size = 1;
t->extra = BSET_RW_AUX_TREE_VAL;
@@ -797,50 +767,16 @@ static void bch_bset_build_rw_aux_tree(struct btree_keys *b, struct bset_tree *t
bch_bset_lookup_table_add_entries(b, t);
}
-void bch_bset_init_first(struct btree_keys *b, struct bset *i)
-{
- struct bset_tree *t;
-
- BUG_ON(b->nsets);
-
- t = &b->set[b->nsets++];
- t->data = i;
- memset(i, 0, sizeof(*i));
- get_random_bytes(&i->seq, sizeof(i->seq));
- SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-
- bch_bset_build_rw_aux_tree(b, t);
-}
-
-void bch_bset_init_next(struct btree_keys *b, struct bset *i)
-{
- struct bset_tree *t;
-
- BUG_ON(b->nsets >= MAX_BSETS);
-
- t = &b->set[b->nsets++];
- t->data = i;
- memset(i, 0, sizeof(*i));
- i->seq = b->set->data->seq;
- SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-
- bch_bset_build_rw_aux_tree(b, t);
-}
-
-void bch_bset_build_ro_aux_tree(struct btree_keys *b,
- struct bset_tree *t)
+static void __build_ro_aux_tree(struct btree_keys *b, struct bset_tree *t)
{
struct bkey_packed *prev = NULL, *k = t->data->start;
unsigned j, cacheline = 1;
- bset_alloc_tree(b, t);
-
t->size = min(bkey_to_cacheline(t, bset_bkey_last(t->data)),
bset_tree_capacity(b, t));
retry:
if (t->size < 2) {
- t->size = 0;
- t->extra = BSET_NO_AUX_TREE_VAL;
+ bch_bset_set_no_aux_tree(b, t);
return;
}
@@ -877,6 +813,74 @@ retry:
make_bfloat(&b->format, t, j);
}
+static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
+{
+ struct bset_tree *i;
+
+ for (i = b->set; i != t; i++)
+ BUG_ON(bset_has_rw_aux_tree(i));
+
+ if (t != b->set) {
+ unsigned j = round_up(t[-1].size,
+ 64 / sizeof(struct bkey_float));
+
+ t->tree = t[-1].tree + j;
+ t->prev = t[-1].prev + j;
+
+ BUG_ON(t->tree > b->set->tree + btree_keys_cachelines(b));
+ }
+
+ bch_bset_set_no_aux_tree(b, t);
+}
+
+void bch_bset_build_aux_tree(struct btree_keys *b, struct bset_tree *t,
+ bool writeable)
+{
+ if (writeable
+ ? bset_has_rw_aux_tree(t)
+ : bset_has_ro_aux_tree(t))
+ return;
+
+ bset_alloc_tree(b, t);
+
+ if (!bset_tree_capacity(b, t)) {
+ bch_bset_set_no_aux_tree(b, t);
+ return;
+ }
+
+ if (writeable)
+ __build_rw_aux_tree(b, t);
+ else
+ __build_ro_aux_tree(b, t);
+}
+
+void bch_bset_init_first(struct btree_keys *b, struct bset *i)
+{
+ struct bset_tree *t;
+
+ BUG_ON(b->nsets);
+
+ t = &b->set[b->nsets++];
+ t->data = i;
+ memset(i, 0, sizeof(*i));
+ get_random_bytes(&i->seq, sizeof(i->seq));
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+}
+
+void bch_bset_init_next(struct btree_keys *b, struct bset *i)
+{
+ struct bset_tree *t;
+
+ BUG_ON(b->nsets >= MAX_BSETS);
+
+ t = &b->set[b->nsets++];
+ t->data = i;
+ memset(i, 0, sizeof(*i));
+ i->seq = b->set->data->seq;
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+}
+
static struct bkey_packed *__bkey_prev(struct bset_tree *t, struct bkey_packed *k)
{
struct bkey_packed *p;
@@ -1774,6 +1778,7 @@ bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
}
i->u64s = cpu_to_le16((u64 *) out - i->_data);
+ bch_bset_set_no_aux_tree(b, t);
if (!rebuild_from)
rebuild_from = t;
@@ -1782,12 +1787,10 @@ bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
if (!rebuild_from)
return false;
- for (t = rebuild_from; t < b->set + b->nsets; t++) {
- if (t == bset_tree_last(b) && !last_set_aux_tree_ro)
- bch_bset_build_rw_aux_tree(b, t);
- else
- bch_bset_build_ro_aux_tree(b, t);
- }
+ for (t = rebuild_from; t < b->set + b->nsets; t++)
+ bch_bset_build_aux_tree(b, t,
+ t == bset_tree_last(b) &&
+ !last_set_aux_tree_ro);
return true;
}
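
Note on the bset.c changes above: the BUG_ON() loop added at the top of bset_alloc_tree() guards a layout invariant the rest of the patch relies on. All aux trees for a node share one contiguous arena, each bset's tree starting where the previous one ended, rounded up to a cacheline. A hedged illustration (sizes invented for the example, helper names from the patch):

/*
 * Illustration only, not part of the patch.  The arena holds
 * btree_keys_cachelines(b) entries total, handed out back to back:
 *
 *   set[0].tree  [ tree 0 .......... ]
 *   set[1].tree                      [ tree 1 ...... ]
 *   set[2].tree                                      [ tree 2 .. ]
 *
 *   set[n].tree == set[n-1].tree +
 *           round_up(set[n-1].size, 64 / sizeof(struct bkey_float));
 *
 * A read-write tree keeps growing as keys are inserted, which would
 * clobber the trees after it -- hence the new check that no bset
 * before the one being allocated still has a rw aux tree.
 */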
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index d3891db681fc..d268e58d6d89 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -198,6 +198,7 @@ static inline enum bset_aux_tree_type bset_aux_tree_type(struct bset_tree *t)
{
switch (t->extra) {
case BSET_NO_AUX_TREE_VAL:
+ EBUG_ON(t->size);
return BSET_NO_AUX_TREE;
case BSET_RW_AUX_TREE_VAL:
EBUG_ON(!t->size);
@@ -273,6 +274,18 @@ static inline bool bset_has_rw_aux_tree(struct bset_tree *t)
return bset_aux_tree_type(t) == BSET_RW_AUX_TREE;
}
+static inline void bch_bset_set_no_aux_tree(struct btree_keys *b,
+ struct bset_tree *t)
+{
+ BUG_ON(t < b->set);
+
+ for (; t < b->set + ARRAY_SIZE(b->set); t++) {
+ t->size = 0;
+ t->extra = BSET_NO_AUX_TREE_VAL;
+ }
+}
+
#define __set_bytes(_i, _u64s) (sizeof(*(_i)) + (_u64s) * sizeof(u64))
#define set_bytes(_i) __set_bytes(_i, (_i)->u64s)
@@ -298,7 +311,7 @@ void bch_btree_keys_init(struct btree_keys *, bool *);
void bch_bset_init_first(struct btree_keys *, struct bset *);
void bch_bset_init_next(struct btree_keys *, struct bset *);
-void bch_bset_build_ro_aux_tree(struct btree_keys *, struct bset_tree *);
+void bch_bset_build_aux_tree(struct btree_keys *, struct bset_tree *, bool);
void bch_bset_fix_invalidated_key(struct btree_keys *, struct bset_tree *,
struct bkey_packed *);
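
A usage note on bch_bset_set_no_aux_tree() (my reading of the patch, not from a commit message): because the aux trees are packed contiguously, dropping or resizing bset t's tree leaves the storage of every later bset's tree in an undefined position, so the helper clears from t through the end of b->set rather than t alone. A minimal sketch of the intended call pattern:

	bch_bset_set_no_aux_tree(b, t);		/* drops t and all later trees */
	bch_bset_build_aux_tree(b, t, false);	/* rebuild t read-only if needed */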
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index 8753067d684b..d577f1fd0824 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -588,7 +588,9 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
recalc_packed_keys(n);
btree_node_reset_sib_u64s(n);
+ bch_btree_build_aux_trees(n);
six_unlock_write(&n->lock);
+
bch_btree_node_write(n, &as->cl, NULL);
}
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index 26434bcca645..59ba43e604e1 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -130,7 +130,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
}
b->keys.nsets = from + 1;
- bch_bset_build_ro_aux_tree(&b->keys, &b->keys.set[from]);
+ bch_bset_set_no_aux_tree(&b->keys, &b->keys.set[from]);
if (!is_write_locked)
__btree_node_unlock_write(b, iter);
@@ -158,7 +158,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b,
/* Don't sort if nothing to do */
if (b->keys.nsets == 1)
- goto nosort;
+ return false;
/* If not a leaf node, always sort */
if (b->level)
@@ -177,16 +177,22 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b,
goto sort;
}
-nosort:
- __btree_node_lock_write(b, iter);
- bch_bset_build_ro_aux_tree(&b->keys, bset_tree_last(&b->keys));
- __btree_node_unlock_write(b, iter);
return false;
sort:
btree_node_sort(c, b, iter, i, NULL, NULL, false);
return true;
}
+void bch_btree_build_aux_trees(struct btree *b)
+{
+ struct bset_tree *t;
+
+ for_each_bset(&b->keys, t)
+ bch_bset_build_aux_tree(&b->keys, t,
+ bset_unwritten(b, t->data) &&
+ t == bset_tree_last(&b->keys));
+}
+
/*
* @bch_btree_init_next - initialize a new (unwritten) bset that can then be
* inserted into
@@ -209,12 +215,18 @@ void bch_btree_init_next(struct cache_set *c, struct btree *b,
if (did_sort && b->keys.nsets == 1)
bch_btree_verify(c, b);
+ __btree_node_lock_write(b, iter);
+
if (b->written < c->sb.btree_node_size) {
- __btree_node_lock_write(b, iter);
- bch_bset_init_next(&b->keys, &write_block(b)->keys);
- __btree_node_unlock_write(b, iter);
+ struct btree_node_entry *bne = write_block(b);
+
+ bch_bset_init_next(&b->keys, &bne->keys);
}
+ bch_btree_build_aux_trees(b);
+
+ __btree_node_unlock_write(b, iter);
+
if (iter && did_sort)
bch_btree_iter_reinit_node(iter, b);
}
@@ -438,6 +450,8 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
: bch_key_sort_fix_overlapping,
true);
+ bch_bset_build_aux_tree(&b->keys, b->keys.set, false);
+
btree_node_reset_sib_u64s(b);
err = "short btree key";
diff --git a/drivers/md/bcache/btree_io.h b/drivers/md/bcache/btree_io.h
index 203320ac1b8e..d061ac36f88e 100644
--- a/drivers/md/bcache/btree_io.h
+++ b/drivers/md/bcache/btree_io.h
@@ -19,6 +19,7 @@ static inline void btree_node_io_lock(struct btree *b)
TASK_UNINTERRUPTIBLE);
}
+void bch_btree_build_aux_trees(struct btree *);
void bch_btree_init_next(struct cache_set *, struct btree *,
struct btree_iter *);
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index d9d73ff2b976..cbfe35ce0ec4 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -1107,7 +1107,9 @@ void __bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c,
unsigned locks_want, unsigned depth)
{
iter->level = depth;
- iter->is_extents = bch_bkey_ops[btree_id]->is_extents;
+	/* bch_bkey_ops isn't used much, this would be a cache miss */
+	/* iter->is_extents = bch_bkey_ops[btree_id]->is_extents; */
+ iter->is_extents = btree_id == BTREE_ID_EXTENTS;
iter->nodes_locked = 0;
iter->nodes_intent_locked = 0;
iter->locks_want = min(locks_want, BTREE_MAX_DEPTH);
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index 98e701a464c4..b845d82bb9e5 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -300,6 +300,8 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c,
b->data->magic = cpu_to_le64(bset_magic(&c->disk_sb));
SET_BSET_BTREE_LEVEL(&b->data->keys, level);
+ bch_btree_build_aux_trees(b);
+
bch_check_mark_super(c, &b->key, true);
trace_bcache_btree_node_alloc(b);
@@ -316,7 +318,7 @@ static void bch_btree_sort_into(struct cache_set *c,
BUG_ON(dst->keys.nsets != 1);
- dst->keys.set[0].extra = BSET_NO_AUX_TREE_VAL;
+ bch_bset_set_no_aux_tree(&dst->keys, &dst->keys.set[0]);
bch_btree_node_iter_init_from_start(&iter, &src->keys,
btree_node_is_extents(src));
@@ -799,15 +801,13 @@ static void btree_node_lock_for_insert(struct btree *b, struct btree_iter *iter)
relock:
btree_node_lock_write(b, iter);
- BUG_ON(&write_block(b)->keys < btree_bset_last(b));
-
/*
* If the last bset has been written, initialize a new one - check after
* taking the write lock because it can be written with only a read
* lock:
*/
if (b->written != c->sb.btree_node_size &&
- &write_block(b)->keys > btree_bset_last(b)) {
+ bset_written(b, btree_bset_last(b))) {
btree_node_unlock_write(b, iter);
bch_btree_init_next(c, b, iter);
goto relock;
@@ -1213,6 +1213,7 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
n2 = bch_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
n2->data->max_key = n1->data->max_key;
n2->keys.format = n1->keys.format;
+ n2->data->format = n1->keys.format;
n2->key.k.p = n1->key.k.p;
set1 = btree_bset_first(n1);
@@ -1267,11 +1268,6 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
bset_bkey_last(set1),
le16_to_cpu(set2->u64s));
- n1->keys.set->size = 0;
- n1->keys.set->extra = BSET_NO_AUX_TREE_VAL;
- n2->keys.set->size = 0;
- n2->keys.set->extra = BSET_NO_AUX_TREE_VAL;
-
btree_node_reset_sib_u64s(n1);
btree_node_reset_sib_u64s(n2);
@@ -1369,6 +1365,9 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
trace_bcache_btree_node_split(b, b->keys.nr.live_u64s);
n2 = __btree_split_node(iter, n1, reserve);
+
+ bch_btree_build_aux_trees(n2);
+ bch_btree_build_aux_trees(n1);
six_unlock_write(&n2->lock);
six_unlock_write(&n1->lock);
@@ -1396,6 +1395,8 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
}
} else {
trace_bcache_btree_node_compact(b, b->keys.nr.live_u64s);
+
+ bch_btree_build_aux_trees(n1);
six_unlock_write(&n1->lock);
bch_keylist_add(&as->parent_keys, &n1->key);
@@ -1691,6 +1692,8 @@ retry:
bch_btree_sort_into(c, n, prev);
bch_btree_sort_into(c, n, next);
+
+ bch_btree_build_aux_trees(n);
six_unlock_write(&n->lock);
bkey_init(&delete.k);
@@ -2221,6 +2224,8 @@ int bch_btree_node_rewrite(struct btree_iter *iter, struct btree *b,
bch_btree_interior_update_will_free_node(c, as, b);
n = btree_node_alloc_replacement(c, b, reserve);
+
+ bch_btree_build_aux_trees(n);
six_unlock_write(&n->lock);
trace_bcache_btree_gc_rewrite_node(b);
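
One pattern repeats across the btree_update.c call sites above and is worth spelling out (my gloss, not stated in the patch): now that freshly allocated nodes no longer get aux trees as a side effect of bch_bset_init_first(), each call site builds them explicitly while the node's write lock is still held, so the lookup tables exist before the node becomes visible:

/*
 * Sketch of the call-site pattern (names from the patch):
 *
 *	n = bch_btree_node_alloc(...);   <- or a split/merge result
 *	... fill in n's keys ...
 *	bch_btree_build_aux_trees(n);    <- while still write-locked
 *	six_unlock_write(&n->lock);      <- node now visible to readers
 *	bch_btree_node_write(n, ...);
 */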
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index 3bb6c0ca16b9..d42b1b66e166 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -171,27 +171,34 @@ void bch_btree_bset_insert_key(struct btree_iter *, struct btree *,
void bch_btree_journal_key(struct btree_iter *, struct bkey_i *,
struct journal_res *);
-static inline struct btree_node_entry *write_block(struct btree *b)
+static inline void *write_block(struct btree *b)
{
- EBUG_ON(!b->written);
-
return (void *) b->data + (b->written << 9);
}
+static inline bool bset_written(struct btree *b, struct bset *i)
+{
+ return (void *) i < write_block(b);
+}
+
+static inline bool bset_unwritten(struct btree *b, struct bset *i)
+{
+ return (void *) i > write_block(b);
+}
+
static inline size_t bch_btree_keys_u64s_remaining(struct cache_set *c,
struct btree *b)
{
struct bset *i = btree_bset_last(b);
- size_t bytes_used = bset_byte_offset(b, i) +
- __set_bytes(i, le16_to_cpu(i->u64s));
+ unsigned used = bset_byte_offset(b, bset_bkey_last(i)) / sizeof(u64);
+ unsigned total = c->sb.btree_node_size << 6;
- if (b->written == c->sb.btree_node_size)
- return 0;
+ EBUG_ON(used > total);
- EBUG_ON(bytes_used > btree_bytes(c));
- EBUG_ON(i != (b->written ? &write_block(b)->keys : &b->data->keys));
+ if (bset_written(b, i))
+ return 0;
- return (btree_bytes(c) - bytes_used) / sizeof(u64);
+ return total - used;
}
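
The unit conversions in the last two hunks deserve one worked example (my arithmetic, inferred from the old code's btree_bytes() comparison rather than stated in the patch): b->written and c->sb.btree_node_size both count 512-byte sectors, so << 9 converts sectors to bytes and << 6 converts sectors to u64s:

/*
 * Worked example, assuming a 256KiB btree node:
 *
 *   node size = 512 sectors            (512 * 512 B = 256 KiB)
 *   << 9:  512 sectors * 512 B/sector  = 262144 bytes  (write_block offset)
 *   << 6:  512 sectors * 64 u64/sector = 32768 u64s    ('total' above)
 *
 * 64 u64s per sector because 512 / sizeof(u64) == 64; equivalently,
 * a sector shift of 9 minus a u64 shift of 3 leaves a shift of 6.
 */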
/*