summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-10-25 14:35:43 -0800
committerKent Overstreet <kent.overstreet@gmail.com>2016-11-13 13:56:14 -0900
commitcc9abf65abbfdb8ccd7238744131def02a00d253 (patch)
tree953a2e5507d314c534afee744192393aef9cf2fc
parent67117b8d77f8c9463a8846878168be3a14027f71 (diff)
bcache: compact whiteouts more aggressively
-rw-r--r--drivers/md/bcache/bset.c107
-rw-r--r--drivers/md/bcache/bset.h20
-rw-r--r--drivers/md/bcache/btree_gc.c9
-rw-r--r--drivers/md/bcache/btree_io.c4
-rw-r--r--drivers/md/bcache/btree_update.c26
-rw-r--r--drivers/md/bcache/extents.c9
6 files changed, 130 insertions, 45 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 11b5a2b1547d..4bee8ad1a956 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -109,21 +109,18 @@ void bch_dump_bucket(struct btree_keys *b)
void __bch_verify_btree_nr_keys(struct btree_keys *b)
{
- struct btree_node_iter iter;
+ unsigned i;
struct bkey_packed *k;
- unsigned u64s = 0, packed = 0, unpacked = 0;
+ struct btree_nr_keys nr = { 0 };
- for_each_btree_node_key(b, k, &iter) {
- u64s += k->u64s;
- if (bkey_packed(k))
- packed++;
- else
- unpacked++;
- }
+ for (i = 0; i <= b->nsets; i++)
+ for (k = b->set[i].data->start;
+ k != bset_bkey_last(b->set[i].data);
+ k = bkey_next(k))
+ if (!bkey_deleted(k))
+ btree_keys_account_key_add(&nr, i, k);
- BUG_ON(b->nr.live_u64s != u64s);
- BUG_ON(b->nr.packed_keys != packed);
- BUG_ON(b->nr.unpacked_keys != unpacked);
+ BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
static void bch_btree_node_iter_next_check(struct btree_node_iter *iter,
@@ -697,8 +694,6 @@ static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
/* Only valid for the last bset: */
static unsigned bset_tree_capacity(struct btree_keys *b, struct bset_tree *t)
{
- EBUG_ON(t != bset_tree_last(b));
-
return b->set->tree + btree_keys_cachelines(b) - t->tree;
}
@@ -739,9 +734,9 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i)
}
EXPORT_SYMBOL(bch_bset_init_next);
-void bch_bset_build_written_tree(struct btree_keys *b)
+void bch_bset_build_written_tree(struct btree_keys *b,
+ struct bset_tree *t)
{
- struct bset_tree *t = bset_tree_last(b);
struct bkey_packed *prev = NULL, *k = t->data->start;
unsigned j, cacheline = 1;
@@ -987,7 +982,7 @@ struct bkey_packed *bch_bset_insert(struct btree_keys *b,
bch_bset_fix_lookup_table(b, t, where);
if (!bkey_deleted(src))
- btree_keys_account_key_add(&b->nr, src);
+ btree_keys_account_key_add(&b->nr, b->nsets, src);
bch_verify_key_order(b, iter, where);
bch_verify_btree_nr_keys(b);
@@ -1007,6 +1002,11 @@ bool bch_bset_try_overwrite(struct btree_keys *b,
EBUG_ON(bkey_cmp_left_packed(f, where, insert->k.p));
EBUG_ON(bkey_deleted(where));
+ /*
+ * If insert is a deletion overwriting is handled by
+ * bch_btree_bset_insert_key(), since in that case we're replacing a non
+ * deleted key with a deleted key and we have to fix iterators:
+ */
if (bkey_deleted(&insert->k))
return false;
@@ -1026,9 +1026,9 @@ bool bch_bset_try_overwrite(struct btree_keys *b,
return false;
overwrite:
if (!bkey_deleted(where))
- btree_keys_account_key_drop(&b->nr, where);
+ btree_keys_account_key_drop(&b->nr, t - b->set, where);
if (!bkey_deleted(src))
- btree_keys_account_key_add(&b->nr, src);
+ btree_keys_account_key_add(&b->nr, t - b->set, src);
memcpy(where, src,
bkeyp_key_bytes(f, src));
memcpy(bkeyp_val(f, where), &insert->v,
@@ -1542,10 +1542,13 @@ EXPORT_SYMBOL(bch_bset_sort_state_init);
/* No repacking: */
static struct btree_nr_keys btree_mergesort_simple(struct btree_keys *b,
+ unsigned from,
struct bset *bset,
struct btree_node_iter *iter)
{
struct bkey_packed *in, *out = bset->start;
+ struct btree_nr_keys ret = b->nr;
+ unsigned i;
while ((in = bch_btree_node_iter_next_all(iter, b))) {
if (!bkey_deleted(in)) {
@@ -1556,7 +1559,13 @@ static struct btree_nr_keys btree_mergesort_simple(struct btree_keys *b,
}
bset->u64s = cpu_to_le16((u64 *) out - bset->_data);
- return b->nr;
+
+ for (i = from + 1; i < MAX_BSETS; i++) {
+ ret.bset_u64s[from] += ret.bset_u64s[i];
+ ret.bset_u64s[i] = 0;
+ }
+
+ return ret;
}
/* Sort + repack in a new format: */
@@ -1586,7 +1595,7 @@ static struct btree_nr_keys btree_mergesort(struct btree_keys *dst,
else
bkey_unpack((void *) out, in_f, in);
- btree_keys_account_key_add(&nr, out);
+ btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
BUG_ON((void *) out >
@@ -1641,7 +1650,7 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst,
if (prev) {
bkey_pack(prev, (void *) prev, out_f);
- btree_keys_account_key_add(&nr, prev);
+ btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
prev = dst_set->start;
@@ -1655,7 +1664,7 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst,
if (prev) {
bkey_pack(prev, (void *) prev, out_f);
- btree_keys_account_key_add(&nr, prev);
+ btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = dst_set->start;
@@ -1697,7 +1706,7 @@ struct btree_nr_keys bch_sort_bsets(struct bset *dst, struct btree_keys *b,
else if (b->ops->is_extents && !from)
nr = btree_mergesort_extents(b, dst, b, iter, NULL);
else
- nr = btree_mergesort_simple(b, dst, iter);
+ nr = btree_mergesort_simple(b, from, dst, iter);
if (!from)
bch_time_stats_update(state->time, start_time);
@@ -1744,6 +1753,56 @@ void bch_btree_sort_into(struct btree_keys *dst,
bch_verify_btree_nr_keys(dst);
}
+bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
+{
+ struct bset_tree *t, *rebuild_from = NULL;
+ bool last_set_has_aux_tree = bset_has_aux_tree(bset_tree_last(b));
+ unsigned idx;
+
+ for (idx = 0; idx <= b->nsets; idx++) {
+ struct bset *i = b->set[idx].data;
+ struct bkey_packed *k, *n, *out = i->start;
+
+ if (b->nr.bset_u64s[idx] * 4 > le16_to_cpu(i->u64s) * 3)
+ continue;
+
+ /*
+ * We cannot drop deleted keys from the last bset, if it hasn't
+ * been written yet - we may need that whiteout on disk:
+ *
+ * XXX unless they're extents, if we fix assertions elsewhere
+ */
+ if (idx == b->nsets && !last_set_has_aux_tree)
+ break;
+
+ for (k = i->start; k != bset_bkey_last(i); k = n) {
+ n = bkey_next(k);
+
+ if (!bkey_deleted(k)) {
+ memmove(out, k, bkey_bytes(k));
+ out = bkey_next(out);
+ }
+ }
+
+ i->u64s = cpu_to_le16((u64 *) out - i->_data);
+
+ if (!rebuild_from)
+ rebuild_from = &b->set[idx];
+ }
+
+ if (!rebuild_from)
+ return false;
+
+ for (t = rebuild_from; t <= b->set + b->nsets; t++) {
+ if (t == bset_tree_last(b) && !last_set_has_aux_tree)
+ bch_bset_build_unwritten_tree(b);
+ else
+ bch_bset_build_written_tree(b, t);
+ }
+
+ return true;
+}
+
void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
{
unsigned i;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 3cd5f41244f1..23c3561b34d8 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -242,6 +242,7 @@ struct btree_nr_keys {
* units of u64s
*/
u16 live_u64s;
+ u16 bset_u64s[MAX_BSETS];
/* live keys only: */
u16 packed_keys;
@@ -317,7 +318,7 @@ void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
void bch_bset_init_first(struct btree_keys *, struct bset *);
void bch_bset_init_next(struct btree_keys *, struct bset *);
-void bch_bset_build_written_tree(struct btree_keys *);
+void bch_bset_build_written_tree(struct btree_keys *, struct bset_tree *);
void bch_bset_fix_invalidated_key(struct btree_keys *, struct bset_tree *,
struct bkey_packed *);
@@ -329,20 +330,23 @@ bool bch_bset_try_overwrite(struct btree_keys *, struct btree_node_iter *,
struct bkey_i *);
static inline void btree_keys_account_key(struct btree_nr_keys *n,
+ unsigned bset,
struct bkey_packed *k,
int sign)
{
- n->live_u64s += k->u64s * sign;
+ n->live_u64s += k->u64s * sign;
+ n->bset_u64s[bset] += k->u64s * sign;
+
if (bkey_packed(k))
- n->packed_keys += sign;
+ n->packed_keys += sign;
else
n->unpacked_keys += sign;
}
-#define btree_keys_account_key_add(_b, _k) \
- btree_keys_account_key(_b, _k, 1)
-#define btree_keys_account_key_drop(_b, _k) \
- btree_keys_account_key(_b, _k, -1)
+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \
+ btree_keys_account_key(_nr, _bset_idx, _k, 1)
+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \
+ btree_keys_account_key(_nr, _bset_idx, _k, -1)
/* Bkey utility code */
@@ -586,6 +590,8 @@ void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
ptr_filter_fn, struct bset_sort_state *);
+bool bch_maybe_compact_deleted_keys(struct btree_keys *);
+
struct bset_stats {
struct {
size_t nr, bytes;
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index 4f4b8c14fefc..1edca3e37b5d 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -432,13 +432,16 @@ void bch_gc(struct cache_set *c)
static void recalc_packed_keys(struct btree *b)
{
- struct btree_node_iter iter;
struct bkey_packed *k;
memset(&b->keys.nr, 0, sizeof(b->keys.nr));
- for_each_btree_node_key(&b->keys, k, &iter)
- btree_keys_account_key_add(&b->keys.nr, k);
+ BUG_ON(b->keys.nsets);
+
+ for (k = b->keys.set[0].data->start;
+ k != bset_bkey_last(b->keys.set[0].data);
+ k = bkey_next(k))
+ btree_keys_account_key_add(&b->keys.nr, 0, k);
}
static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index d1bfc38c47dc..c343811d9bc9 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -75,7 +75,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
b->keys.nsets = from;
b->keys.nr = nr;
- bch_bset_build_written_tree(&b->keys);
+ bch_bset_build_written_tree(&b->keys, &b->keys.set[from]);
if (!is_write_locked)
__btree_node_unlock_write(b, iter);
@@ -123,7 +123,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b,
nosort:
__btree_node_lock_write(b, iter);
- bch_bset_build_written_tree(&b->keys);
+ bch_bset_build_written_tree(&b->keys, bset_tree_last(&b->keys));
__btree_node_unlock_write(b, iter);
return false;
sort:
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index 55b631eb21e6..8693f50f9ce5 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -683,7 +683,8 @@ void bch_btree_bset_insert_key(struct btree_iter *iter,
}
k->type = KEY_TYPE_DELETED;
- btree_keys_account_key_drop(&b->keys.nr, k);
+ btree_keys_account_key_drop(&b->keys.nr,
+ t - b->keys.set, k);
bch_btree_node_iter_fix(iter, b, node_iter, t, k, true);
if (t == bset_tree_last(&b->keys) && bkey_deleted(&insert->k))
@@ -1146,6 +1147,9 @@ bch_btree_insert_keys_interior(struct btree *b,
btree_interior_update_updated_btree(iter->c, as, b);
+ if (bch_maybe_compact_deleted_keys(&b->keys))
+ bch_btree_iter_reinit_node(iter, b);
+
btree_node_unlock_write(b, iter);
btree_node_interior_verify(b);
@@ -1197,15 +1201,17 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
set2->u64s = cpu_to_le16((u64 *) bset_bkey_last(set1) - (u64 *) k);
set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
- n2->keys.nr.live_u64s = le16_to_cpu(set2->u64s);
+ n2->keys.nr.live_u64s = le16_to_cpu(set2->u64s);
+ n2->keys.nr.bset_u64s[0] = le16_to_cpu(set2->u64s);
n2->keys.nr.packed_keys
= n1->keys.nr.packed_keys - nr_packed;
n2->keys.nr.unpacked_keys
= n1->keys.nr.unpacked_keys - nr_unpacked;
- n1->keys.nr.live_u64s = le16_to_cpu(set1->u64s);
- n1->keys.nr.packed_keys = nr_packed;
- n1->keys.nr.unpacked_keys = nr_unpacked;
+ n1->keys.nr.live_u64s = le16_to_cpu(set1->u64s);
+ n1->keys.nr.bset_u64s[0] = le16_to_cpu(set1->u64s);
+ n1->keys.nr.packed_keys = nr_packed;
+ n1->keys.nr.unpacked_keys = nr_unpacked;
BUG_ON(!set1->u64s);
BUG_ON(!set2->u64s);
@@ -1519,11 +1525,21 @@ btree_insert_key(struct btree_insert *trans,
struct btree_iter *iter = insert->iter;
struct btree *b = iter->nodes[0];
enum btree_insert_ret ret;
+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
+ int old_live_u64s = b->keys.nr.live_u64s;
+ int live_u64s_added, u64s_added;
ret = !b->keys.ops->is_extents
? bch_insert_fixup_key(trans, insert, res)
: bch_insert_fixup_extent(trans, insert, res);
+ live_u64s_added = (int) b->keys.nr.live_u64s - old_live_u64s;
+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
+
+ if (u64s_added > live_u64s_added &&
+ bch_maybe_compact_deleted_keys(&b->keys))
+ bch_btree_iter_reinit_node(iter, b);
+
trace_bcache_btree_insert_key(b, insert->k);
return ret;
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 274b1685c353..8dbe25c2ebbd 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -93,7 +93,7 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
/* XXX: need better bkey_copy */
memcpy(out, k, bkey_bytes(k));
- btree_keys_account_key_add(&nr, out);
+ btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
}
@@ -775,7 +775,7 @@ static void extent_sort_append(struct btree_keys *b,
if (*prev) {
bkey_pack(*prev, (void *) *prev, f);
- btree_keys_account_key_add(nr, *prev);
+ btree_keys_account_key_add(nr, 0, *prev);
*prev = bkey_next(*prev);
} else {
*prev = start;
@@ -875,7 +875,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
if (prev) {
bkey_pack(prev, (void *) prev, f);
- btree_keys_account_key_add(&nr, prev);
+ btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = bset->start;
@@ -1421,7 +1421,8 @@ bch_insert_fixup_extent(struct btree_insert *trans,
/* The insert key completely covers k, invalidate k */
if (!bkey_deleted(_k))
- btree_keys_account_key_drop(&b->keys.nr, _k);
+ btree_keys_account_key_drop(&b->keys.nr,
+ t - b->keys.set, _k);
bch_drop_subtract(iter, k, &stats);
k.k->p = bkey_start_pos(&insert->k->k);