summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-11-02 02:17:08 -0800
committerKent Overstreet <kent.overstreet@gmail.com>2016-11-13 13:56:17 -0900
commitd96fd309086c928c3361f77a3faf7866cda7a5bf (patch)
treeff14a54f9d465252f2bd46b0f33af7f7929b9f22
parentb9cafd3f6d803fc70605acf51277ca228f3a8a5c (diff)
bcache: avoid creating whiteouts unnecessarily: extents
-rw-r--r--drivers/md/bcache/bset.c142
-rw-r--r--drivers/md/bcache/bset.h7
-rw-r--r--drivers/md/bcache/btree_iter.c78
-rw-r--r--drivers/md/bcache/btree_iter.h2
-rw-r--r--drivers/md/bcache/btree_update.c59
-rw-r--r--drivers/md/bcache/btree_update.h2
-rw-r--r--drivers/md/bcache/extents.c64
7 files changed, 142 insertions, 212 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 84cf9bfa9b0e..9fb036a97d14 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -518,11 +518,18 @@ static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey_packed *k)
return ((void *) k - (void *) t->data) / BSET_CACHELINE;
}
+static ssize_t __bkey_to_cacheline_offset(struct bset_tree *t,
+ unsigned cacheline,
+ struct bkey_packed *k)
+{
+ return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
+}
+
static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
unsigned cacheline,
struct bkey_packed *k)
{
- size_t m = (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
+ size_t m = __bkey_to_cacheline_offset(t, cacheline, k);
BUG_ON(m > (1U << BKEY_MID_BITS) - 1);
return m;
@@ -906,9 +913,11 @@ EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
struct bkey_packed *where,
- int shift)
+ unsigned clobber_u64s,
+ unsigned new_u64s)
{
struct bkey_packed *k;
+ int shift = new_u64s - clobber_u64s;
unsigned j;
BUG_ON(bset_has_aux_tree(t));
@@ -928,23 +937,27 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
*/
for (; j < t->size; j++) {
/* Avoid overflow - might temporarily be larger than a u8 */
- int new_offset = (int) t->prev[j] + shift;
-
- if (new_offset < 0 ||
- new_offset > 7) {
- k = new_offset < 0
- ? cacheline_to_bkey(t, j, new_offset)
- : table_to_bkey(t, j - 1);
-
- while (k < cacheline_to_bkey(t, j, 0)) {
- k = bkey_next(k);
- if (k == bset_bkey_last(t->data)) {
- t->size = j;
- return;
- }
+ ssize_t new_offset;
+
+ if (table_to_bkey(t, j) <
+ (struct bkey_packed *) ((u64 *) where + clobber_u64s))
+ new_offset = __bkey_to_cacheline_offset(t, j, where);
+ else
+ new_offset = (int) t->prev[j] + shift;
+
+ if (new_offset > 7) {
+ k = table_to_bkey(t, j - 1);
+ new_offset = __bkey_to_cacheline_offset(t, j, k);
+ }
+
+ while (new_offset < 0) {
+ k = bkey_next(cacheline_to_bkey(t, j, new_offset));
+ if (k == bset_bkey_last(t->data)) {
+ t->size = j;
+ return;
}
- new_offset = bkey_to_cacheline_offset(t, j, k);
+ new_offset = __bkey_to_cacheline_offset(t, j, k);
}
t->prev[j] = new_offset;
@@ -967,101 +980,29 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
}
}
-/**
- * bch_bset_insert - insert the key @insert into @b
- *
- * Attempts front and back merges (if @b has a method for key merging).
- *
- * NOTE: after calling @insert may be modified and is undefined
- *
- * @iter is used as a hint for where to insert at, but it's not
- * fixed/revalidated for the insertion, that's the caller's responsibility
- * (because there may be other iterators to fix, it's easier to just do all of
- * them the same way).
- *
- * If an insert was done (and not a merge), returns the position of the insert:
- * it is the caller's responsibility to update all iterators that point to @b
- * with bch_btree_node_iter_fix().
- *
- * If NULL is returned, the caller must sort all iterators that point to @b
- * with bch_btree_node_iter_sort(), because we may have done a merge that
- * modified one of the keys the iterator currently points to.
- */
-struct bkey_packed *bch_bset_insert(struct btree_keys *b,
- struct btree_node_iter *iter,
- struct bkey_i *insert)
+void bch_bset_insert(struct btree_keys *b,
+ struct btree_node_iter *iter,
+ struct bkey_packed *where,
+ struct bkey_i *insert,
+ unsigned clobber_u64s)
{
struct bkey_format *f = &b->format;
struct bset_tree *t = bset_tree_last(b);
struct bset *i = t->data;
- struct bkey_packed *where = bch_btree_node_iter_bset_pos(iter, b, i);
struct bkey_packed packed, *src = bkey_to_packed(insert);
- BUG_ON(bset_has_aux_tree(t));
- BUG_ON(insert->k.u64s < BKEY_U64s);
- BUG_ON(insert->k.format != KEY_FORMAT_CURRENT);
- BUG_ON(b->ops->is_extents &&
- (!insert->k.size || bkey_deleted(&insert->k)));
-
- BUG_ON(where < i->start);
- BUG_ON(where > bset_bkey_last(i));
-
if (bkey_pack_key(&packed, &insert->k, f))
src = &packed;
- memmove((u64 *) where + src->u64s,
- where,
- (void *) bset_bkey_last(i) - (void *) where);
- le16_add_cpu(&i->u64s, src->u64s);
-
- memcpy(where, src,
- bkeyp_key_bytes(f, src));
- memcpy(bkeyp_val(f, where), &insert->v,
- bkeyp_val_bytes(f, src));
-
- bch_bset_fix_lookup_table(b, t, where, where->u64s);
-
if (!bkey_is_whiteout(&insert->k))
btree_keys_account_key_add(&b->nr, b->nsets, src);
- bch_verify_key_order(b, iter, where);
- bch_verify_btree_nr_keys(b);
- return where;
-}
-
-/* @where must compare equal to @insert */
-int bch_bset_overwrite(struct btree_keys *b,
- struct btree_node_iter *iter,
- struct bkey_packed *where,
- struct bkey_i *insert)
-{
- struct bkey_format *f = &b->format;
- struct bset_tree *t = bset_tree_last(b);
- struct bset *i = t->data;
- struct bkey_packed packed, *src = bkey_to_packed(insert);
- int shift;
-
- /*
- * we know where isn't a deleted key, because if it was the iterator
- * would have skipped past it:
- */
- EBUG_ON(bkey_cmp_left_packed(f, where, insert->k.p));
- EBUG_ON(bkey_deleted(where));
-
- if (bkey_pack_key(&packed, &insert->k, f))
- src = &packed;
-
- if (!bkey_packed_is_whiteout(b, where))
- btree_keys_account_key_drop(&b->nr, b->nsets, where);
- if (!bkey_is_whiteout(&insert->k))
- btree_keys_account_key_add(&b->nr, b->nsets, src);
+ if (src->u64s != clobber_u64s) {
+ void *src_p = where->_data + clobber_u64s;
+ void *dst_p = where->_data + src->u64s;
- shift = src->u64s - where->u64s;
- if (shift) {
- memmove(where->_data + src->u64s,
- bkey_next(where),
- (void *) bset_bkey_last(i) - (void *) bkey_next(where));
- le16_add_cpu(&i->u64s, shift);
+ memmove(dst_p, src_p, (void *) bset_bkey_last(i) - src_p);
+ le16_add_cpu(&i->u64s, src->u64s - clobber_u64s);
}
memcpy(where, src,
@@ -1069,11 +1010,10 @@ int bch_bset_overwrite(struct btree_keys *b,
memcpy(bkeyp_val(f, where), &insert->v,
bkeyp_val_bytes(f, src));
- bch_bset_fix_lookup_table(b, t, where, shift);
+ bch_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
bch_verify_key_order(b, iter, where);
bch_verify_btree_nr_keys(b);
- return shift;
}
/* Lookup */
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index b93e64f608c3..27a42f8dbcd5 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -322,11 +322,8 @@ void bch_bset_build_written_tree(struct btree_keys *, struct bset_tree *);
void bch_bset_fix_invalidated_key(struct btree_keys *, struct bset_tree *,
struct bkey_packed *);
-struct bkey_packed *bch_bset_insert(struct btree_keys *,
- struct btree_node_iter *,
- struct bkey_i *);
-int bch_bset_overwrite(struct btree_keys *, struct btree_node_iter *,
- struct bkey_packed *, struct bkey_i *);
+void bch_bset_insert(struct btree_keys *, struct btree_node_iter *,
+ struct bkey_packed *, struct bkey_i *, unsigned);
/* Bkey utility code */
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index ae072d7351e8..43babb54a916 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -244,58 +244,45 @@ static void __bch_btree_node_iter_fix(struct btree_iter *iter,
struct btree_node_iter *node_iter,
struct bset_tree *t,
struct bkey_packed *where,
- int shift)
+ unsigned clobber_u64s,
+ unsigned new_u64s)
{
struct bkey_format *f = &b->format;
const struct bkey_packed *end = bset_bkey_last(t->data);
struct btree_node_iter_set *set;
unsigned offset = __btree_node_key_to_offset(b, where);
+ int shift = new_u64s - clobber_u64s;
unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift;
bool iter_pos_before_new = btree_iter_pos_cmp_packed(f,
iter->pos, where, iter->is_extents);
btree_node_iter_for_each(node_iter, set)
- if (set->end == old_end) {
- set->end = (int) set->end + shift;
-
- /*
- * When we inserted at @where, the key we inserted - the
- * new key at @where - compared strictly less than the
- * key previously at @where (which is now the next key
- * after the key we inserted).
- *
- * However, it is not necessarily true that if the
- * iterator's position is less than the key we inserted
- * it was <= the key previously at where - transitivity
- * doesn't hold here - because the key previously at
- * @where might have been a deleted key that the
- * iterator had skipped past.
- */
- if (set->k >= offset) {
- if (iter_pos_before_new)
- set->k = offset;
- else if (shift > 0 || set->k > offset)
- set->k = (int) set->k + shift;
- }
-
- /*
- * Resort iterator if we changed a key it points to:
- *
- * Do this before checking if we're removing a key from
- * the iterator:
- */
- if (set->k == offset)
- bch_btree_node_iter_sort(node_iter, b);
- goto check_remove;
- }
+ if (set->end == old_end)
+ goto found;
/* didn't find the bset in the iterator - might have to readd it: */
if (iter_pos_before_new)
bch_btree_node_iter_push(node_iter, b, where, end);
-check_remove:
- if (!iter_pos_before_new &&
- bch_btree_node_iter_peek_all(node_iter, b) == where)
- bch_btree_node_iter_advance(node_iter, b);
+ return;
+found:
+ set->end = (int) set->end + shift;
+
+ if (set->k < offset)
+ return;
+
+ if (iter_pos_before_new ||
+ set->k < offset + clobber_u64s) {
+ set->k = offset;
+
+ /* The key the iterator currently points to changed: */
+ bch_btree_node_iter_sort(node_iter, b);
+
+ if (!iter_pos_before_new &&
+ bch_btree_node_iter_peek_all(node_iter, b) == where)
+ bch_btree_node_iter_advance(node_iter, b);
+ } else {
+ set->k = (int) set->k + shift;
+ }
}
void bch_btree_node_iter_fix(struct btree_iter *iter,
@@ -303,23 +290,24 @@ void bch_btree_node_iter_fix(struct btree_iter *iter,
struct btree_node_iter *node_iter,
struct bset_tree *t,
struct bkey_packed *where,
- int shift)
+ unsigned clobber_u64s,
+ unsigned new_u64s)
{
struct btree_iter *linked;
if (node_iter != &iter->node_iters[b->level])
- __bch_btree_node_iter_fix(iter, &b->keys, node_iter,
- t, where, shift);
+ __bch_btree_node_iter_fix(iter, &b->keys, node_iter, t,
+ where, clobber_u64s, new_u64s);
if (iter->nodes[b->level] == b)
__bch_btree_node_iter_fix(iter, &b->keys,
- &iter->node_iters[b->level],
- t, where, shift);
+ &iter->node_iters[b->level], t,
+ where, clobber_u64s, new_u64s);
for_each_linked_btree_node(iter, b, linked)
__bch_btree_node_iter_fix(linked, &b->keys,
- &linked->node_iters[b->level],
- t, where, shift);
+ &linked->node_iters[b->level], t,
+ where, clobber_u64s, new_u64s);
bch_btree_iter_verify(iter, b);
}
diff --git a/drivers/md/bcache/btree_iter.h b/drivers/md/bcache/btree_iter.h
index 18383c8b217a..aa39e37a12f8 100644
--- a/drivers/md/bcache/btree_iter.h
+++ b/drivers/md/bcache/btree_iter.h
@@ -123,7 +123,7 @@ static inline void bch_btree_iter_verify(struct btree_iter *iter,
void bch_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bset_tree *,
- struct bkey_packed *, int);
+ struct bkey_packed *, unsigned, unsigned);
bool bch_btree_iter_upgrade(struct btree_iter *);
int bch_btree_iter_unlock(struct btree_iter *);
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index 8fb06fc14f86..7aef5cd9f9d9 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -636,33 +636,6 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter,
/* Inserting into a given leaf node (last stage of insert): */
-/* Wrapper around bch_bset_insert() that fixes linked iterators: */
-void bch_btree_bset_insert(struct btree_iter *iter,
- struct btree *b,
- struct btree_node_iter *node_iter,
- struct bkey_i *insert)
-{
- struct bkey_packed *where;
-
- EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
- EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->c, b));
- EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 ||
- bkey_cmp(insert->k.p, b->data->max_key) > 0);
-
- /*
- * Note: when we're called from btree_split(), @b is not in @iter - and
- * thus we can't use the node iter in @iter either, that's why it's
- * passed in separately. This isn't an issue for the linked iterators,
- * though.
- */
-
- where = bch_bset_insert(&b->keys, node_iter, insert);
-
- bch_btree_node_iter_fix(iter, b, node_iter,
- bset_tree_last(&b->keys),
- where, where->u64s);
-}
-
/* Handle overwrites and do insert, for non extents: */
void bch_btree_bset_insert_key(struct btree_iter *iter,
struct btree *b,
@@ -672,29 +645,39 @@ void bch_btree_bset_insert_key(struct btree_iter *iter,
const struct bkey_format *f = &b->keys.format;
struct bkey_packed *k;
struct bset_tree *t;
+ unsigned clobber_u64s;
+
+ EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
+ EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->c, b));
+ EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 ||
+ bkey_cmp(insert->k.p, b->data->max_key) > 0);
k = bch_btree_node_iter_peek_all(node_iter, &b->keys);
if (k && !bkey_cmp_packed(f, k, &insert->k)) {
t = bch_bkey_to_bset(&b->keys, k);
- if (t == bset_tree_last(&b->keys)) {
- int shift = bch_bset_overwrite(&b->keys,
- node_iter, k, insert);
- if (shift || bkey_deleted(&insert->k))
- bch_btree_node_iter_fix(iter, b, node_iter,
- t, k, shift);
- return;
- }
-
if (!bkey_packed_is_whiteout(&b->keys, k))
btree_keys_account_key_drop(&b->keys.nr,
t - b->keys.set, k);
+ if (t == bset_tree_last(&b->keys)) {
+ clobber_u64s = k->u64s;
+ goto overwrite;
+ }
+
k->type = KEY_TYPE_DELETED;
- bch_btree_node_iter_fix(iter, b, node_iter, t, k, 0);
+ bch_btree_node_iter_fix(iter, b, node_iter, t, k,
+ k->u64s, k->u64s);
}
- bch_btree_bset_insert(iter, b, node_iter, insert);
+ t = bset_tree_last(&b->keys);
+ k = bch_btree_node_iter_bset_pos(node_iter, &b->keys, t->data);
+ clobber_u64s = 0;
+overwrite:
+ bch_bset_insert(&b->keys, node_iter, k, insert, clobber_u64s);
+ if (k->u64s != clobber_u64s || bkey_deleted(&insert->k))
+ bch_btree_node_iter_fix(iter, b, node_iter, t, k,
+ clobber_u64s, k->u64s);
}
static void btree_node_flush(struct journal *j, struct journal_entry_pin *pin)
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index 5f74662a845a..09e8ca48fb3a 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -153,8 +153,6 @@ int bch_btree_root_alloc(struct cache_set *, enum btree_id, struct closure *);
/* Inserting into a given leaf node (last stage of insert): */
-void bch_btree_bset_insert(struct btree_iter *, struct btree *,
- struct btree_node_iter *, struct bkey_i *);
void bch_btree_bset_insert_key(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_i *);
void bch_btree_journal_key(struct btree_iter *, struct bkey_i *,
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 2a4c79dd5769..518719f66f00 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -1097,27 +1097,49 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_insert *tran
return BTREE_INSERT_OK;
}
-static void extent_do_insert(struct btree_iter *iter, struct bkey_i *insert,
- struct journal_res *res)
+static void extent_bset_insert(struct btree_iter *iter, struct bkey_i *insert)
{
- struct btree_keys *b = &iter->nodes[0]->keys;
+ struct btree *b = iter->nodes[0];
struct btree_node_iter *node_iter = &iter->node_iters[0];
- struct bset_tree *t = bset_tree_last(b);
- struct bset *i = t->data;
- struct bkey_packed *prev, *where =
- bch_btree_node_iter_bset_pos(node_iter, b, i);
-
- bch_btree_journal_key(iter, insert, res);
-
- if ((prev = bkey_prev_all(t, where)) &&
+ struct bset_tree *t = bset_tree_last(&b->keys);
+ struct bkey_packed *where =
+ bch_btree_node_iter_bset_pos(node_iter, &b->keys, t->data);
+ struct bkey_packed *prev = bkey_prev(t, where);
+ struct bkey_packed *next_live_key = where;
+ unsigned clobber_u64s;
+
+ if (prev &&
+ bkey_next(prev) == where &&
bch_extent_merge_inline(iter, prev, bkey_to_packed(insert), true))
return;
- if (where != bset_bkey_last(i) &&
+ if (where != bset_bkey_last(t->data) &&
bch_extent_merge_inline(iter, bkey_to_packed(insert), where, false))
return;
- bch_btree_bset_insert(iter, iter->nodes[0], node_iter, insert);
+ if (prev)
+ where = bkey_next(prev);
+
+ while (next_live_key != bset_bkey_last(t->data) &&
+ bkey_deleted(next_live_key))
+ next_live_key = bkey_next(next_live_key);
+
+ /*
+ * Everything between where and next_live_key is now deleted keys, and
+ * is overwritten:
+ */
+ clobber_u64s = (u64 *) next_live_key - (u64 *) where;
+ bch_bset_insert(&b->keys, node_iter, where, insert, clobber_u64s);
+ bch_btree_node_iter_fix(iter, b, node_iter, t, where,
+ clobber_u64s, where->u64s);
+}
+
+static void extent_insert_and_journal(struct btree_iter *iter, struct bkey_i *insert,
+ struct journal_res *res)
+{
+ bch_btree_journal_key(iter, insert, res);
+
+ extent_bset_insert(iter, insert);
}
static void extent_insert_committed(struct btree_insert *trans,
@@ -1159,7 +1181,7 @@ static void extent_insert_committed(struct btree_insert *trans,
bkey_debugcheck(iter->c, iter->nodes[iter->level],
bkey_i_to_s_c(&split.k));
- extent_do_insert(iter, &split.k, res);
+ extent_insert_and_journal(iter, &split.k, res);
bch_btree_iter_set_pos_same_leaf(iter, committed_pos);
@@ -1413,7 +1435,8 @@ bch_insert_fixup_extent(struct btree_insert *trans,
* auxiliary tree.
*/
bch_bset_fix_invalidated_key(&b->keys, t, _k);
- bch_btree_node_iter_fix(iter, b, node_iter, t, _k, 0);
+ bch_btree_node_iter_fix(iter, b, node_iter, t,
+ _k, _k->u64s, _k->u64s);
break;
case BCH_EXTENT_OVERLAP_ALL: {
@@ -1455,7 +1478,7 @@ bch_insert_fixup_extent(struct btree_insert *trans,
} else {
bch_bset_fix_invalidated_key(&b->keys, t, _k);
bch_btree_node_iter_fix(iter, b, node_iter, t,
- _k, 0);
+ _k, _k->u64s, _k->u64s);
}
break;
@@ -1487,7 +1510,7 @@ bch_insert_fixup_extent(struct btree_insert *trans,
bch_add_sectors(iter, bkey_i_to_s_c(&split.k),
bkey_start_offset(&split.k.k),
split.k.k.size, &stats);
- bch_btree_bset_insert(iter, b, node_iter, &split.k);
+ extent_bset_insert(iter, &split.k);
break;
}
}
@@ -2182,7 +2205,8 @@ static bool extent_merge_one_overlapping(struct btree_iter *iter,
uk.p = new_pos;
extent_save(&b->keys, node_iter, k, &uk);
bch_bset_fix_invalidated_key(&b->keys, t, k);
- bch_btree_node_iter_fix(iter, b, node_iter, t, k, 0);
+ bch_btree_node_iter_fix(iter, b, node_iter, t,
+ k, k->u64s, k->u64s);
return true;
}
}
@@ -2324,7 +2348,7 @@ static bool bch_extent_merge_inline(struct btree_iter *iter,
bch_btree_iter_set_pos_same_leaf(iter, li.k.k.p);
bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
- t, m, 0);
+ t, m, m->u64s, m->u64s);
if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
@@ -2340,7 +2364,7 @@ static bool bch_extent_merge_inline(struct btree_iter *iter,
extent_i_save(b, node_iter, m, &li.k);
bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
- t, m, 0);
+ t, m, m->u64s, m->u64s);
return true;
default:
BUG();