diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-11-02 02:17:08 -0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-11-13 13:56:17 -0900 |
commit | d96fd309086c928c3361f77a3faf7866cda7a5bf (patch) | |
tree | ff14a54f9d465252f2bd46b0f33af7f7929b9f22 | |
parent | b9cafd3f6d803fc70605acf51277ca228f3a8a5c (diff) |
bcache: avoid creating whiteouts unnecessarily: extents
-rw-r--r-- | drivers/md/bcache/bset.c | 142 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 7 | ||||
-rw-r--r-- | drivers/md/bcache/btree_iter.c | 78 | ||||
-rw-r--r-- | drivers/md/bcache/btree_iter.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.c | 59 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 64 |
7 files changed, 142 insertions, 212 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 84cf9bfa9b0e..9fb036a97d14 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -518,11 +518,18 @@ static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey_packed *k) return ((void *) k - (void *) t->data) / BSET_CACHELINE; } +static ssize_t __bkey_to_cacheline_offset(struct bset_tree *t, + unsigned cacheline, + struct bkey_packed *k) +{ + return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0); +} + static unsigned bkey_to_cacheline_offset(struct bset_tree *t, unsigned cacheline, struct bkey_packed *k) { - size_t m = (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0); + size_t m = __bkey_to_cacheline_offset(t, cacheline, k); BUG_ON(m > (1U << BKEY_MID_BITS) - 1); return m; @@ -906,9 +913,11 @@ EXPORT_SYMBOL(bch_bset_fix_invalidated_key); static void bch_bset_fix_lookup_table(struct btree_keys *b, struct bset_tree *t, struct bkey_packed *where, - int shift) + unsigned clobber_u64s, + unsigned new_u64s) { struct bkey_packed *k; + int shift = new_u64s - clobber_u64s; unsigned j; BUG_ON(bset_has_aux_tree(t)); @@ -928,23 +937,27 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, */ for (; j < t->size; j++) { /* Avoid overflow - might temporarily be larger than a u8 */ - int new_offset = (int) t->prev[j] + shift; - - if (new_offset < 0 || - new_offset > 7) { - k = new_offset < 0 - ? cacheline_to_bkey(t, j, new_offset) - : table_to_bkey(t, j - 1); - - while (k < cacheline_to_bkey(t, j, 0)) { - k = bkey_next(k); - if (k == bset_bkey_last(t->data)) { - t->size = j; - return; - } + ssize_t new_offset; + + if (table_to_bkey(t, j) < + (struct bkey_packed *) ((u64 *) where + clobber_u64s)) + new_offset = __bkey_to_cacheline_offset(t, j, where); + else + new_offset = (int) t->prev[j] + shift; + + if (new_offset > 7) { + k = table_to_bkey(t, j - 1); + new_offset = __bkey_to_cacheline_offset(t, j, k); + } + + while (new_offset < 0) { + k = bkey_next(cacheline_to_bkey(t, j, new_offset)); + if (k == bset_bkey_last(t->data)) { + t->size = j; + return; } - new_offset = bkey_to_cacheline_offset(t, j, k); + new_offset = __bkey_to_cacheline_offset(t, j, k); } t->prev[j] = new_offset; @@ -967,101 +980,29 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, } } -/** - * bch_bset_insert - insert the key @insert into @b - * - * Attempts front and back merges (if @b has a method for key merging). - * - * NOTE: after calling @insert may be modified and is undefined - * - * @iter is used as a hint for where to insert at, but it's not - * fixed/revalidated for the insertion, that's the caller's responsibility - * (because there may be other iterators to fix, it's easier to just do all of - * them the same way). - * - * If an insert was done (and not a merge), returns the position of the insert: - * it is the caller's responsibility to update all iterators that point to @b - * with bch_btree_node_iter_fix(). - * - * If NULL is returned, the caller must sort all iterators that point to @b - * with bch_btree_node_iter_sort(), because we may have done a merge that - * modified one of the keys the iterator currently points to. - */ -struct bkey_packed *bch_bset_insert(struct btree_keys *b, - struct btree_node_iter *iter, - struct bkey_i *insert) +void bch_bset_insert(struct btree_keys *b, + struct btree_node_iter *iter, + struct bkey_packed *where, + struct bkey_i *insert, + unsigned clobber_u64s) { struct bkey_format *f = &b->format; struct bset_tree *t = bset_tree_last(b); struct bset *i = t->data; - struct bkey_packed *where = bch_btree_node_iter_bset_pos(iter, b, i); struct bkey_packed packed, *src = bkey_to_packed(insert); - BUG_ON(bset_has_aux_tree(t)); - BUG_ON(insert->k.u64s < BKEY_U64s); - BUG_ON(insert->k.format != KEY_FORMAT_CURRENT); - BUG_ON(b->ops->is_extents && - (!insert->k.size || bkey_deleted(&insert->k))); - - BUG_ON(where < i->start); - BUG_ON(where > bset_bkey_last(i)); - if (bkey_pack_key(&packed, &insert->k, f)) src = &packed; - memmove((u64 *) where + src->u64s, - where, - (void *) bset_bkey_last(i) - (void *) where); - le16_add_cpu(&i->u64s, src->u64s); - - memcpy(where, src, - bkeyp_key_bytes(f, src)); - memcpy(bkeyp_val(f, where), &insert->v, - bkeyp_val_bytes(f, src)); - - bch_bset_fix_lookup_table(b, t, where, where->u64s); - if (!bkey_is_whiteout(&insert->k)) btree_keys_account_key_add(&b->nr, b->nsets, src); - bch_verify_key_order(b, iter, where); - bch_verify_btree_nr_keys(b); - return where; -} - -/* @where must compare equal to @insert */ -int bch_bset_overwrite(struct btree_keys *b, - struct btree_node_iter *iter, - struct bkey_packed *where, - struct bkey_i *insert) -{ - struct bkey_format *f = &b->format; - struct bset_tree *t = bset_tree_last(b); - struct bset *i = t->data; - struct bkey_packed packed, *src = bkey_to_packed(insert); - int shift; - - /* - * we know where isn't a deleted key, because if it was the iterator - * would have skipped past it: - */ - EBUG_ON(bkey_cmp_left_packed(f, where, insert->k.p)); - EBUG_ON(bkey_deleted(where)); - - if (bkey_pack_key(&packed, &insert->k, f)) - src = &packed; - - if (!bkey_packed_is_whiteout(b, where)) - btree_keys_account_key_drop(&b->nr, b->nsets, where); - if (!bkey_is_whiteout(&insert->k)) - btree_keys_account_key_add(&b->nr, b->nsets, src); + if (src->u64s != clobber_u64s) { + void *src_p = where->_data + clobber_u64s; + void *dst_p = where->_data + src->u64s; - shift = src->u64s - where->u64s; - if (shift) { - memmove(where->_data + src->u64s, - bkey_next(where), - (void *) bset_bkey_last(i) - (void *) bkey_next(where)); - le16_add_cpu(&i->u64s, shift); + memmove(dst_p, src_p, (void *) bset_bkey_last(i) - src_p); + le16_add_cpu(&i->u64s, src->u64s - clobber_u64s); } memcpy(where, src, @@ -1069,11 +1010,10 @@ int bch_bset_overwrite(struct btree_keys *b, memcpy(bkeyp_val(f, where), &insert->v, bkeyp_val_bytes(f, src)); - bch_bset_fix_lookup_table(b, t, where, shift); + bch_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); bch_verify_key_order(b, iter, where); bch_verify_btree_nr_keys(b); - return shift; } /* Lookup */ diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index b93e64f608c3..27a42f8dbcd5 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -322,11 +322,8 @@ void bch_bset_build_written_tree(struct btree_keys *, struct bset_tree *); void bch_bset_fix_invalidated_key(struct btree_keys *, struct bset_tree *, struct bkey_packed *); -struct bkey_packed *bch_bset_insert(struct btree_keys *, - struct btree_node_iter *, - struct bkey_i *); -int bch_bset_overwrite(struct btree_keys *, struct btree_node_iter *, - struct bkey_packed *, struct bkey_i *); +void bch_bset_insert(struct btree_keys *, struct btree_node_iter *, + struct bkey_packed *, struct bkey_i *, unsigned); /* Bkey utility code */ diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c index ae072d7351e8..43babb54a916 100644 --- a/drivers/md/bcache/btree_iter.c +++ b/drivers/md/bcache/btree_iter.c @@ -244,58 +244,45 @@ static void __bch_btree_node_iter_fix(struct btree_iter *iter, struct btree_node_iter *node_iter, struct bset_tree *t, struct bkey_packed *where, - int shift) + unsigned clobber_u64s, + unsigned new_u64s) { struct bkey_format *f = &b->format; const struct bkey_packed *end = bset_bkey_last(t->data); struct btree_node_iter_set *set; unsigned offset = __btree_node_key_to_offset(b, where); + int shift = new_u64s - clobber_u64s; unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift; bool iter_pos_before_new = btree_iter_pos_cmp_packed(f, iter->pos, where, iter->is_extents); btree_node_iter_for_each(node_iter, set) - if (set->end == old_end) { - set->end = (int) set->end + shift; - - /* - * When we inserted at @where, the key we inserted - the - * new key at @where - compared strictly less than the - * key previously at @where (which is now the next key - * after the key we inserted). - * - * However, it is not necessarily true that if the - * iterator's position is less than the key we inserted - * it was <= the key previously at where - transitivity - * doesn't hold here - because the key previously at - * @where might have been a deleted key that the - * iterator had skipped past. - */ - if (set->k >= offset) { - if (iter_pos_before_new) - set->k = offset; - else if (shift > 0 || set->k > offset) - set->k = (int) set->k + shift; - } - - /* - * Resort iterator if we changed a key it points to: - * - * Do this before checking if we're removing a key from - * the iterator: - */ - if (set->k == offset) - bch_btree_node_iter_sort(node_iter, b); - goto check_remove; - } + if (set->end == old_end) + goto found; /* didn't find the bset in the iterator - might have to readd it: */ if (iter_pos_before_new) bch_btree_node_iter_push(node_iter, b, where, end); -check_remove: - if (!iter_pos_before_new && - bch_btree_node_iter_peek_all(node_iter, b) == where) - bch_btree_node_iter_advance(node_iter, b); + return; +found: + set->end = (int) set->end + shift; + + if (set->k < offset) + return; + + if (iter_pos_before_new || + set->k < offset + clobber_u64s) { + set->k = offset; + + /* The key the iterator currently points to changed: */ + bch_btree_node_iter_sort(node_iter, b); + + if (!iter_pos_before_new && + bch_btree_node_iter_peek_all(node_iter, b) == where) + bch_btree_node_iter_advance(node_iter, b); + } else { + set->k = (int) set->k + shift; + } } void bch_btree_node_iter_fix(struct btree_iter *iter, @@ -303,23 +290,24 @@ void bch_btree_node_iter_fix(struct btree_iter *iter, struct btree_node_iter *node_iter, struct bset_tree *t, struct bkey_packed *where, - int shift) + unsigned clobber_u64s, + unsigned new_u64s) { struct btree_iter *linked; if (node_iter != &iter->node_iters[b->level]) - __bch_btree_node_iter_fix(iter, &b->keys, node_iter, - t, where, shift); + __bch_btree_node_iter_fix(iter, &b->keys, node_iter, t, + where, clobber_u64s, new_u64s); if (iter->nodes[b->level] == b) __bch_btree_node_iter_fix(iter, &b->keys, - &iter->node_iters[b->level], - t, where, shift); + &iter->node_iters[b->level], t, + where, clobber_u64s, new_u64s); for_each_linked_btree_node(iter, b, linked) __bch_btree_node_iter_fix(linked, &b->keys, - &linked->node_iters[b->level], - t, where, shift); + &linked->node_iters[b->level], t, + where, clobber_u64s, new_u64s); bch_btree_iter_verify(iter, b); } diff --git a/drivers/md/bcache/btree_iter.h b/drivers/md/bcache/btree_iter.h index 18383c8b217a..aa39e37a12f8 100644 --- a/drivers/md/bcache/btree_iter.h +++ b/drivers/md/bcache/btree_iter.h @@ -123,7 +123,7 @@ static inline void bch_btree_iter_verify(struct btree_iter *iter, void bch_btree_node_iter_fix(struct btree_iter *, struct btree *, struct btree_node_iter *, struct bset_tree *, - struct bkey_packed *, int); + struct bkey_packed *, unsigned, unsigned); bool bch_btree_iter_upgrade(struct btree_iter *); int bch_btree_iter_unlock(struct btree_iter *); diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index 8fb06fc14f86..7aef5cd9f9d9 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -636,33 +636,6 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter, /* Inserting into a given leaf node (last stage of insert): */ -/* Wrapper around bch_bset_insert() that fixes linked iterators: */ -void bch_btree_bset_insert(struct btree_iter *iter, - struct btree *b, - struct btree_node_iter *node_iter, - struct bkey_i *insert) -{ - struct bkey_packed *where; - - EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); - EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->c, b)); - EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 || - bkey_cmp(insert->k.p, b->data->max_key) > 0); - - /* - * Note: when we're called from btree_split(), @b is not in @iter - and - * thus we can't use the node iter in @iter either, that's why it's - * passed in separately. This isn't an issue for the linked iterators, - * though. - */ - - where = bch_bset_insert(&b->keys, node_iter, insert); - - bch_btree_node_iter_fix(iter, b, node_iter, - bset_tree_last(&b->keys), - where, where->u64s); -} - /* Handle overwrites and do insert, for non extents: */ void bch_btree_bset_insert_key(struct btree_iter *iter, struct btree *b, @@ -672,29 +645,39 @@ void bch_btree_bset_insert_key(struct btree_iter *iter, const struct bkey_format *f = &b->keys.format; struct bkey_packed *k; struct bset_tree *t; + unsigned clobber_u64s; + + EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); + EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->c, b)); + EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 || + bkey_cmp(insert->k.p, b->data->max_key) > 0); k = bch_btree_node_iter_peek_all(node_iter, &b->keys); if (k && !bkey_cmp_packed(f, k, &insert->k)) { t = bch_bkey_to_bset(&b->keys, k); - if (t == bset_tree_last(&b->keys)) { - int shift = bch_bset_overwrite(&b->keys, - node_iter, k, insert); - if (shift || bkey_deleted(&insert->k)) - bch_btree_node_iter_fix(iter, b, node_iter, - t, k, shift); - return; - } - if (!bkey_packed_is_whiteout(&b->keys, k)) btree_keys_account_key_drop(&b->keys.nr, t - b->keys.set, k); + if (t == bset_tree_last(&b->keys)) { + clobber_u64s = k->u64s; + goto overwrite; + } + k->type = KEY_TYPE_DELETED; - bch_btree_node_iter_fix(iter, b, node_iter, t, k, 0); + bch_btree_node_iter_fix(iter, b, node_iter, t, k, + k->u64s, k->u64s); } - bch_btree_bset_insert(iter, b, node_iter, insert); + t = bset_tree_last(&b->keys); + k = bch_btree_node_iter_bset_pos(node_iter, &b->keys, t->data); + clobber_u64s = 0; +overwrite: + bch_bset_insert(&b->keys, node_iter, k, insert, clobber_u64s); + if (k->u64s != clobber_u64s || bkey_deleted(&insert->k)) + bch_btree_node_iter_fix(iter, b, node_iter, t, k, + clobber_u64s, k->u64s); } static void btree_node_flush(struct journal *j, struct journal_entry_pin *pin) diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h index 5f74662a845a..09e8ca48fb3a 100644 --- a/drivers/md/bcache/btree_update.h +++ b/drivers/md/bcache/btree_update.h @@ -153,8 +153,6 @@ int bch_btree_root_alloc(struct cache_set *, enum btree_id, struct closure *); /* Inserting into a given leaf node (last stage of insert): */ -void bch_btree_bset_insert(struct btree_iter *, struct btree *, - struct btree_node_iter *, struct bkey_i *); void bch_btree_bset_insert_key(struct btree_iter *, struct btree *, struct btree_node_iter *, struct bkey_i *); void bch_btree_journal_key(struct btree_iter *, struct bkey_i *, diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 2a4c79dd5769..518719f66f00 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -1097,27 +1097,49 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_insert *tran return BTREE_INSERT_OK; } -static void extent_do_insert(struct btree_iter *iter, struct bkey_i *insert, - struct journal_res *res) +static void extent_bset_insert(struct btree_iter *iter, struct bkey_i *insert) { - struct btree_keys *b = &iter->nodes[0]->keys; + struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; - struct bset_tree *t = bset_tree_last(b); - struct bset *i = t->data; - struct bkey_packed *prev, *where = - bch_btree_node_iter_bset_pos(node_iter, b, i); - - bch_btree_journal_key(iter, insert, res); - - if ((prev = bkey_prev_all(t, where)) && + struct bset_tree *t = bset_tree_last(&b->keys); + struct bkey_packed *where = + bch_btree_node_iter_bset_pos(node_iter, &b->keys, t->data); + struct bkey_packed *prev = bkey_prev(t, where); + struct bkey_packed *next_live_key = where; + unsigned clobber_u64s; + + if (prev && + bkey_next(prev) == where && bch_extent_merge_inline(iter, prev, bkey_to_packed(insert), true)) return; - if (where != bset_bkey_last(i) && + if (where != bset_bkey_last(t->data) && bch_extent_merge_inline(iter, bkey_to_packed(insert), where, false)) return; - bch_btree_bset_insert(iter, iter->nodes[0], node_iter, insert); + if (prev) + where = bkey_next(prev); + + while (next_live_key != bset_bkey_last(t->data) && + bkey_deleted(next_live_key)) + next_live_key = bkey_next(next_live_key); + + /* + * Everything between where and next_live_key is now deleted keys, and + * is overwritten: + */ + clobber_u64s = (u64 *) next_live_key - (u64 *) where; + bch_bset_insert(&b->keys, node_iter, where, insert, clobber_u64s); + bch_btree_node_iter_fix(iter, b, node_iter, t, where, + clobber_u64s, where->u64s); +} + +static void extent_insert_and_journal(struct btree_iter *iter, struct bkey_i *insert, + struct journal_res *res) +{ + bch_btree_journal_key(iter, insert, res); + + extent_bset_insert(iter, insert); } static void extent_insert_committed(struct btree_insert *trans, @@ -1159,7 +1181,7 @@ static void extent_insert_committed(struct btree_insert *trans, bkey_debugcheck(iter->c, iter->nodes[iter->level], bkey_i_to_s_c(&split.k)); - extent_do_insert(iter, &split.k, res); + extent_insert_and_journal(iter, &split.k, res); bch_btree_iter_set_pos_same_leaf(iter, committed_pos); @@ -1413,7 +1435,8 @@ bch_insert_fixup_extent(struct btree_insert *trans, * auxiliary tree. */ bch_bset_fix_invalidated_key(&b->keys, t, _k); - bch_btree_node_iter_fix(iter, b, node_iter, t, _k, 0); + bch_btree_node_iter_fix(iter, b, node_iter, t, + _k, _k->u64s, _k->u64s); break; case BCH_EXTENT_OVERLAP_ALL: { @@ -1455,7 +1478,7 @@ bch_insert_fixup_extent(struct btree_insert *trans, } else { bch_bset_fix_invalidated_key(&b->keys, t, _k); bch_btree_node_iter_fix(iter, b, node_iter, t, - _k, 0); + _k, _k->u64s, _k->u64s); } break; @@ -1487,7 +1510,7 @@ bch_insert_fixup_extent(struct btree_insert *trans, bch_add_sectors(iter, bkey_i_to_s_c(&split.k), bkey_start_offset(&split.k.k), split.k.k.size, &stats); - bch_btree_bset_insert(iter, b, node_iter, &split.k); + extent_bset_insert(iter, &split.k); break; } } @@ -2182,7 +2205,8 @@ static bool extent_merge_one_overlapping(struct btree_iter *iter, uk.p = new_pos; extent_save(&b->keys, node_iter, k, &uk); bch_bset_fix_invalidated_key(&b->keys, t, k); - bch_btree_node_iter_fix(iter, b, node_iter, t, k, 0); + bch_btree_node_iter_fix(iter, b, node_iter, t, + k, k->u64s, k->u64s); return true; } } @@ -2324,7 +2348,7 @@ static bool bch_extent_merge_inline(struct btree_iter *iter, bch_btree_iter_set_pos_same_leaf(iter, li.k.k.p); bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter, - t, m, 0); + t, m, m->u64s, m->u64s); if (!back_merge) bkey_copy(packed_to_bkey(l), &li.k); @@ -2340,7 +2364,7 @@ static bool bch_extent_merge_inline(struct btree_iter *iter, extent_i_save(b, node_iter, m, &li.k); bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter, - t, m, 0); + t, m, m->u64s, m->u64s); return true; default: BUG(); |