author    Kent Overstreet <kent.overstreet@gmail.com>  2016-07-18 16:50:28 -0800
committer Kent Overstreet <kent.overstreet@gmail.com>  2016-10-07 12:36:40 -0800
commit    f69aef2c53c4da77826d525eabfa25b526fa0803
tree      c806793caeb467490e2dca5eb92260e111eba18e
parent    f0fedf0b62aca56bdc798edc17e3f6d433e28b4b
bcache: Make iterators more rigorous
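
The new bset sort order introduced by this patch can be read as a comparator.
A minimal illustrative sketch (not code from this patch - in bset.c the real
ordering is enforced via keys_out_of_order()), assuming only the existing
bkey_cmp_packed() and bkey_deleted() helpers:

	/*
	 * Usual key comparison first; among keys that compare equal, the
	 * deleted key(s) sort first and the (at most one) live version last.
	 */
	static int bset_key_cmp_sketch(const struct bkey_format *f,
				       const struct bkey_packed *l,
				       const struct bkey_packed *r)
	{
		int cmp = bkey_cmp_packed(f, l, r);

		if (cmp)
			return cmp;
		return (int) !bkey_deleted(l) - (int) !bkey_deleted(r);
	}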
 drivers/md/bcache/bset.c         | 227
 drivers/md/bcache/bset.h         |  71
 drivers/md/bcache/btree_iter.c   | 136
 drivers/md/bcache/btree_iter.h   |  11
 drivers/md/bcache/btree_types.h  |   4
 drivers/md/bcache/btree_update.c | 113
 drivers/md/bcache/btree_update.h |   6
 drivers/md/bcache/extents.c      | 325
 drivers/md/bcache/extents.h      |   4
 drivers/md/bcache/fs-io.c        |   8
 10 files changed, 537 insertions(+), 368 deletions(-)
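
Orientation for the diff below: the patch replaces ad-hoc position comparisons
with a family of btree_iter_pos_cmp*() predicates (added to bset.h) that return
true iff a key is after an iterator position, handling deleted keys and the
strictly_greater flag uniformly. Several call sites then open-code the same
loop to advance a node iterator up to a position; a hypothetical helper
capturing that recurring pattern (the helper itself is not part of the patch,
the calls inside it are):

	static void node_iter_advance_to_pos(struct btree_keys *b,
					     struct btree_node_iter *node_iter,
					     struct bpos pos)
	{
		struct bkey_packed *k;

		/* skip every key that is not strictly after @pos */
		while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
		       !btree_iter_pos_cmp_packed(&b->format, pos, k, true))
			bch_btree_node_iter_advance(node_iter, b);
	}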
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 44a8f23ac448..eb51259c6a8f 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -37,16 +37,15 @@ struct bset_tree *bch_bkey_to_bset(struct btree_keys *b, struct bkey_packed *k)
  * have been flagged as deleted (and will be cleaned up later) we _will_ see
  * duplicates.
  *
- * Sort order is important here:
- * - For extents, the deleted keys have to come last. This is because we're
- *   using bkey_deleted() as a proxy for k->size == 0, and we still have to
- *   maintain the invariant that
- *	bkey_cmp(k->p, bkey_start_pos(bkey_next(k)) <= 0)
+ * Thus the sort order is: usual key comparison first, but for keys that compare
+ * equal the deleted key(s) come first, and the (at most one) live version comes
+ * last.
  *
- * i.e. a key can't end after the start of the next key.
- *
- * - For non extents, deleted keys must come first.
- *   XXX: why is this?
+ * The main reason for this is insertion: to handle overwrites, we first iterate
+ * over keys that compare equal to our insert key, and then insert immediately
+ * prior to the first key greater than the key we're inserting - our insert
+ * position will be after all keys that compare equal to our insert key, which
+ * by the time we actually do the insert will all be deleted.
  */
 static bool keys_out_of_order(const struct bkey_format *f,
@@ -191,6 +190,57 @@ void bch_btree_node_iter_verify(struct btree_node_iter *iter,
 	}
 }
 
+void bch_verify_key_order(struct btree_keys *b,
+			  struct btree_node_iter *iter,
+			  struct bkey_packed *where)
+{
+	const struct bkey_format *f = &b->format;
+	struct bset_tree *t = bch_bkey_to_bset(b, where);
+	struct bkey_packed *k, *prev;
+	struct bkey uk, uw = bkey_unpack_key(f, where);
+
+	k = bkey_prev(t, where);
+	BUG_ON(k &&
+	       keys_out_of_order(f, k, where, b->ops->is_extents));
+
+	k = bkey_next(where);
+	BUG_ON(k != bset_bkey_last(t->data) &&
+	       keys_out_of_order(f, where, k, b->ops->is_extents));
+
+	for (t = b->set; t <= b->set + b->nsets; t++) {
+		if (!t->data->u64s)
+			continue;
+
+		if (where >= t->data->start &&
+		    where < bset_bkey_last(t->data))
+			continue;
+
+		k = bch_btree_node_iter_bset_pos(iter, b, t->data) ?:
+			bkey_prev(t, bset_bkey_last(t->data));
+
+		while (bkey_cmp_left_packed(f, k, bkey_start_pos(&uw)) > 0 &&
+		       (prev = bkey_prev(t, k)))
+			k = prev;
+
+		for (;
+		     k != bset_bkey_last(t->data);
+		     k = bkey_next(k)) {
+			uk = bkey_unpack_key(f, k);
+
+			if (b->ops->is_extents) {
+				BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
+					 bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
+			} else {
+				BUG_ON(!bkey_cmp(uw.p, uk.p) &&
+				       !bkey_deleted(&uk));
+			}
+
+			if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
+				break;
+		}
+	}
+}
+
 #else
 
 static void bch_btree_node_iter_next_check(struct btree_node_iter *iter,
@@ -788,25 +838,6 @@ struct bkey_packed *bkey_prev(struct bset_tree *t, struct bkey_packed *k)
 
 /* Insert */
 
-static void verify_insert_pos(struct btree_keys *b,
-			      const struct bkey_packed *prev,
-			      const struct bkey_packed *where,
-			      const struct bkey_i *insert)
-{
-#ifdef CONFIG_BCACHE_DEBUG
-	const struct bkey_format *f = &b->format;
-	struct bset_tree *t = bset_tree_last(b);
-
-	BUG_ON(prev &&
-	       keys_out_of_order(f, prev, bkey_to_packed_c(insert),
-				 b->ops->is_extents));
-
-	BUG_ON(where != bset_bkey_last(t->data) &&
-	       keys_out_of_order(f, bkey_to_packed_c(insert), where,
-				 b->ops->is_extents));
-#endif
-}
-
 /**
  * bch_bset_fix_invalidated_key() - given an existing key @k that has been
  * modified, fix any auxiliary search tree by remaking all the nodes in the
@@ -938,13 +969,11 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
  */
 struct bkey_packed *bch_bset_insert(struct btree_keys *b,
 				    struct btree_node_iter *iter,
-				    struct bkey_i *insert,
-				    bool *overwrote)
+				    struct bkey_i *insert)
 {
 	struct bkey_format *f = &b->format;
 	struct bset_tree *t = bset_tree_last(b);
 	struct bset *i = t->data;
-	struct bkey_packed *prev = NULL;
 	struct bkey_packed *where = bch_btree_node_iter_bset_pos(iter, b, i) ?:
 		bset_bkey_last(i);
 	struct bkey_packed packed, *src = bkey_to_packed(insert);
@@ -957,72 +986,10 @@ struct bkey_packed *bch_bset_insert(struct btree_keys *b,
 	BUG_ON(where < i->start);
 	BUG_ON(where > bset_bkey_last(i));
 
-	bch_verify_btree_nr_keys(b);
-
-	while (where != bset_bkey_last(i) &&
-	       keys_out_of_order(f, bkey_to_packed(insert),
-				 where, b->ops->is_extents))
-		prev = where, where = bkey_next(where);
-
-	if (!prev)
-		prev = bkey_prev(t, where);
-
-	verify_insert_pos(b, prev, where, insert);
-
-	/*
-	 * note: bch_bkey_try_merge_inline() may modify either argument
-	 */
-
-	/* prev is in the tree, if we merge we're done */
-	if (prev &&
-	    bch_bkey_try_merge_inline(b, iter, prev,
-				      bkey_to_packed(insert), true)) {
-		*overwrote = true;
-		return prev;
-	}
-
-	if (where != bset_bkey_last(i) &&
-	    bch_bkey_try_merge_inline(b, iter,
-				      bkey_to_packed(insert),
-				      where, false)) {
-		*overwrote = true;
-		return where;
-	}
-
-	/*
-	 * Can we overwrite the current key, instead of doing a memmove()?
-	 *
-	 * For extents, only legal to overwrite keys that are deleted - because
-	 * extents are marked as deleted iff they are 0 size, deleted extents
-	 * don't overlap with any other existing keys.
-	 *
-	 * For non extents marked as deleted may be needed as whiteouts, until
-	 * the node is rewritten, only legal to overwrite if the new key
-	 * compares equal to the old.
-	 */
-	if (b->ops->is_extents &&
-	    where != bset_bkey_last(i) &&
-	    where->u64s == src->u64s &&
-	    bkey_deleted(where))
-		goto overwrite;
 
 	if (bkey_pack_key(&packed, &insert->k, f))
 		src = &packed;
 
-	/* packing changes u64s - recheck after packing: */
-	if (b->ops->is_extents &&
-	    where != bset_bkey_last(i) &&
-	    where->u64s == src->u64s &&
-	    bkey_deleted(where))
-		goto overwrite;
-
-	if (!b->ops->is_extents &&
-	    prev && prev->u64s == src->u64s &&
-	    !bkey_cmp_left_packed(f, prev, insert->k.p)) {
-		where = prev;
-		goto overwrite;
-	}
-
 	memmove((u64 *) where + src->u64s,
 		where,
 		(void *) bset_bkey_last(i) - (void *) where);
@@ -1033,29 +1000,58 @@ struct bkey_packed *bch_bset_insert(struct btree_keys *b,
 	memcpy(bkeyp_val(f, where), &insert->v,
 	       bkeyp_val_bytes(f, src));
 
+	bch_bset_fix_lookup_table(b, t, where);
+
 	if (!bkey_deleted(src))
 		btree_keys_account_key_add(&b->nr, src);
 
-	bch_bset_fix_lookup_table(b, t, where);
-
+	bch_verify_key_order(b, iter, where);
 	bch_verify_btree_nr_keys(b);
 
-	*overwrote = false;
 	return where;
+}
+
+/* @where must compare equal to @insert */
+bool bch_bset_try_overwrite(struct btree_keys *b,
+			    struct btree_node_iter *iter,
+			    struct bkey_packed *where,
+			    struct bkey_i *insert)
+{
+	struct bkey_format *f = &b->format;
+	struct bkey_packed packed, *src = bkey_to_packed(insert);
+
+	EBUG_ON(bkey_cmp_left_packed(f, where, insert->k.p));
+	EBUG_ON(bkey_deleted(where));
+
+	if (bkey_deleted(&insert->k))
+		return false;
+
+	if (bch_bkey_to_bset(b, where) != bset_tree_last(b))
+		return false;
+
+	if (where->u64s == src->u64s)
+		goto overwrite;
+
+	if (!bkey_pack_key(&packed, &insert->k, f))
+		return false;
+
+	src = &packed;
+	if (where->u64s == src->u64s)
+		goto overwrite;
+
+	return false;
 overwrite:
 	if (!bkey_deleted(where))
 		btree_keys_account_key_drop(&b->nr, where);
-
+	if (!bkey_deleted(src))
+		btree_keys_account_key_add(&b->nr, src);
 	memcpy(where, src, bkeyp_key_bytes(f, src));
 	memcpy(bkeyp_val(f, where), &insert->v,
 	       bkeyp_val_bytes(f, src));
-	if (!bkey_deleted(src))
-		btree_keys_account_key_add(&b->nr, src);
-
+	bch_verify_key_order(b, iter, where);
 	bch_verify_btree_nr_keys(b);
-	*overwrote = true;
-	return where;
+	return true;
 }
 
 /* Lookup */
@@ -1166,7 +1162,8 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b,
 					   struct bset_tree *t,
 					   struct bpos search,
 					   struct bkey_packed *packed_search,
-					   const struct bkey_packed *lossy_packed_search)
+					   const struct bkey_packed *lossy_packed_search,
+					   bool strictly_greater)
 {
 	const struct bkey_format *f = &b->format;
 	struct bkey_packed *m;
@@ -1207,21 +1204,21 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b,
 		if (unlikely(bkey_cmp_p_or_unp(f, t->data->start,
 					       packed_search, search) >= 0))
-			return t->data->start;
-
-		m = bset_search_tree(f, t, search, lossy_packed_search);
+			m = t->data->start;
+		else
+			m = bset_search_tree(f, t, search, lossy_packed_search);
 		break;
 	}
 
 	if (lossy_packed_search)
 		while (m != bset_bkey_last(t->data) &&
-		       bkey_cmp_p_or_unp(f, m,
-					 lossy_packed_search, search) < 0)
+		       !btree_iter_pos_cmp_p_or_unp(f, search, lossy_packed_search,
+						    m, strictly_greater))
 			m = bkey_next(m);
 
 	if (!packed_search)
 		while (m != bset_bkey_last(t->data) &&
-		       bkey_cmp_left_packed(f, m, search) < 0)
+		       !btree_iter_pos_cmp_packed(f, search, m, strictly_greater))
 			m = bkey_next(m);
 
 	if (btree_keys_expensive_checks(b)) {
@@ -1369,17 +1366,9 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter,
 		bch_btree_node_iter_push(iter, b,
 					 bch_bset_search(b, t, search,
 							 packed_search,
-							 lossy_packed_search),
+							 lossy_packed_search,
+							 strictly_greater),
 					 bset_bkey_last(t->data));
-
-	if (strictly_greater) {
-		struct bkey_packed *m;
-
-		while ((m = bch_btree_node_iter_peek_all(iter, b)) &&
-		       (bkey_cmp_p_or_unp(&b->format, m,
-					  packed_search, search) == 0))
-			bch_btree_node_iter_advance(iter, b);
-	}
 }
 EXPORT_SYMBOL(bch_btree_node_iter_init);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index f34eb5d63ab7..9d245d825a37 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -168,10 +168,6 @@ struct btree_keys_ops {
 	ptr_filter_fn	key_normalize;
 	enum merge_result	(*key_merge)(struct btree_keys *,
 					     struct bkey_i *, struct bkey_i *);
-	bool		(*key_merge_inline)(struct btree_keys *,
-					    struct btree_node_iter *,
-					    struct bkey_packed *,
-					    struct bkey_packed *, bool);
 
 	/*
 	 * Only used for deciding whether to use bkey_start_pos(k) or just the
@@ -326,7 +322,9 @@ void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey_packed *);
 
 struct bkey_packed *bch_bset_insert(struct btree_keys *,
 				    struct btree_node_iter *,
-				    struct bkey_i *, bool *);
+				    struct bkey_i *);
+bool bch_bset_try_overwrite(struct btree_keys *, struct btree_node_iter *iter,
+			    struct bkey_packed *, struct bkey_i *);
 
 static inline void btree_keys_account_key(struct btree_nr_keys *n,
 					  struct bkey_packed *k,
@@ -346,6 +344,39 @@ static inline void btree_keys_account_key(struct btree_nr_keys *n,
 
 /* Bkey utility code */
 
+/* Returns true if @k is after iterator position @pos */
+static inline bool btree_iter_pos_cmp(struct bpos pos, const struct bkey *k,
+				      bool strictly_greater)
+{
+	int cmp = bkey_cmp(k->p, pos);
+
+	return cmp > 0 ||
+		(cmp == 0 && !strictly_greater && !bkey_deleted(k));
+}
+
+static inline bool btree_iter_pos_cmp_packed(const struct bkey_format *f,
+					     struct bpos pos,
+					     const struct bkey_packed *k,
+					     bool strictly_greater)
+{
+	int cmp = bkey_cmp_left_packed(f, k, pos);
+
+	return cmp > 0 ||
+		(cmp == 0 && !strictly_greater && !bkey_deleted(k));
+}
+
+static inline bool btree_iter_pos_cmp_p_or_unp(const struct bkey_format *f,
+					       struct bpos pos,
+					       const struct bkey_packed *pos_packed,
+					       const struct bkey_packed *k,
+					       bool strictly_greater)
+{
+	int cmp = bkey_cmp_p_or_unp(f, k, pos_packed, pos);
+
+	return cmp > 0 ||
+		(cmp == 0 && !strictly_greater && !bkey_deleted(k));
+}
+
 #define BKEY_PADDED(key)	__BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
 
 #define __bkey_idx(_set, _offset)					\
@@ -382,17 +413,6 @@ static inline bool bch_bkey_try_merge(struct btree_keys *b,
 		: false;
 }
 
-static inline bool bch_bkey_try_merge_inline(struct btree_keys *b,
-					     struct btree_node_iter *iter,
-					     struct bkey_packed *l,
-					     struct bkey_packed *r,
-					     bool back_merge)
-{
-	return b->ops->key_merge_inline
-		? b->ops->key_merge_inline(b, iter, l, r, back_merge)
-		: false;
-}
-
 enum bch_extent_overlap {
 	BCH_EXTENT_OVERLAP_FRONT,
 	BCH_EXTENT_OVERLAP_BACK,
@@ -597,24 +617,27 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
 
 #ifdef CONFIG_BCACHE_DEBUG
 
-s64 __bch_count_data(struct btree_keys *);
-void __bch_count_data_verify(struct btree_keys *, int);
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
 void bch_dump_bucket(struct btree_keys *);
-void bch_btree_node_iter_verify(struct btree_node_iter *, struct btree_keys *);
+s64 __bch_count_data(struct btree_keys *);
 void __bch_verify_btree_nr_keys(struct btree_keys *);
+void bch_btree_node_iter_verify(struct btree_node_iter *, struct btree_keys *);
+void bch_verify_key_order(struct btree_keys *, struct btree_node_iter *,
+			  struct bkey_packed *);
 
 #else
 
-static inline s64 __bch_count_data(struct btree_keys *b) { return -1; }
-static inline void __bch_count_data_verify(struct btree_keys *b, int oldsize) {}
+static inline void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) {}
 static inline void bch_dump_bucket(struct btree_keys *b) {}
+static inline s64 __bch_count_data(struct btree_keys *b) { return -1; }
+static inline void __bch_verify_btree_nr_keys(struct btree_keys *b) {}
 static inline void bch_btree_node_iter_verify(struct btree_node_iter *iter,
 					      struct btree_keys *b) {}
-static inline void __bch_verify_btree_nr_keys(struct btree_keys *b) {}
-
+static inline void bch_verify_key_order(struct btree_keys *b,
+					struct btree_node_iter *iter,
+					struct bkey_packed *where) {}
#endif
 
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
 
 static inline s64 bch_count_data(struct btree_keys *b)
 {
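
bch_bset_try_overwrite() (declared in bset.h above, defined in bset.c) only
succeeds when @where compares equal to the insert key, is live, sits in the
last bset, and the new key packs to the same u64s count. A sketch of the
caller contract, modelled on bch_btree_bset_insert_key() from btree_update.c
later in this diff (the fallback calls are from the patch; the wrapper itself
is hypothetical):

	static void insert_or_overwrite_sketch(struct btree_iter *iter,
					       struct btree *b,
					       struct btree_node_iter *node_iter,
					       struct bkey_packed *k,
					       struct bkey_i *insert)
	{
		/* precondition: @k compares equal to @insert and is live */
		if (bch_bset_try_overwrite(&b->keys, node_iter, k, insert))
			return;

		/* can't overwrite in place: whiteout the old key, then insert */
		k->type = KEY_TYPE_DELETED;
		btree_keys_account_key_drop(&b->keys.nr, k);
		bch_btree_node_iter_fix(iter, b, node_iter, k, true);
		bch_btree_bset_insert(iter, b, node_iter, insert);
	}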
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index 97dfb7992a1c..f0628f2544a0 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -190,31 +190,72 @@ bool btree_node_relock(struct btree_iter *iter, unsigned level)
 
 /* Btree iterator: */
 
-static bool btree_iter_pos_cmp(struct btree_iter *iter,
-			       struct bpos pos, struct bpos k)
+#ifdef CONFIG_BCACHE_DEBUG
+
+static void __bch_btree_iter_verify(struct btree_node_iter *iter,
+				    struct btree *b,
+				    struct bpos pos,
+				    bool strictly_greater)
 {
-	return iter->is_extents
-		? bkey_cmp(pos, k) < 0
-		: bkey_cmp(pos, k) <= 0;
+	const struct bkey_format *f = &b->keys.format;
+	struct bset_tree *t;
+	struct bkey_packed *k;
+
+	bch_btree_node_iter_verify(iter, &b->keys);
+
+	for (t = b->keys.set; t <= b->keys.set + b->keys.nsets; t++) {
+		k = bkey_prev(t,
+			bch_btree_node_iter_bset_pos(iter, &b->keys, t->data) ?:
+			bset_bkey_last(t->data));
+
+		/*
+		 * For interior nodes, the iterator will have skipped past
+		 * deleted keys:
+		 */
+		if (b->level)
+			while (k && bkey_deleted(k))
+				k = bkey_prev(t, k);
+
+		BUG_ON(k && btree_iter_pos_cmp_packed(f, pos, k,
+						      strictly_greater));
+	}
+
+	k = bch_btree_node_iter_peek_all(iter, &b->keys);
+	BUG_ON(k && !btree_iter_pos_cmp_packed(f, pos, k, strictly_greater));
 }
 
-void bch_btree_node_iter_fix(struct btree_iter *iter,
-			     struct btree_keys *b,
-			     struct btree_node_iter *node_iter,
-			     struct bkey_packed *where,
-			     bool overwrote)
+void bch_btree_iter_verify(struct btree_iter *iter, struct btree *b)
+{
+	struct btree_iter *linked;
+
+	if (iter->nodes[b->level] == b)
+		__bch_btree_iter_verify(&iter->node_iters[b->level],
+					b, iter->pos,
+					iter->is_extents);
+
+	for_each_linked_btree_node(iter, b, linked)
+		__bch_btree_iter_verify(&linked->node_iters[b->level],
+					b, linked->pos,
+					linked->is_extents);
+}
+
+#endif
+
+static void __bch_btree_node_iter_fix(struct btree_iter *iter,
+				      struct btree_keys *b,
+				      struct btree_node_iter *node_iter,
+				      struct bkey_packed *where,
+				      bool overwrote)
 {
 	struct bkey_format *f = &b->format;
-	struct bset *i = bset_tree_last(b)->data;
+	struct bset *i = bch_bkey_to_bset(b, where)->data;
 	const struct bkey_packed *end = bset_bkey_last(i);
 	struct btree_node_iter_set *set;
 	unsigned shift = overwrote ? 0 : where->u64s;
 	unsigned offset = __btree_node_key_to_offset(b, where);
 	unsigned old_end = __btree_node_key_to_offset(b, end) - shift;
-	bool iter_pos_before_new = btree_iter_pos_cmp(iter, iter->pos,
-					bkey_unpack_key(f, where).p);
-
-	BUG_ON(node_iter->used > MAX_BSETS);
+	bool iter_pos_before_new = btree_iter_pos_cmp_packed(f,
+				iter->pos, where, iter->is_extents);
 
 	for (set = node_iter->data;
 	     set < node_iter->data + node_iter->used;
@@ -222,19 +263,68 @@ void bch_btree_node_iter_fix(struct btree_iter *iter,
 		if (set->end == old_end) {
 			set->end += shift;
 
+			/*
+			 * When we inserted at @where, the key we inserted - the
+			 * new key at @where - compared strictly less than the
+			 * key previously at @where (which is now the next key
+			 * after the key we inserted).
+			 *
+			 * However, it is not necessarily true that if the
+			 * iterator's position is less than the key we inserted
+			 * it was <= the key previously at where - transitivity
+			 * doesn't hold here - because the key previously at
+			 * @where might have been a deleted key that the
+			 * iterator had skipped past.
+			 */
 			if (set->k >= offset) {
 				if (iter_pos_before_new)
 					set->k = offset;
 				else
 					set->k += shift;
-				bch_btree_node_iter_sort(node_iter, b);
 			}
-			return;
+
+			/*
+			 * Resort iterator if we changed a key it points to:
+			 *
+			 * Do this before checking if we're removing a key from
+			 * the iterator:
+			 */
+			if (set->k == offset)
+				bch_btree_node_iter_sort(node_iter, b);
+			goto check_remove;
 		}
 
 	/* didn't find the bset in the iterator - might have to readd it: */
 	if (iter_pos_before_new)
 		bch_btree_node_iter_push(node_iter, b, where, end);
+check_remove:
+	if (!iter_pos_before_new &&
+	    bch_btree_node_iter_peek_all(node_iter, b) == where)
+		bch_btree_node_iter_advance(node_iter, b);
+}
+
+void bch_btree_node_iter_fix(struct btree_iter *iter,
+			     struct btree *b,
+			     struct btree_node_iter *node_iter,
+			     struct bkey_packed *k,
+			     bool overwrote)
+{
+	struct btree_iter *linked;
+
+	if (node_iter != &iter->node_iters[b->level])
+		__bch_btree_node_iter_fix(iter, &b->keys, node_iter,
+					  k, overwrote);
+
+	if (iter->nodes[b->level] == b)
+		__bch_btree_node_iter_fix(iter, &b->keys,
+					  &iter->node_iters[b->level],
+					  k, overwrote);
+
+	for_each_linked_btree_node(iter, b, linked)
+		__bch_btree_node_iter_fix(linked, &b->keys,
+					  &linked->node_iters[b->level],
+					  k, overwrote);
+	bch_btree_iter_verify(iter, b);
 }
 
 /* peek_all() doesn't skip deleted keys */
@@ -294,12 +384,11 @@ static inline void btree_iter_node_set(struct btree_iter *iter,
 			      pos, iter->is_extents);
 }
 
-static bool btree_iter_pos_in_node(struct btree_iter *iter,
-				   struct btree *b)
+static bool btree_iter_pos_in_node(struct btree_iter *iter, struct btree *b)
 {
 	return iter->btree_id == b->btree_id &&
 		bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
-		btree_iter_pos_cmp(iter, iter->pos, b->key.k.p);
+		btree_iter_pos_cmp(iter->pos, &b->key.k, iter->is_extents);
 }
 
 /*
@@ -493,8 +582,9 @@ retry:
 	while (iter->nodes[iter->level] &&
 	       !(is_btree_node(iter, iter->level) &&
		 btree_node_relock(iter, iter->level) &&
-		 btree_iter_pos_cmp(iter, pos,
-				    iter->nodes[iter->level]->key.k.p)))
+		 btree_iter_pos_cmp(pos,
+				    &iter->nodes[iter->level]->key.k,
+				    iter->is_extents)))
 		btree_iter_up(iter);
 
 	/*
@@ -505,7 +595,7 @@ retry:
 		struct bkey_s_c k;
 
 		while ((k = __btree_iter_peek_all(iter)).k &&
-		       !btree_iter_pos_cmp(iter, pos, k.k->p))
+		       !btree_iter_pos_cmp(pos, k.k, iter->is_extents))
 			__btree_iter_advance(iter);
 	}
 
diff --git a/drivers/md/bcache/btree_iter.h b/drivers/md/bcache/btree_iter.h
index 032a720ecf60..a20614f46d3a 100644
--- a/drivers/md/bcache/btree_iter.h
+++ b/drivers/md/bcache/btree_iter.h
@@ -108,9 +108,16 @@ __next_linked_btree_node(struct btree_iter *iter, struct btree *b,
 	for ((_linked) = (_iter);					\
 	     ((_linked) = __next_linked_btree_node(_iter, _b, _linked));)
 
-void bch_btree_node_iter_fix(struct btree_iter *, struct btree_keys *,
+#ifdef CONFIG_BCACHE_DEBUG
+void bch_btree_iter_verify(struct btree_iter *, struct btree *);
+#else
+static inline void bch_btree_iter_verify(struct btree_iter *iter,
+					 struct btree *b) {}
+#endif
+
+void bch_btree_node_iter_fix(struct btree_iter *, struct btree *,
 			     struct btree_node_iter *, struct bkey_packed *,
-			     bool overwrote);
+			     bool);
 
 bool bch_btree_iter_upgrade(struct btree_iter *);
 int bch_btree_iter_unlock(struct btree_iter *);
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index d6110c3f0188..791dddadfae4 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -162,8 +162,8 @@ enum extent_insert_hook_ret {
 
 struct extent_insert_hook {
 	enum extent_insert_hook_ret
-	(*fn)(struct extent_insert_hook *, struct btree_iter *,
-	      struct bpos, struct bkey_s_c, const struct bkey_i *);
+	(*fn)(struct extent_insert_hook *, struct bpos, struct bpos,
+	      struct bkey_s_c, const struct bkey_i *);
 };
 
 enum btree_insert_ret {
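
With the signature change above, extent insert hooks no longer receive the
btree iterator: they are handed the committed position explicitly, so a hook
can no longer observe iter->pos mid-update. A minimal no-op hook under the
new signature (illustrative sketch only; the real examples are
bch_extent_cmpxchg() and the fs-io.c hooks later in this diff):

	static enum extent_insert_hook_ret
	noop_hook_fn(struct extent_insert_hook *hook,
		     struct bpos committed_pos,
		     struct bpos next_pos,
		     struct bkey_s_c k,
		     const struct bkey_i *insert)
	{
		/* always let the insert proceed, ignore the positions */
		return BTREE_HOOK_DO_INSERT;
	}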
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index b09829644731..e37fce09c7a5 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -614,7 +614,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
 	return 0;
 }
 
-static bool bch_insert_fixup_btree_ptr(struct btree_iter *iter,
+static void bch_insert_fixup_btree_ptr(struct btree_iter *iter,
 				       struct btree *b,
 				       struct bkey_i *insert,
 				       struct btree_node_iter *node_iter,
@@ -624,45 +624,30 @@ static bool bch_insert_fixup_btree_ptr(struct btree_iter *iter,
 	const struct bkey_format *f = &b->keys.format;
 	struct bucket_stats_cache_set stats = { 0 };
 	struct bkey_packed *k;
-	int cmp;
-
-	EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
-		(bkey_deleted(k)
-		 ? bkey_cmp_packed(f, k, &insert->k) > 0
-		 : bkey_cmp_packed(f, k, &insert->k) >= 0));
+	struct bkey tmp;
 
 	if (bkey_extent_is_data(&insert->k))
 		bch_mark_key(c, bkey_i_to_s_c(insert),
 			     c->sb.btree_node_size, true,
 			     gc_pos_btree_node(b), &stats);
 
-	while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys))) {
-		struct bkey tmp;
-		struct bkey_s_c u = bkey_disassemble(f, k, &tmp);
-
-		cmp = bkey_cmp(u.k->p, insert->k.p);
-		if (cmp > 0)
-			break;
-
-		if (!cmp && !bkey_deleted(k)) {
-			bch_btree_node_free_index(c, b, iter->btree_id,
-						  u, &stats);
-			/*
-			 * Look up pending delete, mark so that gc marks it on
-			 * the pending delete list
-			 */
-			k->type = KEY_TYPE_DELETED;
-			btree_keys_account_key_drop(&b->keys.nr, k);
-		}
-
+	while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys)) &&
+	       !btree_iter_pos_cmp_packed(f, insert->k.p, k, false))
 		bch_btree_node_iter_advance(node_iter, &b->keys);
-	}
 
-	bch_btree_bset_insert(iter, b, node_iter, insert);
-	set_btree_node_dirty(b);
+	/*
+	 * If we're overwriting, look up pending delete and mark so that gc
+	 * marks it on the pending delete list:
+	 */
+	if (k && !bkey_cmp_packed(f, k, &insert->k))
+		bch_btree_node_free_index(c, b, iter->btree_id,
+					  bkey_disassemble(f, k, &tmp),
+					  &stats);
 
 	bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
-	return true;
+
+	bch_btree_bset_insert_key(iter, b, node_iter, insert);
+	set_btree_node_dirty(b);
 }
 
 /* Inserting into a given leaf node (last stage of insert): */
@@ -673,9 +658,7 @@ void bch_btree_bset_insert(struct btree_iter *iter,
 			   struct btree_node_iter *node_iter,
 			   struct bkey_i *insert)
 {
-	struct btree_iter *linked;
 	struct bkey_packed *where;
-	bool overwrote;
 
 	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
 	EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->c, b));
@@ -689,26 +672,37 @@ void bch_btree_bset_insert(struct btree_iter *iter,
 	 * though.
 	 */
-	where = bch_bset_insert(&b->keys, node_iter, insert, &overwrote);
+	where = bch_bset_insert(&b->keys, node_iter, insert);
 
-	bch_btree_node_iter_fix(iter, &b->keys, node_iter, where, overwrote);
+	bch_btree_node_iter_fix(iter, b, node_iter, where, false);
+}
+
+/* Handle overwrites and do insert, for non extents: */
+void bch_btree_bset_insert_key(struct btree_iter *iter,
+			       struct btree *b,
+			       struct btree_node_iter *node_iter,
+			       struct bkey_i *insert)
+{
+	const struct bkey_format *f = &b->keys.format;
+	struct bkey_packed *k;
 
-	if (iter->nodes[b->level] == b &&
-	    node_iter != &iter->node_iters[b->level])
-		bch_btree_node_iter_fix(iter, &b->keys,
-					&iter->node_iters[b->level],
-					where, overwrote);
+	k = bch_btree_node_iter_peek_all(node_iter, &b->keys);
+	if (k && !bkey_cmp_packed(f, k, &insert->k)) {
+		if (bch_bset_try_overwrite(&b->keys, node_iter, k, insert)) {
+			bch_btree_iter_verify(iter, b);
+			return;
+		}
 
-	for_each_linked_btree_node(iter, b, linked)
-		bch_btree_node_iter_fix(linked, &b->keys,
-					&linked->node_iters[b->level],
-					where, overwrote);
+		k->type = KEY_TYPE_DELETED;
+		btree_keys_account_key_drop(&b->keys.nr, k);
+		bch_btree_node_iter_fix(iter, b, node_iter, k, true);
 
-	bch_btree_node_iter_verify(node_iter, &b->keys);
+		if (bkey_deleted(&insert->k) &&
+		    bch_bkey_to_bset(&b->keys, k) == bset_tree_last(&b->keys))
+			return;
+	}
 
-	for_each_linked_btree_node(iter, b, linked)
-		bch_btree_node_iter_verify(&linked->node_iters[b->level],
-					   &b->keys);
+	bch_btree_bset_insert(iter, b, node_iter, insert);
 }
 
 static void btree_node_flush(struct journal_entry_pin *pin)
@@ -721,20 +715,12 @@ static void btree_node_flush(struct journal_entry_pin *pin)
 	six_unlock_read(&b->lock);
 }
 
-/**
- * bch_btree_insert_and_journal - insert a non-overlapping key into a btree node
- *
- * This is called from bch_insert_fixup_extent() and bch_insert_fixup_key()
- *
- * The insert is journalled.
- */
-void bch_btree_insert_and_journal(struct btree_iter *iter,
-				  struct bkey_i *insert,
-				  struct journal_res *res)
+void bch_btree_journal_key(struct btree_iter *iter,
+			   struct bkey_i *insert,
+			   struct journal_res *res)
 {
 	struct cache_set *c = iter->c;
 	struct btree *b = iter->nodes[0];
-	struct btree_node_iter *node_iter = &iter->node_iters[0];
 
 	EBUG_ON(iter->level || b->level);
 
@@ -764,8 +750,6 @@ void bch_btree_insert_and_journal(struct btree_iter *iter,
 					 insert, b->level);
 		btree_bset_last(b)->journal_seq = cpu_to_le64(c->journal.seq);
 	}
-
-	bch_btree_bset_insert(iter, b, node_iter, insert);
 }
 
 static void verify_keys_sorted(struct keylist *l)
@@ -1116,7 +1100,6 @@ bch_btree_insert_keys_interior(struct btree *b,
 	const struct bkey_format *f = &b->keys.format;
 	struct bkey_i *insert = bch_keylist_front(insert_keys);
 	struct bkey_packed *k;
-	bool inserted = false;
 
 	BUG_ON(!btree_node_intent_locked(iter, btree_node_root(b)->level));
 	BUG_ON(!b->level);
@@ -1131,6 +1114,7 @@ bch_btree_insert_keys_interior(struct btree *b,
 		return BTREE_INSERT_BTREE_NODE_FULL;
 	}
 
+	/* Don't screw up @iter's position: */
 	node_iter = iter->node_iters[b->level];
 
 	/*
@@ -1147,11 +1131,9 @@ bch_btree_insert_keys_interior(struct btree *b,
 		bch_insert_fixup_btree_ptr(iter, b, insert,
 					   &node_iter, &res->disk_res);
-		inserted = true;
 		bch_keylist_dequeue(insert_keys);
 	}
 
-	BUG_ON(!inserted);
 	async_split_updated_btree(as, b);
 
 	btree_node_unlock_write(b, iter);
@@ -1530,12 +1512,11 @@ btree_insert_key(struct btree_insert_trans *trans,
 		 struct journal_res *res, unsigned flags)
 {
-	struct btree *b = insert->iter->nodes[0];
+	struct btree_iter *iter = insert->iter;
+	struct btree *b = iter->nodes[0];
 	s64 oldsize = bch_count_data(&b->keys);
 	enum btree_insert_ret ret;
 
-	bch_btree_node_iter_verify(&insert->iter->node_iters[0], &b->keys);
-
 	ret = !b->keys.ops->is_extents
 		? bch_insert_fixup_key(trans, insert, res)
 		: bch_insert_fixup_extent(trans, insert, disk_res,
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index f7ce263f9908..3e31f685d85b 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -138,8 +138,10 @@ int bch_btree_root_alloc(struct cache_set *, enum btree_id, struct closure *);
 
 void bch_btree_bset_insert(struct btree_iter *, struct btree *,
 			   struct btree_node_iter *, struct bkey_i *);
-void bch_btree_insert_and_journal(struct btree_iter *, struct bkey_i *,
-				  struct journal_res *);
+void bch_btree_bset_insert_key(struct btree_iter *, struct btree *,
+			       struct btree_node_iter *, struct bkey_i *);
+void bch_btree_journal_key(struct btree_iter *, struct bkey_i *,
+			   struct journal_res *);
 
 static inline struct btree_node_entry *write_block(struct btree *b)
 {
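
bch_btree_insert_and_journal() is split into bch_btree_journal_key() and
bch_btree_bset_insert_key(), so journalling and the bset update become
separate, explicit steps. The pattern leaf-node inserts now follow, as used
verbatim by bch_insert_fixup_key() in extents.c below (a sketch of the
calling convention, not new code):

	static void leaf_insert_sketch(struct btree_iter *iter,
				       struct bkey_i *insert,
				       struct journal_res *res)
	{
		bch_btree_journal_key(iter, insert, res);	/* journal first */
		bch_btree_bset_insert_key(iter, iter->nodes[0],	/* then the bset */
					  &iter->node_iters[0], insert);
	}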
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 41b1eb508f09..e24d582e3007 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -112,29 +112,16 @@ bch_insert_fixup_key(struct btree_insert_trans *trans,
 		     struct btree_trans_entry *insert,
 		     struct journal_res *res)
 {
-	struct btree *b = insert->iter->nodes[0];
-	struct btree_node_iter *node_iter = &insert->iter->node_iters[0];
-	const struct bkey_format *f = &b->keys.format;
-	struct bkey_packed *k;
-	int cmp;
-
-	BUG_ON(insert->iter->level);
-	EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
-		(bkey_deleted(k)
-		 ? bkey_cmp_packed(f, k, &insert->k->k) > 0
-		 : bkey_cmp_packed(f, k, &insert->k->k) >= 0));
-
-	while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys)) &&
-	       (cmp = bkey_cmp_packed(f, k, &insert->k->k)) <= 0) {
-		if (!cmp && !bkey_deleted(k)) {
-			k->type = KEY_TYPE_DELETED;
-			btree_keys_account_key_drop(&b->keys.nr, k);
-		}
+	struct btree_iter *iter = insert->iter;
 
-		bch_btree_node_iter_advance(node_iter, &b->keys);
-	}
+	BUG_ON(iter->level);
+
+	bch_btree_journal_key(iter, insert->k, res);
+	bch_btree_bset_insert_key(iter,
+				  iter->nodes[0],
+				  &iter->node_iters[0],
+				  insert->k);
 
-	bch_btree_insert_and_journal(insert->iter, insert->k, res);
 	trans->did_work = true;
 	return BTREE_INSERT_OK;
 }
@@ -645,23 +632,32 @@ void bch_key_resize(struct bkey *k,
  * that we have to unpack the key, modify the unpacked key - then this
  * copies/repacks the unpacked to the original as necessary.
  */
-static bool __extent_save(struct bkey_packed *dst, struct bkey *src,
-			  const struct bkey_format *f)
+static bool __extent_save(struct btree_keys *b, struct btree_node_iter *iter,
+			  struct bkey_packed *dst, struct bkey *src)
 {
+	struct bkey_format *f = &b->format;
 	struct bkey_i *dst_unpacked;
+	bool ret;
+
+	BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(src));
 
 	if ((dst_unpacked = packed_to_bkey(dst))) {
 		dst_unpacked->k = *src;
-		return true;
+		ret = true;
 	} else {
-		return bkey_pack_key(dst, src, f);
+		ret = bkey_pack_key(dst, src, f);
 	}
+
+	if (iter)
+		bch_verify_key_order(b, iter, dst);
+
+	return ret;
 }
 
-static void extent_save(struct bkey_packed *dst, struct bkey *src,
-			const struct bkey_format *f)
+static void extent_save(struct btree_keys *b, struct btree_node_iter *iter,
+			struct bkey_packed *dst, struct bkey *src)
 {
-	BUG_ON(!__extent_save(dst, src, f));
+	BUG_ON(!__extent_save(b, iter, dst, src));
 }
 
 /*
@@ -788,7 +784,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
 			sort_key_next(iter, b, _r);
 		} else {
 			__bch_cut_front(l.k->p, r);
-			extent_save(rk, r.k, f);
+			extent_save(b, NULL, rk, r.k);
 		}
 
 		extent_sort_sift(iter, b, _r - iter->data);
@@ -802,7 +798,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
 			bch_cut_back(bkey_start_pos(r.k), &tmp.k.k);
 
 			__bch_cut_front(r.k->p, l);
-			extent_save(lk, l.k, f);
+			extent_save(b, NULL, lk, l.k);
 
 			extent_sort_sift(iter, b, 0);
 
@@ -810,7 +806,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
 					 bkey_to_packed(&tmp.k));
 		} else {
 			bch_cut_back(bkey_start_pos(r.k), l.k);
-			extent_save(lk, l.k, f);
+			extent_save(b, NULL, lk, l.k);
 		}
 	}
 
@@ -962,7 +958,7 @@ static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
  * as @new.
 */
 enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
-					       struct btree_iter *iter,
+					       struct bpos committed_pos,
 					       struct bpos next_pos,
 					       struct bkey_s_c k,
 					       const struct bkey_i *new)
@@ -971,10 +967,7 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
 					  struct bch_replace_info, hook);
 	struct bkey_i *old = &replace->key;
 
-	EBUG_ON(iter->btree_id != BTREE_ID_EXTENTS);
-
-	/* iter->pos tracks what we've checked so far: */
-	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&new->k)) < 0);
+	EBUG_ON(bkey_cmp(committed_pos, bkey_start_pos(&new->k)) < 0);
 
 	/* must have something to compare against */
 	EBUG_ON(!bkey_val_u64s(&old->k));
@@ -992,6 +985,11 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
 	}
 }
 
+static bool bch_extent_merge_inline(struct btree_iter *iter,
+				    struct bkey_packed *,
+				    struct bkey_packed *,
+				    bool);
+
 #define MAX_LOCK_HOLD_TIME	(5 * NSEC_PER_MSEC)
 
 static enum btree_insert_ret
 extent_insert_should_stop(struct btree_insert_trans *trans,
@@ -1027,28 +1025,65 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_insert_trans
 	return BTREE_INSERT_OK;
 }
 
+static void extent_do_insert(struct btree_iter *iter, struct bkey_i *insert,
+			     struct journal_res *res, unsigned flags,
+			     struct bucket_stats_cache_set *stats)
+{
+	struct btree_keys *b = &iter->nodes[0]->keys;
+	struct btree_node_iter *node_iter = &iter->node_iters[0];
+	struct bset_tree *t = bset_tree_last(b);
+	struct bset *i = t->data;
+	struct bkey_packed *prev, *where =
+		bch_btree_node_iter_bset_pos(node_iter, b, i) ?:
+		bset_bkey_last(i);
+
+	if (!(flags & BTREE_INSERT_NO_MARK_KEY))
+		bch_add_sectors(iter, bkey_i_to_s_c(insert),
+				bkey_start_offset(&insert->k),
+				insert->k.size, stats);
+
+	bch_btree_journal_key(iter, insert, res);
+
+	if ((prev = bkey_prev(t, where)) &&
+	    bch_extent_merge_inline(iter, prev, bkey_to_packed(insert), true))
+		return;
+
+	if (where != bset_bkey_last(i) &&
+	    bch_extent_merge_inline(iter, bkey_to_packed(insert), where, false))
+		return;
+
+	bch_btree_bset_insert(iter, iter->nodes[0], node_iter, insert);
+}
+
 static void extent_insert_committed(struct btree_insert_trans *trans,
 				    struct btree_trans_entry *insert,
+				    struct bpos committed_pos,
 				    struct journal_res *res, unsigned flags,
 				    struct bucket_stats_cache_set *stats)
 {
-	struct btree_iter *iter = insert->iter;
-	struct bkey_i *k = insert->k, *split;
-
-	EBUG_ON(bkey_cmp(k->k.p, iter->pos) < 0);
-	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&k->k)) < 0);
+	EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k->k), insert->iter->pos));
+	EBUG_ON(bkey_cmp(insert->k->k.p, committed_pos) < 0);
+	EBUG_ON(bkey_cmp(committed_pos, insert->iter->pos) < 0);
+
+	if (bkey_cmp(committed_pos, insert->iter->pos) > 0) {
+		struct btree_iter *iter = insert->iter;
+		struct btree_keys *b = &iter->nodes[0]->keys;
+		struct btree_node_iter *node_iter = &iter->node_iters[0];
+		struct bkey_packed *k;
+		struct bkey_i *split;
+
+		EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
 
-	if (bkey_cmp(iter->pos, bkey_start_pos(&k->k)) > 0) {
-		EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
+		split = bch_key_split(committed_pos, insert->k);
+		extent_do_insert(insert->iter, split, res, flags, stats);
 
-		split = bch_key_split(iter->pos, k);
+		bch_btree_iter_set_pos(iter, committed_pos);
 
-		if (!(flags & BTREE_INSERT_NO_MARK_KEY))
-			bch_add_sectors(iter, bkey_i_to_s_c(split),
-					bkey_start_offset(&split->k),
-					split->k.size, stats);
+		while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
+		       !btree_iter_pos_cmp_packed(&b->format, iter->pos, k, true))
+			bch_btree_node_iter_advance(node_iter, b);
 
-		bch_btree_insert_and_journal(iter, split, res);
 		trans->did_work = true;
 	}
 }
@@ -1057,6 +1092,7 @@ static enum extent_insert_hook_ret
 __extent_insert_advance_pos(struct btree_insert_trans *trans,
 			    struct btree_trans_entry *insert,
 			    struct extent_insert_hook *hook,
+			    struct bpos *committed_pos,
 			    struct bpos next_pos,
 			    struct bkey_s_c k,
 			    struct journal_res *res, unsigned flags,
@@ -1069,7 +1105,7 @@ __extent_insert_advance_pos(struct btree_insert_trans *trans,
 	    k.k->version > insert->k->k.version)
 		ret = BTREE_HOOK_NO_INSERT;
 	else if (hook)
-		ret = hook->fn(hook, insert->iter, next_pos, k, insert->k);
+		ret = hook->fn(hook, *committed_pos, next_pos, k, insert->k);
 	else
 		ret = BTREE_HOOK_DO_INSERT;
 
@@ -1078,19 +1114,28 @@ __extent_insert_advance_pos(struct btree_insert_trans *trans,
 	switch (ret) {
 	case BTREE_HOOK_DO_INSERT:
 		break;
-	case BTREE_HOOK_NO_INSERT:
-		extent_insert_committed(trans, insert, res, flags, stats);
+	case BTREE_HOOK_NO_INSERT: {
+		struct btree_iter *iter = insert->iter;
+		struct btree_keys *b = &iter->nodes[0]->keys;
+		struct btree_node_iter *node_iter = &iter->node_iters[0];
+		struct bkey_packed *k;
+
+		extent_insert_committed(trans, insert, *committed_pos,
+					res, flags, stats);
 		__bch_cut_front(next_pos, bkey_i_to_s(insert->k));
+
+		bch_btree_iter_set_pos(iter, next_pos);
+
+		while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
+		       !btree_iter_pos_cmp_packed(&b->format, iter->pos, k, true))
+			bch_btree_node_iter_advance(node_iter, b);
 		break;
+	}
 	case BTREE_HOOK_RESTART_TRANS:
 		return ret;
 	}
 
-	/*
-	 * Don't update iter->pos until after calling the hook,
-	 * because the hook fn may use it:
-	 */
-	bch_btree_iter_set_pos(insert->iter, next_pos);
+	*committed_pos = next_pos;
 
 	return ret;
 }
@@ -1102,21 +1147,22 @@ static enum extent_insert_hook_ret
 extent_insert_advance_pos(struct btree_insert_trans *trans,
 			  struct btree_trans_entry *insert,
 			  struct extent_insert_hook *hook,
+			  struct bpos *committed_pos,
 			  struct bkey_s_c k,
 			  struct journal_res *res, unsigned flags,
 			  struct bucket_stats_cache_set *stats)
 {
 	struct btree *b = insert->iter->nodes[0];
-	struct bpos next_pos = k.k
-		? bpos_min(insert->k->k.p, k.k->p)
-		: bpos_min(insert->k->k.p, b->key.k.p);
+	struct bpos next_pos =
+		bpos_min(insert->k->k.p, k.k ? k.k->p : b->key.k.p);
 	/* hole? */
-	if (k.k && bkey_cmp(insert->iter->pos, bkey_start_pos(k.k)) < 0) {
-		bool have_uncommitted = bkey_cmp(insert->iter->pos,
+	if (k.k && bkey_cmp(*committed_pos, bkey_start_pos(k.k)) < 0) {
+		bool have_uncommitted = bkey_cmp(*committed_pos,
 					bkey_start_pos(&insert->k->k)) > 0;
 
 		switch (__extent_insert_advance_pos(trans, insert, hook,
+						    committed_pos,
 						    bkey_start_pos(k.k),
 						    bkey_s_c_null,
 						    res, flags, stats)) {
@@ -1137,10 +1183,11 @@ extent_insert_advance_pos(struct btree_insert_trans *trans,
 	}
 
 	/* avoid redundant calls to hook fn: */
-	if (!bkey_cmp(insert->iter->pos, next_pos))
+	if (!bkey_cmp(*committed_pos, next_pos))
 		return BTREE_HOOK_DO_INSERT;
 
-	return __extent_insert_advance_pos(trans, insert, hook, next_pos,
+	return __extent_insert_advance_pos(trans, insert, hook,
+					   committed_pos, next_pos,
 					   k, res, flags, stats);
 }
 
@@ -1202,6 +1249,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 	unsigned nr_done = 0;
 	u64 start_time = local_clock();
 	enum btree_insert_ret ret = BTREE_INSERT_OK;
+	struct bpos committed_pos = iter->pos;
 
 	EBUG_ON(iter->level);
 	EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
@@ -1212,9 +1260,9 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 	 * been inserted, and also to keep @iter->pos consistent with
 	 * @insert->k and the node iterator that we're advancing:
 	 */
-	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
+	EBUG_ON(bkey_cmp(committed_pos, bkey_start_pos(&insert->k->k)));
 
-	while (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+	while (bkey_cmp(committed_pos, insert->k->k.p) < 0 &&
 	       (ret = extent_insert_should_stop(trans, insert, res,
 						start_time, nr_done)) == BTREE_INSERT_OK &&
 	       (_k = bch_btree_node_iter_peek_overlapping(node_iter,
@@ -1228,6 +1276,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 		 */
 		if (k.k->size)
 			switch (extent_insert_advance_pos(trans, insert, hook,
+							  &committed_pos,
 							  k.s_c, res,
 							  flags, &stats)) {
 			case BTREE_HOOK_DO_INSERT:
@@ -1244,7 +1293,8 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 		case BCH_EXTENT_OVERLAP_FRONT:
 			/* insert and k share the start, invalidate in k */
 			bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
-			extent_save(_k, k.k, f);
+			BUG_ON(bkey_deleted(k.k));
+			extent_save(&b->keys, node_iter, _k, k.k);
 			break;
 
 		case BCH_EXTENT_OVERLAP_BACK:
 			/* insert and k share the end, invalidate in k */
 			bch_cut_subtract_back(iter,
 					      bkey_start_pos(&insert->k->k),
 					      k, &stats);
-			extent_save(_k, k.k, f);
+			BUG_ON(bkey_deleted(k.k));
+			extent_save(&b->keys, node_iter, _k, k.k);
 
 			/*
 			 * As the auxiliary tree is indexed by the end of the
 			 * key and we've just changed the end, update the
 			 * auxiliary tree.
 			 */
 			bch_bset_fix_invalidated_key(&b->keys, _k);
-			bch_btree_node_iter_advance(node_iter, &b->keys);
+			bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
 			break;
 
 		case BCH_EXTENT_OVERLAP_ALL: {
@@ -1272,7 +1323,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 			bch_drop_subtract(iter, k, &stats);
 			k.k->p = bkey_start_pos(&insert->k->k);
-			if (!__extent_save(_k, k.k, f)) {
+			if (!__extent_save(&b->keys, node_iter, _k, k.k)) {
 				/*
 				 * Couldn't repack: we aren't necessarily able
 				 * to repack if the new key is outside the range
 				 * of the old extent, so we have to offset
 				 * @insert:
 				 */
 				k.k->p = orig_pos;
-				extent_save(_k, k.k, f);
+				extent_save(&b->keys, node_iter, _k, k.k);
 
 				if (extent_insert_advance_pos(trans, insert,
-							      hook, k.s_c,
-							      res, flags,
-							      &stats) ==
+							      hook, &committed_pos,
+							      k.s_c, res, flags,
+							      &stats) ==
 				    BTREE_HOOK_RESTART_TRANS) {
 					ret = BTREE_INSERT_NEED_TRAVERSE;
 					goto stop;
 				}
 
-				extent_insert_committed(trans, insert, res, flags, &stats);
+				extent_insert_committed(trans, insert, committed_pos,
+							res, flags, &stats);
 
 				/*
 				 * We split and inserted upto at k.k->p - that
 				 * has to coincide with iter->pos, so that we
 				 * don't have anything more we have to insert
 				 * until we recheck our journal reservation:
 				 */
-				EBUG_ON(bkey_cmp(iter->pos, k.k->p));
+				EBUG_ON(bkey_cmp(committed_pos, k.k->p));
 			} else {
 				bch_bset_fix_invalidated_key(&b->keys, _k);
-				bch_btree_node_iter_advance(node_iter, &b->keys);
+				bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
 			}
 
 			break;
@@ -1323,10 +1375,12 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 			 */
 			bkey_reassemble(&split.k, k.s_c);
 			bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
+			BUG_ON(bkey_deleted(&split.k.k));
 
 			__bch_cut_front(bkey_start_pos(&insert->k->k), k);
 			bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
-			extent_save(_k, k.k, f);
+			BUG_ON(bkey_deleted(k.k));
+			extent_save(&b->keys, node_iter, _k, k.k);
 
 			bch_btree_bset_insert(iter, b, node_iter, &split.k);
 			break;
@@ -1334,17 +1388,18 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
 		}
 	}
 
-	if (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+	if (bkey_cmp(committed_pos, insert->k->k.p) < 0 &&
 	    ret == BTREE_INSERT_OK &&
-	    extent_insert_advance_pos(trans, insert, hook, bkey_s_c_null, res,
-				      flags, &stats) == BTREE_HOOK_RESTART_TRANS)
+	    extent_insert_advance_pos(trans, insert, hook, &committed_pos,
+				      bkey_s_c_null, res, flags,
+				      &stats) == BTREE_HOOK_RESTART_TRANS)
 		ret = BTREE_INSERT_NEED_TRAVERSE;
 stop:
-	extent_insert_committed(trans, insert, res, flags, &stats);
+	extent_insert_committed(trans, insert, committed_pos, res, flags, &stats);
 
 	bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
 
-	if (insert->k->k.size && !bkey_cmp(iter->pos, b->key.k.p))
+	if (insert->k->k.size && !bkey_cmp(committed_pos, b->key.k.p))
 		ret = BTREE_INSERT_NEED_TRAVERSE;
 
 	return ret;
@@ -1886,48 +1941,50 @@ static enum merge_result bch_extent_merge(struct btree_keys *bk,
 	return BCH_MERGE_MERGE;
 }
 
-static bool extent_i_save(struct bkey_packed *dst, struct bkey_i *src,
-			  const struct bkey_format *f)
+static void extent_i_save(struct btree_keys *b, struct btree_node_iter *iter,
+			  struct bkey_packed *dst, struct bkey_i *src)
 {
-	BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
+	struct bkey_format *f = &b->format;
 
-	if (!__extent_save(dst, &src->k, f))
-		return false;
+	BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
+	extent_save(b, iter, dst, &src->k);
 
 	memcpy(bkeyp_val(f, dst), &src->v, bkey_val_bytes(&src->k));
-
-	return true;
 }
 
-static bool extent_merge_one_overlapping(struct btree_keys *b,
-					 struct bkey *m, struct bpos new_pos,
-					 struct bkey_packed *k, struct bkey uk,
-					 bool check, bool could_pack)
+static bool extent_merge_one_overlapping(struct btree_iter *iter,
+					 struct bpos new_pos,
+					 struct bkey_packed *k, struct bkey uk,
+					 bool check, bool could_pack)
 {
+	struct btree *b = iter->nodes[0];
+	struct btree_node_iter *node_iter = &iter->node_iters[0];
+
 	BUG_ON(!bkey_deleted(k));
 
 	if (check) {
-		if (bkey_packed(k) && !could_pack)
-			return false;
+		return !bkey_packed(k) || could_pack;
 	} else {
 		uk.p = new_pos;
-		extent_save(k, &uk, &b->format);
-		bch_bset_fix_invalidated_key(b, k);
+		extent_save(&b->keys, node_iter, k, &uk);
+		bch_bset_fix_invalidated_key(&b->keys, k);
+		bch_btree_node_iter_fix(iter, b, node_iter, k, true);
+		return true;
 	}
-
-	return true;
 }
 
-static bool extent_merge_do_overlapping(struct btree_keys *b,
-					struct btree_node_iter *iter,
-					struct bkey *m,
-					bool back_merge, bool check)
+static bool extent_merge_do_overlapping(struct btree_iter *iter,
					struct bkey *m, bool back_merge)
 {
+	struct btree_keys *b = &iter->nodes[0]->keys;
+	struct btree_node_iter *node_iter = &iter->node_iters[0];
 	const struct bkey_format *f = &b->format;
 	struct bset_tree *t;
 	struct bkey_packed *k;
 	struct bkey uk;
-	struct bpos new_pos = back_merge ? bkey_start_pos(m) : m->p;
+	struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m);
 	bool could_pack = bkey_pack_pos((void *) &uk, new_pos, f);
+	bool check = true;
 
 	/*
 	 * @m is the new merged extent:
@@ -1938,6 +1995,7 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
 	 *
 	 * But in the other bsets, we have to check for and fix such extents:
 	 */
+do_fixup:
 	for (t = b->set; t < b->set + b->nsets; t++) {
 		if (!t->data->u64s)
 			continue;
@@ -1946,7 +2004,7 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
 		 * if we don't find this bset in the iterator we already got to
 		 * the end of that bset, so start searching from the end.
 		 */
-		k = bch_btree_node_iter_bset_pos(iter, b, t->data) ?:
+		k = bch_btree_node_iter_bset_pos(node_iter, b, t->data) ?:
 			bkey_prev(t, bset_bkey_last(t->data));
 
 		if (back_merge) {
@@ -1963,7 +2021,7 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
 				if (bkey_cmp(uk.p, m->p) >= 0)
 					continue;
 
-				if (!extent_merge_one_overlapping(b, m, new_pos,
+				if (!extent_merge_one_overlapping(iter, new_pos,
 								  k, uk, check,
 								  could_pack))
 					return false;
 			}
@@ -1978,13 +2036,18 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
 					     bkey_start_pos(m)) <= 0)
 					continue;
 
-				if (!extent_merge_one_overlapping(b, m, new_pos,
+				if (!extent_merge_one_overlapping(iter, new_pos,
 								  k, uk, check,
 								  could_pack))
 					return false;
 			}
 		}
 	}
 
+	if (check) {
+		check = false;
+		goto do_fixup;
+	}
+
 	return true;
 }
 
@@ -1996,18 +2059,19 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
  *
 * Also unpacks and repacks.
 */
-static bool bch_extent_merge_inline(struct btree_keys *b,
-				    struct btree_node_iter *iter,
+static bool bch_extent_merge_inline(struct btree_iter *iter,
 				    struct bkey_packed *l,
 				    struct bkey_packed *r,
 				    bool back_merge)
 {
+	struct btree_keys *b = &iter->nodes[0]->keys;
+	struct btree_node_iter *node_iter = &iter->node_iters[0];
 	const struct bkey_format *f = &b->format;
 	struct bkey_packed *m;
 	BKEY_PADDED(k) li;
 	BKEY_PADDED(k) ri;
 	struct bkey_i *mi;
-	bool ret;
+	struct bkey tmp;
 
 	/*
 	 * We need to save copies of both l and r, because we might get a
@@ -2026,42 +2090,55 @@ static bool bch_extent_merge_inline(struct btree_keys *b,
 	case BCH_MERGE_NOMERGE:
 		return false;
 	case BCH_MERGE_PARTIAL:
-		if (!extent_merge_do_overlapping(b, iter, &li.k.k,
-						 back_merge, true))
+		if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &mi->k, f))
 			return false;
 
-		if (!extent_i_save(m, mi, f))
+		if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
 			return false;
 
+		extent_i_save(b, node_iter, m, mi);
+
+		/*
+		 * Update iterator to reflect what we just inserted - otherwise,
+		 * the iter_fix() call is going to put us _before_ the key we
+		 * just partially merged with:
+		 */
+		if (back_merge) {
+			struct bkey_packed *k;
+
+			bch_btree_iter_set_pos(iter, li.k.k.p);
+			while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
+			       !btree_iter_pos_cmp_packed(&b->format, iter->pos, k, true))
+				bch_btree_node_iter_advance(node_iter, b);
+		}
+
+		bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
+					m, true);
+
 		if (!back_merge)
 			bkey_copy(packed_to_bkey(l), &li.k);
 		else
 			bkey_copy(packed_to_bkey(r), &ri.k);
-
-		ret = false;
-		break;
+		return false;
 	case BCH_MERGE_MERGE:
-		if (!extent_merge_do_overlapping(b, iter, &li.k.k,
-						 back_merge, true))
+		if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &li.k.k, f))
 			return false;
 
-		if (!extent_i_save(m, &li.k, f))
+		if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
 			return false;
 
-		ret = true;
-		break;
+		extent_i_save(b, node_iter, m, &li.k);
+		bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
+					m, true);
+		return true;
 	default:
 		BUG();
 	}
-
-	extent_merge_do_overlapping(b, iter, &li.k.k, back_merge, false);
-	return ret;
 }
 
 static const struct btree_keys_ops bch_extent_ops = {
 	.key_normalize	= bch_ptr_normalize,
 	.key_merge	= bch_extent_merge,
-	.key_merge_inline = bch_extent_merge_inline,
 	.is_extents	= true,
 };
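
extent_merge_do_overlapping() now runs twice over the other bsets via the
do_fixup label: a first pass with check == true that only verifies every
overlapping whiteout could be repacked at its new position, then a second
pass that actually rewrites them and is not allowed to fail - so a merge can
no longer abort halfway through. The control flow reduced to a sketch
(walk_overlapping() is a hypothetical stand-in for the two per-bset loops in
the real function):

	static bool walk_overlapping(struct btree_iter *, struct bkey *,
				     bool, bool);	/* hypothetical */

	static bool two_pass_sketch(struct btree_iter *iter, struct bkey *m,
				    bool back_merge)
	{
		bool check = true;
	do_fixup:
		/* check pass: validate only; fixup pass: must succeed */
		if (!walk_overlapping(iter, m, back_merge, check))
			return false;

		if (check) {
			check = false;
			goto do_fixup;
		}
		return true;
	}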
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 23418ccb04f9..7bab66a30c97 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -52,8 +52,8 @@ bch_extent_pick_ptr(struct cache_set *c, struct bkey_s_c k,
 }
 
 enum extent_insert_hook_ret
-bch_extent_cmpxchg(struct extent_insert_hook *, struct btree_iter *,
-		   struct bpos, struct bkey_s_c, const struct bkey_i *);
+bch_extent_cmpxchg(struct extent_insert_hook *, struct bpos, struct bpos,
+		   struct bkey_s_c, const struct bkey_i *);
 
 enum btree_insert_ret
 bch_insert_fixup_extent(struct btree_insert_trans *,
diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c
index 60bc059de187..a71a4cdd45d3 100644
--- a/drivers/md/bcache/fs-io.c
+++ b/drivers/md/bcache/fs-io.c
@@ -100,14 +100,14 @@ static inline void i_size_dirty_get(struct bch_inode_info *ei)
 
 static enum extent_insert_hook_ret
 i_sectors_hook_fn(struct extent_insert_hook *hook,
-		  struct btree_iter *iter,
+		  struct bpos committed_pos,
 		  struct bpos next_pos,
 		  struct bkey_s_c k,
 		  const struct bkey_i *insert)
 {
 	struct i_sectors_hook *h = container_of(hook,
 				struct i_sectors_hook, hook);
-	s64 sectors = next_pos.offset - iter->pos.offset;
+	s64 sectors = next_pos.offset - committed_pos.offset;
 	int sign = bkey_extent_is_allocation(&insert->k) -
 		(k.k && bkey_extent_is_allocation(k.k));
 
@@ -207,7 +207,7 @@ struct bchfs_extent_trans_hook {
 
 static enum extent_insert_hook_ret
 bchfs_extent_update_hook(struct extent_insert_hook *hook,
-			 struct btree_iter *iter,
+			 struct bpos committed_pos,
 			 struct bpos next_pos,
 			 struct bkey_s_c k,
 			 const struct bkey_i *insert)
@@ -218,7 +218,7 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
 	struct inode *inode = &ei->vfs_inode;
 	int sign = bkey_extent_is_allocation(&insert->k) -
 		(k.k && bkey_extent_is_allocation(k.k));
-	s64 sectors = (s64) (next_pos.offset - iter->pos.offset) * sign;
+	s64 sectors = (s64) (next_pos.offset - committed_pos.offset) * sign;
 	u64 offset = min(next_pos.offset << 9, h->op->new_i_size);
 
 	BUG_ON((next_pos.offset << 9) > round_up(offset, PAGE_SIZE));
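
The fs-io.c hunks show why committed_pos is threaded through the hooks: sector
accounting is charged per committed range rather than against a half-updated
iterator position. A worked example with illustrative numbers: for an insert
spanning sectors 96..128 of which 96..112 is already committed, a hook called
with committed_pos.offset == 112 and next_pos.offset == 128 charges
next_pos.offset - committed_pos.offset = 16 sectors - exactly the newly
committed range - which is the arithmetic i_sectors_hook_fn() and
bchfs_extent_update_hook() now perform.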