author     Kent Overstreet <kent.overstreet@gmail.com>  2016-07-18 16:50:28 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>  2016-10-07 12:36:40 -0800
commit     f69aef2c53c4da77826d525eabfa25b526fa0803 (patch)
tree       c806793caeb467490e2dca5eb92260e111eba18e
parent     f0fedf0b62aca56bdc798edc17e3f6d433e28b4b (diff)
bcache: Make iterators more rigorous
-rw-r--r--  drivers/md/bcache/bset.c          227
-rw-r--r--  drivers/md/bcache/bset.h           71
-rw-r--r--  drivers/md/bcache/btree_iter.c    136
-rw-r--r--  drivers/md/bcache/btree_iter.h     11
-rw-r--r--  drivers/md/bcache/btree_types.h     4
-rw-r--r--  drivers/md/bcache/btree_update.c  113
-rw-r--r--  drivers/md/bcache/btree_update.h    6
-rw-r--r--  drivers/md/bcache/extents.c       325
-rw-r--r--  drivers/md/bcache/extents.h         4
-rw-r--r--  drivers/md/bcache/fs-io.c           8
10 files changed, 537 insertions(+), 368 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 44a8f23ac448..eb51259c6a8f 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -37,16 +37,15 @@ struct bset_tree *bch_bkey_to_bset(struct btree_keys *b, struct bkey_packed *k)
* have been flagged as deleted (and will be cleaned up later) we _will_ see
* duplicates.
*
- * Sort order is important here:
- * - For extents, the deleted keys have to come last. This is because we're
- * using bkey_deleted() as a proxy for k->size == 0, and we still have to
- * maintain the invariant that
- *   bkey_cmp(k->p, bkey_start_pos(bkey_next(k))) <= 0
+ * Thus the sort order is: usual key comparison first, but for keys that compare
+ * equal the deleted key(s) come first, and the (at most one) live version comes
+ * last.
*
- * i.e. a key can't end after the start of the next key.
- *
- * - For non extents, deleted keys must come first.
- * XXX: why is this?
+ * The main reason for this is insertion: to handle overwrites, we first iterate
+ * over keys that compare equal to our insert key, and then insert immediately
+ * prior to the first key greater than the key we're inserting - our insert
+ * position will be after all keys that compare equal to our insert key, which
+ * by the time we actually do the insert will all be deleted.
*/
static bool keys_out_of_order(const struct bkey_format *f,
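
The new ordering described in the comment above can be made concrete with a
small comparator sketch (the name key_sort_cmp is illustrative and not part
of this patch; it assumes bkey_cmp_packed() and bkey_deleted() as used
elsewhere in this file):

	static int key_sort_cmp(const struct bkey_format *f,
				const struct bkey_packed *l,
				const struct bkey_packed *r)
	{
		/* usual key comparison first: */
		int cmp = bkey_cmp_packed(f, l, r);

		if (cmp)
			return cmp;

		/*
		 * keys that compare equal: deleted version(s) sort first,
		 * the (at most one) live version sorts last
		 */
		return (int) bkey_deleted(r) - (int) bkey_deleted(l);
	}
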
@@ -191,6 +190,57 @@ void bch_btree_node_iter_verify(struct btree_node_iter *iter,
}
}
+void bch_verify_key_order(struct btree_keys *b,
+ struct btree_node_iter *iter,
+ struct bkey_packed *where)
+{
+ const struct bkey_format *f = &b->format;
+ struct bset_tree *t = bch_bkey_to_bset(b, where);
+ struct bkey_packed *k, *prev;
+ struct bkey uk, uw = bkey_unpack_key(f, where);
+
+ k = bkey_prev(t, where);
+ BUG_ON(k &&
+ keys_out_of_order(f, k, where, b->ops->is_extents));
+
+ k = bkey_next(where);
+ BUG_ON(k != bset_bkey_last(t->data) &&
+ keys_out_of_order(f, where, k, b->ops->is_extents));
+
+ for (t = b->set; t <= b->set + b->nsets; t++) {
+ if (!t->data->u64s)
+ continue;
+
+ if (where >= t->data->start &&
+ where < bset_bkey_last(t->data))
+ continue;
+
+ k = bch_btree_node_iter_bset_pos(iter, b, t->data) ?:
+ bkey_prev(t, bset_bkey_last(t->data));
+
+ while (bkey_cmp_left_packed(f, k, bkey_start_pos(&uw)) > 0 &&
+ (prev = bkey_prev(t, k)))
+ k = prev;
+
+ for (;
+ k != bset_bkey_last(t->data);
+ k = bkey_next(k)) {
+ uk = bkey_unpack_key(f, k);
+
+ if (b->ops->is_extents) {
+ BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
+ bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
+ } else {
+ BUG_ON(!bkey_cmp(uw.p, uk.p) &&
+ !bkey_deleted(&uk));
+ }
+
+ if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
+ break;
+ }
+ }
+}
+
#else
static void bch_btree_node_iter_next_check(struct btree_node_iter *iter,
@@ -788,25 +838,6 @@ struct bkey_packed *bkey_prev(struct bset_tree *t, struct bkey_packed *k)
/* Insert */
-static void verify_insert_pos(struct btree_keys *b,
- const struct bkey_packed *prev,
- const struct bkey_packed *where,
- const struct bkey_i *insert)
-{
-#ifdef CONFIG_BCACHE_DEBUG
- const struct bkey_format *f = &b->format;
- struct bset_tree *t = bset_tree_last(b);
-
- BUG_ON(prev &&
- keys_out_of_order(f, prev, bkey_to_packed_c(insert),
- b->ops->is_extents));
-
- BUG_ON(where != bset_bkey_last(t->data) &&
- keys_out_of_order(f, bkey_to_packed_c(insert), where,
- b->ops->is_extents));
-#endif
-}
-
/**
* bch_bset_fix_invalidated_key() - given an existing key @k that has been
* modified, fix any auxiliary search tree by remaking all the nodes in the
@@ -938,13 +969,11 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
*/
struct bkey_packed *bch_bset_insert(struct btree_keys *b,
struct btree_node_iter *iter,
- struct bkey_i *insert,
- bool *overwrote)
+ struct bkey_i *insert)
{
struct bkey_format *f = &b->format;
struct bset_tree *t = bset_tree_last(b);
struct bset *i = t->data;
- struct bkey_packed *prev = NULL;
struct bkey_packed *where = bch_btree_node_iter_bset_pos(iter, b, i) ?:
bset_bkey_last(i);
struct bkey_packed packed, *src = bkey_to_packed(insert);
@@ -957,72 +986,10 @@ struct bkey_packed *bch_bset_insert(struct btree_keys *b,
BUG_ON(where < i->start);
BUG_ON(where > bset_bkey_last(i));
- bch_verify_btree_nr_keys(b);
-
- while (where != bset_bkey_last(i) &&
- keys_out_of_order(f, bkey_to_packed(insert),
- where, b->ops->is_extents))
- prev = where, where = bkey_next(where);
-
- if (!prev)
- prev = bkey_prev(t, where);
-
- verify_insert_pos(b, prev, where, insert);
-
- /*
- * note: bch_bkey_try_merge_inline() may modify either argument
- */
-
- /* prev is in the tree, if we merge we're done */
- if (prev &&
- bch_bkey_try_merge_inline(b, iter, prev,
- bkey_to_packed(insert), true)) {
- *overwrote = true;
- return prev;
- }
-
- if (where != bset_bkey_last(i) &&
- bch_bkey_try_merge_inline(b, iter,
- bkey_to_packed(insert),
- where, false)) {
- *overwrote = true;
- return where;
- }
-
- /*
- * Can we overwrite the current key, instead of doing a memmove()?
- *
- * For extents, only legal to overwrite keys that are deleted - because
- * extents are marked as deleted iff they are 0 size, deleted extents
- * don't overlap with any other existing keys.
- *
- * For non extents marked as deleted may be needed as whiteouts, until
- * the node is rewritten, only legal to overwrite if the new key
- * compares equal to the old.
- */
- if (b->ops->is_extents &&
- where != bset_bkey_last(i) &&
- where->u64s == src->u64s &&
- bkey_deleted(where))
- goto overwrite;
if (bkey_pack_key(&packed, &insert->k, f))
src = &packed;
- /* packing changes u64s - recheck after packing: */
- if (b->ops->is_extents &&
- where != bset_bkey_last(i) &&
- where->u64s == src->u64s &&
- bkey_deleted(where))
- goto overwrite;
-
- if (!b->ops->is_extents &&
- prev && prev->u64s == src->u64s &&
- !bkey_cmp_left_packed(f, prev, insert->k.p)) {
- where = prev;
- goto overwrite;
- }
-
memmove((u64 *) where + src->u64s,
where,
(void *) bset_bkey_last(i) - (void *) where);
@@ -1033,29 +1000,58 @@ struct bkey_packed *bch_bset_insert(struct btree_keys *b,
memcpy(bkeyp_val(f, where), &insert->v,
bkeyp_val_bytes(f, src));
+ bch_bset_fix_lookup_table(b, t, where);
+
if (!bkey_deleted(src))
btree_keys_account_key_add(&b->nr, src);
- bch_bset_fix_lookup_table(b, t, where);
-
+ bch_verify_key_order(b, iter, where);
bch_verify_btree_nr_keys(b);
- *overwrote = false;
return where;
+}
+
+/* @where must compare equal to @insert */
+bool bch_bset_try_overwrite(struct btree_keys *b,
+ struct btree_node_iter *iter,
+ struct bkey_packed *where,
+ struct bkey_i *insert)
+{
+ struct bkey_format *f = &b->format;
+ struct bkey_packed packed, *src = bkey_to_packed(insert);
+
+ EBUG_ON(bkey_cmp_left_packed(f, where, insert->k.p));
+ EBUG_ON(bkey_deleted(where));
+
+ if (bkey_deleted(&insert->k))
+ return false;
+
+ if (bch_bkey_to_bset(b, where) != bset_tree_last(b))
+ return false;
+
+ if (where->u64s == src->u64s)
+ goto overwrite;
+
+ if (!bkey_pack_key(&packed, &insert->k, f))
+ return false;
+
+ src = &packed;
+ if (where->u64s == src->u64s)
+ goto overwrite;
+
+ return false;
overwrite:
if (!bkey_deleted(where))
btree_keys_account_key_drop(&b->nr, where);
-
+ if (!bkey_deleted(src))
+ btree_keys_account_key_add(&b->nr, src);
memcpy(where, src,
bkeyp_key_bytes(f, src));
memcpy(bkeyp_val(f, where), &insert->v,
bkeyp_val_bytes(f, src));
- if (!bkey_deleted(src))
- btree_keys_account_key_add(&b->nr, src);
-
+ bch_verify_key_order(b, iter, where);
bch_verify_btree_nr_keys(b);
- *overwrote = true;
- return where;
+ return true;
}
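
The intended calling pattern mirrors bch_btree_bset_insert_key() later in
this patch - roughly (a sketch only, given a struct btree *b and its
node_iter; the whiteout path taken when the overwrite fails is omitted):

	k = bch_btree_node_iter_peek_all(node_iter, &b->keys);

	if (k && !bkey_cmp_packed(f, k, &insert->k) &&
	    bch_bset_try_overwrite(&b->keys, node_iter, k, insert))
		return;	/* overwrote in place, no memmove needed */

	/*
	 * fall back to the memmove path (assumes k was live;
	 * bch_bset_try_overwrite() EBUG_ONs deleted keys):
	 */
	bch_bset_insert(&b->keys, node_iter, insert);
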
/* Lookup */
@@ -1166,7 +1162,8 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b,
struct bset_tree *t,
struct bpos search,
struct bkey_packed *packed_search,
- const struct bkey_packed *lossy_packed_search)
+ const struct bkey_packed *lossy_packed_search,
+ bool strictly_greater)
{
const struct bkey_format *f = &b->format;
struct bkey_packed *m;
@@ -1207,21 +1204,21 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b,
if (unlikely(bkey_cmp_p_or_unp(f, t->data->start,
packed_search, search) >= 0))
- return t->data->start;
-
- m = bset_search_tree(f, t, search, lossy_packed_search);
+ m = t->data->start;
+ else
+ m = bset_search_tree(f, t, search, lossy_packed_search);
break;
}
if (lossy_packed_search)
while (m != bset_bkey_last(t->data) &&
- bkey_cmp_p_or_unp(f, m,
- lossy_packed_search, search) < 0)
+ !btree_iter_pos_cmp_p_or_unp(f, search, lossy_packed_search,
+ m, strictly_greater))
m = bkey_next(m);
if (!packed_search)
while (m != bset_bkey_last(t->data) &&
- bkey_cmp_left_packed(f, m, search) < 0)
+ !btree_iter_pos_cmp_packed(f, search, m, strictly_greater))
m = bkey_next(m);
if (btree_keys_expensive_checks(b)) {
@@ -1369,17 +1366,9 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter,
bch_btree_node_iter_push(iter, b,
bch_bset_search(b, t, search,
packed_search,
- lossy_packed_search),
+ lossy_packed_search,
+ strictly_greater),
bset_bkey_last(t->data));
-
- if (strictly_greater) {
- struct bkey_packed *m;
-
- while ((m = bch_btree_node_iter_peek_all(iter, b)) &&
- (bkey_cmp_p_or_unp(&b->format, m,
- packed_search, search) == 0))
- bch_btree_node_iter_advance(iter, b);
- }
}
EXPORT_SYMBOL(bch_btree_node_iter_init);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index f34eb5d63ab7..9d245d825a37 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -168,10 +168,6 @@ struct btree_keys_ops {
ptr_filter_fn key_normalize;
enum merge_result (*key_merge)(struct btree_keys *,
struct bkey_i *, struct bkey_i *);
- bool (*key_merge_inline)(struct btree_keys *,
- struct btree_node_iter *,
- struct bkey_packed *,
- struct bkey_packed *, bool);
/*
* Only used for deciding whether to use bkey_start_pos(k) or just the
@@ -326,7 +322,9 @@ void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey_packed *);
struct bkey_packed *bch_bset_insert(struct btree_keys *,
struct btree_node_iter *,
- struct bkey_i *, bool *);
+ struct bkey_i *);
+bool bch_bset_try_overwrite(struct btree_keys *, struct btree_node_iter *iter,
+ struct bkey_packed *, struct bkey_i *);
static inline void btree_keys_account_key(struct btree_nr_keys *n,
struct bkey_packed *k,
@@ -346,6 +344,39 @@ static inline void btree_keys_account_key(struct btree_nr_keys *n,
/* Bkey utility code */
+/* Returns true if @k is after iterator position @pos */
+static inline bool btree_iter_pos_cmp(struct bpos pos, const struct bkey *k,
+ bool strictly_greater)
+{
+ int cmp = bkey_cmp(k->p, pos);
+
+ return cmp > 0 ||
+ (cmp == 0 && !strictly_greater && !bkey_deleted(k));
+}
+
+static inline bool btree_iter_pos_cmp_packed(const struct bkey_format *f,
+ struct bpos pos,
+ const struct bkey_packed *k,
+ bool strictly_greater)
+{
+ int cmp = bkey_cmp_left_packed(f, k, pos);
+
+ return cmp > 0 ||
+ (cmp == 0 && !strictly_greater && !bkey_deleted(k));
+}
+
+static inline bool btree_iter_pos_cmp_p_or_unp(const struct bkey_format *f,
+ struct bpos pos,
+ const struct bkey_packed *pos_packed,
+ const struct bkey_packed *k,
+ bool strictly_greater)
+{
+ int cmp = bkey_cmp_p_or_unp(f, k, pos_packed, pos);
+
+ return cmp > 0 ||
+ (cmp == 0 && !strictly_greater && !bkey_deleted(k));
+}
+
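
All three helpers above encode the same rule, differing only in how @k is
packed. The strictly_greater flag captures what bch_btree_node_iter_init()
previously fixed up after the fact; an illustrative summary (restating the
helpers, not code from the patch):

	k vs. pos          strictly_greater   "k is after pos"?
	k > pos            either             yes
	k == pos, live     false              yes (return the key at pos)
	k == pos, live     true               no  (extent ending at pos)
	k == pos, deleted  either             no  (skip whiteouts)
	k < pos            either             no
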
#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
#define __bkey_idx(_set, _offset) \
@@ -382,17 +413,6 @@ static inline bool bch_bkey_try_merge(struct btree_keys *b,
: false;
}
-static inline bool bch_bkey_try_merge_inline(struct btree_keys *b,
- struct btree_node_iter *iter,
- struct bkey_packed *l,
- struct bkey_packed *r,
- bool back_merge)
-{
- return b->ops->key_merge_inline
- ? b->ops->key_merge_inline(b, iter, l, r, back_merge)
- : false;
-}
-
enum bch_extent_overlap {
BCH_EXTENT_OVERLAP_FRONT,
BCH_EXTENT_OVERLAP_BACK,
@@ -597,24 +617,27 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
#ifdef CONFIG_BCACHE_DEBUG
-s64 __bch_count_data(struct btree_keys *);
-void __bch_count_data_verify(struct btree_keys *, int);
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
void bch_dump_bucket(struct btree_keys *);
-void bch_btree_node_iter_verify(struct btree_node_iter *, struct btree_keys *);
+s64 __bch_count_data(struct btree_keys *);
void __bch_verify_btree_nr_keys(struct btree_keys *);
+void bch_btree_node_iter_verify(struct btree_node_iter *, struct btree_keys *);
+void bch_verify_key_order(struct btree_keys *, struct btree_node_iter *,
+ struct bkey_packed *);
#else
-static inline s64 __bch_count_data(struct btree_keys *b) { return -1; }
-static inline void __bch_count_data_verify(struct btree_keys *b, int oldsize ) {}
+static inline void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
+static inline s64 __bch_count_data(struct btree_keys *b) { return -1; }
+static inline void __bch_verify_btree_nr_keys(struct btree_keys *b) {}
static inline void bch_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree_keys *b) {}
-static inline void __bch_verify_btree_nr_keys(struct btree_keys *b) {}
-
+static inline void bch_verify_key_order(struct btree_keys *b,
+ struct btree_node_iter *iter,
+ struct bkey_packed *where) {}
#endif
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
static inline s64 bch_count_data(struct btree_keys *b)
{
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index 97dfb7992a1c..f0628f2544a0 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -190,31 +190,72 @@ bool btree_node_relock(struct btree_iter *iter, unsigned level)
/* Btree iterator: */
-static bool btree_iter_pos_cmp(struct btree_iter *iter,
- struct bpos pos, struct bpos k)
+#ifdef CONFIG_BCACHE_DEBUG
+
+static void __bch_btree_iter_verify(struct btree_node_iter *iter,
+ struct btree *b,
+ struct bpos pos,
+ bool strictly_greater)
{
- return iter->is_extents
- ? bkey_cmp(pos, k) < 0
- : bkey_cmp(pos, k) <= 0;
+ const struct bkey_format *f = &b->keys.format;
+ struct bset_tree *t;
+ struct bkey_packed *k;
+
+ bch_btree_node_iter_verify(iter, &b->keys);
+
+ for (t = b->keys.set; t <= b->keys.set + b->keys.nsets; t++) {
+ k = bkey_prev(t,
+ bch_btree_node_iter_bset_pos(iter, &b->keys, t->data) ?:
+ bset_bkey_last(t->data));
+
+ /*
+ * For interior nodes, the iterator will have skipped past
+ * deleted keys:
+ */
+ if (b->level)
+ while (k && bkey_deleted(k))
+ k = bkey_prev(t, k);
+
+ BUG_ON(k && btree_iter_pos_cmp_packed(f, pos, k,
+ strictly_greater));
+ }
+
+ k = bch_btree_node_iter_peek_all(iter, &b->keys);
+ BUG_ON(k && !btree_iter_pos_cmp_packed(f, pos, k, strictly_greater));
}
-void bch_btree_node_iter_fix(struct btree_iter *iter,
- struct btree_keys *b,
- struct btree_node_iter *node_iter,
- struct bkey_packed *where,
- bool overwrote)
+void bch_btree_iter_verify(struct btree_iter *iter, struct btree *b)
+{
+ struct btree_iter *linked;
+
+ if (iter->nodes[b->level] == b)
+ __bch_btree_iter_verify(&iter->node_iters[b->level],
+ b, iter->pos,
+ iter->is_extents);
+
+ for_each_linked_btree_node(iter, b, linked)
+ __bch_btree_iter_verify(&linked->node_iters[b->level],
+ b, linked->pos,
+ linked->is_extents);
+}
+
+#endif
+
+static void __bch_btree_node_iter_fix(struct btree_iter *iter,
+ struct btree_keys *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_packed *where,
+ bool overwrote)
{
struct bkey_format *f = &b->format;
- struct bset *i = bset_tree_last(b)->data;
+ struct bset *i = bch_bkey_to_bset(b, where)->data;
const struct bkey_packed *end = bset_bkey_last(i);
struct btree_node_iter_set *set;
unsigned shift = overwrote ? 0 : where->u64s;
unsigned offset = __btree_node_key_to_offset(b, where);
unsigned old_end = __btree_node_key_to_offset(b, end) - shift;
- bool iter_pos_before_new = btree_iter_pos_cmp(iter, iter->pos,
- bkey_unpack_key(f, where).p);
-
- BUG_ON(node_iter->used > MAX_BSETS);
+ bool iter_pos_before_new = btree_iter_pos_cmp_packed(f,
+ iter->pos, where, iter->is_extents);
for (set = node_iter->data;
set < node_iter->data + node_iter->used;
@@ -222,19 +263,68 @@ void bch_btree_node_iter_fix(struct btree_iter *iter,
if (set->end == old_end) {
set->end += shift;
+ /*
+ * When we inserted at @where, the key we inserted - the
+ * new key at @where - compared strictly less than the
+ * key previously at @where (which is now the next key
+ * after the key we inserted).
+ *
+ * However, it is not necessarily true that if the
+ * iterator's position is less than the key we inserted
+ * it was <= the key previously at where - transitivity
+ * doesn't hold here - because the key previously at
+ * @where might have been a deleted key that the
+ * iterator had skipped past.
+ */
if (set->k >= offset) {
if (iter_pos_before_new)
set->k = offset;
else
set->k += shift;
- bch_btree_node_iter_sort(node_iter, b);
}
- return;
+
+ /*
+ * Resort iterator if we changed a key it points to:
+ *
+ * Do this before checking if we're removing a key from
+ * the iterator:
+ */
+ if (set->k == offset)
+ bch_btree_node_iter_sort(node_iter, b);
+ goto check_remove;
}
/* didn't find the bset in the iterator - might have to readd it: */
if (iter_pos_before_new)
bch_btree_node_iter_push(node_iter, b, where, end);
+check_remove:
+ if (!iter_pos_before_new &&
+ bch_btree_node_iter_peek_all(node_iter, b) == where)
+ bch_btree_node_iter_advance(node_iter, b);
+}
+
+void bch_btree_node_iter_fix(struct btree_iter *iter,
+ struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_packed *k,
+ bool overwrote)
+{
+ struct btree_iter *linked;
+
+ if (node_iter != &iter->node_iters[b->level])
+ __bch_btree_node_iter_fix(iter, &b->keys, node_iter,
+ k, overwrote);
+
+ if (iter->nodes[b->level] == b)
+ __bch_btree_node_iter_fix(iter, &b->keys,
+ &iter->node_iters[b->level],
+ k, overwrote);
+
+ for_each_linked_btree_node(iter, b, linked)
+ __bch_btree_node_iter_fix(linked, &b->keys,
+ &linked->node_iters[b->level],
+ k, overwrote);
+ bch_btree_iter_verify(iter, b);
}
/* peek_all() doesn't skip deleted keys */
@@ -294,12 +384,11 @@ static inline void btree_iter_node_set(struct btree_iter *iter,
pos, iter->is_extents);
}
-static bool btree_iter_pos_in_node(struct btree_iter *iter,
- struct btree *b)
+static bool btree_iter_pos_in_node(struct btree_iter *iter, struct btree *b)
{
return iter->btree_id == b->btree_id &&
bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
- btree_iter_pos_cmp(iter, iter->pos, b->key.k.p);
+ btree_iter_pos_cmp(iter->pos, &b->key.k, iter->is_extents);
}
/*
@@ -493,8 +582,9 @@ retry:
while (iter->nodes[iter->level] &&
!(is_btree_node(iter, iter->level) &&
btree_node_relock(iter, iter->level) &&
- btree_iter_pos_cmp(iter, pos,
- iter->nodes[iter->level]->key.k.p)))
+ btree_iter_pos_cmp(pos,
+ &iter->nodes[iter->level]->key.k,
+ iter->is_extents)))
btree_iter_up(iter);
/*
@@ -505,7 +595,7 @@ retry:
struct bkey_s_c k;
while ((k = __btree_iter_peek_all(iter)).k &&
- !btree_iter_pos_cmp(iter, pos, k.k->p))
+ !btree_iter_pos_cmp(pos, k.k, iter->is_extents))
__btree_iter_advance(iter);
}
diff --git a/drivers/md/bcache/btree_iter.h b/drivers/md/bcache/btree_iter.h
index 032a720ecf60..a20614f46d3a 100644
--- a/drivers/md/bcache/btree_iter.h
+++ b/drivers/md/bcache/btree_iter.h
@@ -108,9 +108,16 @@ __next_linked_btree_node(struct btree_iter *iter, struct btree *b,
for ((_linked) = (_iter); \
((_linked) = __next_linked_btree_node(_iter, _b, _linked));)
-void bch_btree_node_iter_fix(struct btree_iter *, struct btree_keys *,
+#ifdef CONFIG_BCACHE_DEBUG
+void bch_btree_iter_verify(struct btree_iter *, struct btree *);
+#else
+static inline void bch_btree_iter_verify(struct btree_iter *iter,
+ struct btree *b) {}
+#endif
+
+void bch_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_packed *,
- bool overwrote);
+ bool);
bool bch_btree_iter_upgrade(struct btree_iter *);
int bch_btree_iter_unlock(struct btree_iter *);
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index d6110c3f0188..791dddadfae4 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -162,8 +162,8 @@ enum extent_insert_hook_ret {
struct extent_insert_hook {
enum extent_insert_hook_ret
- (*fn)(struct extent_insert_hook *, struct btree_iter *,
- struct bpos, struct bkey_s_c, const struct bkey_i *);
+ (*fn)(struct extent_insert_hook *, struct bpos, struct bpos,
+ struct bkey_s_c, const struct bkey_i *);
};
enum btree_insert_ret {
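
A hook now receives the committed position explicitly rather than reading it
out of a btree_iter. A minimal sketch of a conforming hook (the name and body
are illustrative; the real examples are the fs-io.c hooks updated at the end
of this patch):

	static enum extent_insert_hook_ret
	example_hook_fn(struct extent_insert_hook *hook,
			struct bpos committed_pos, struct bpos next_pos,
			struct bkey_s_c k, const struct bkey_i *insert)
	{
		/* this call covers the range [committed_pos, next_pos): */
		s64 sectors = next_pos.offset - committed_pos.offset;

		(void) sectors;
		return BTREE_HOOK_DO_INSERT;
	}
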
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index b09829644731..e37fce09c7a5 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -614,7 +614,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
return 0;
}
-static bool bch_insert_fixup_btree_ptr(struct btree_iter *iter,
+static void bch_insert_fixup_btree_ptr(struct btree_iter *iter,
struct btree *b,
struct bkey_i *insert,
struct btree_node_iter *node_iter,
@@ -624,45 +624,30 @@ static bool bch_insert_fixup_btree_ptr(struct btree_iter *iter,
const struct bkey_format *f = &b->keys.format;
struct bucket_stats_cache_set stats = { 0 };
struct bkey_packed *k;
- int cmp;
-
- EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
- (bkey_deleted(k)
- ? bkey_cmp_packed(f, k, &insert->k) > 0
- : bkey_cmp_packed(f, k, &insert->k) >= 0));
+ struct bkey tmp;
if (bkey_extent_is_data(&insert->k))
bch_mark_key(c, bkey_i_to_s_c(insert),
c->sb.btree_node_size, true,
gc_pos_btree_node(b), &stats);
- while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys))) {
- struct bkey tmp;
- struct bkey_s_c u = bkey_disassemble(f, k, &tmp);
-
- cmp = bkey_cmp(u.k->p, insert->k.p);
- if (cmp > 0)
- break;
-
- if (!cmp && !bkey_deleted(k)) {
- bch_btree_node_free_index(c, b, iter->btree_id,
- u, &stats);
- /*
- * Look up pending delete, mark so that gc marks it on
- * the pending delete list
- */
- k->type = KEY_TYPE_DELETED;
- btree_keys_account_key_drop(&b->keys.nr, k);
- }
-
+ while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys)) &&
+ !btree_iter_pos_cmp_packed(f, insert->k.p, k, false))
bch_btree_node_iter_advance(node_iter, &b->keys);
- }
- bch_btree_bset_insert(iter, b, node_iter, insert);
- set_btree_node_dirty(b);
+ /*
+ * If we're overwriting, look up pending delete and mark so that gc
+ * marks it on the pending delete list:
+ */
+ if (k && !bkey_cmp_packed(f, k, &insert->k))
+ bch_btree_node_free_index(c, b, iter->btree_id,
+ bkey_disassemble(f, k, &tmp),
+ &stats);
bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
- return true;
+
+ bch_btree_bset_insert_key(iter, b, node_iter, insert);
+ set_btree_node_dirty(b);
}
/* Inserting into a given leaf node (last stage of insert): */
@@ -673,9 +658,7 @@ void bch_btree_bset_insert(struct btree_iter *iter,
struct btree_node_iter *node_iter,
struct bkey_i *insert)
{
- struct btree_iter *linked;
struct bkey_packed *where;
- bool overwrote;
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->c, b));
@@ -689,26 +672,37 @@ void bch_btree_bset_insert(struct btree_iter *iter,
* though.
*/
- where = bch_bset_insert(&b->keys, node_iter, insert, &overwrote);
+ where = bch_bset_insert(&b->keys, node_iter, insert);
- bch_btree_node_iter_fix(iter, &b->keys, node_iter, where, overwrote);
+ bch_btree_node_iter_fix(iter, b, node_iter, where, false);
+}
+
+/* Handle overwrites and do insert, for non extents: */
+void bch_btree_bset_insert_key(struct btree_iter *iter,
+ struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_i *insert)
+{
+ const struct bkey_format *f = &b->keys.format;
+ struct bkey_packed *k;
- if (iter->nodes[b->level] == b &&
- node_iter != &iter->node_iters[b->level])
- bch_btree_node_iter_fix(iter, &b->keys,
- &iter->node_iters[b->level],
- where, overwrote);
+ k = bch_btree_node_iter_peek_all(node_iter, &b->keys);
+ if (k && !bkey_cmp_packed(f, k, &insert->k)) {
+ if (bch_bset_try_overwrite(&b->keys, node_iter, k, insert)) {
+ bch_btree_iter_verify(iter, b);
+ return;
+ }
- for_each_linked_btree_node(iter, b, linked)
- bch_btree_node_iter_fix(linked, &b->keys,
- &linked->node_iters[b->level],
- where, overwrote);
+ k->type = KEY_TYPE_DELETED;
+ btree_keys_account_key_drop(&b->keys.nr, k);
+ bch_btree_node_iter_fix(iter, b, node_iter, k, true);
- bch_btree_node_iter_verify(node_iter, &b->keys);
+ if (bkey_deleted(&insert->k) &&
+ bch_bkey_to_bset(&b->keys, k) == bset_tree_last(&b->keys))
+ return;
+ }
- for_each_linked_btree_node(iter, b, linked)
- bch_btree_node_iter_verify(&linked->node_iters[b->level],
- &b->keys);
+ bch_btree_bset_insert(iter, b, node_iter, insert);
}
static void btree_node_flush(struct journal_entry_pin *pin)
@@ -721,20 +715,12 @@ static void btree_node_flush(struct journal_entry_pin *pin)
six_unlock_read(&b->lock);
}
-/**
- * bch_btree_insert_and_journal - insert a non-overlapping key into a btree node
- *
- * This is called from bch_insert_fixup_extent() and bch_insert_fixup_key()
- *
- * The insert is journalled.
- */
-void bch_btree_insert_and_journal(struct btree_iter *iter,
- struct bkey_i *insert,
- struct journal_res *res)
+void bch_btree_journal_key(struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct journal_res *res)
{
struct cache_set *c = iter->c;
struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
EBUG_ON(iter->level || b->level);
@@ -764,8 +750,6 @@ void bch_btree_insert_and_journal(struct btree_iter *iter,
insert, b->level);
btree_bset_last(b)->journal_seq = cpu_to_le64(c->journal.seq);
}
-
- bch_btree_bset_insert(iter, b, node_iter, insert);
}
static void verify_keys_sorted(struct keylist *l)
@@ -1116,7 +1100,6 @@ bch_btree_insert_keys_interior(struct btree *b,
const struct bkey_format *f = &b->keys.format;
struct bkey_i *insert = bch_keylist_front(insert_keys);
struct bkey_packed *k;
- bool inserted = false;
BUG_ON(!btree_node_intent_locked(iter, btree_node_root(b)->level));
BUG_ON(!b->level);
@@ -1131,6 +1114,7 @@ bch_btree_insert_keys_interior(struct btree *b,
return BTREE_INSERT_BTREE_NODE_FULL;
}
+ /* Don't screw up @iter's position: */
node_iter = iter->node_iters[b->level];
/*
@@ -1147,11 +1131,9 @@ bch_btree_insert_keys_interior(struct btree *b,
bch_insert_fixup_btree_ptr(iter, b, insert,
&node_iter, &res->disk_res);
- inserted = true;
bch_keylist_dequeue(insert_keys);
}
- BUG_ON(!inserted);
async_split_updated_btree(as, b);
btree_node_unlock_write(b, iter);
@@ -1530,12 +1512,11 @@ btree_insert_key(struct btree_insert_trans *trans,
struct journal_res *res,
unsigned flags)
{
- struct btree *b = insert->iter->nodes[0];
+ struct btree_iter *iter = insert->iter;
+ struct btree *b = iter->nodes[0];
s64 oldsize = bch_count_data(&b->keys);
enum btree_insert_ret ret;
- bch_btree_node_iter_verify(&insert->iter->node_iters[0], &b->keys);
-
ret = !b->keys.ops->is_extents
? bch_insert_fixup_key(trans, insert, res)
: bch_insert_fixup_extent(trans, insert, disk_res,
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index f7ce263f9908..3e31f685d85b 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -138,8 +138,10 @@ int bch_btree_root_alloc(struct cache_set *, enum btree_id, struct closure *);
void bch_btree_bset_insert(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_i *);
-void bch_btree_insert_and_journal(struct btree_iter *, struct bkey_i *,
- struct journal_res *);
+void bch_btree_bset_insert_key(struct btree_iter *, struct btree *,
+ struct btree_node_iter *, struct bkey_i *);
+void bch_btree_journal_key(struct btree_iter *, struct bkey_i *,
+ struct journal_res *);
static inline struct btree_node_entry *write_block(struct btree *b)
{
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 41b1eb508f09..e24d582e3007 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -112,29 +112,16 @@ bch_insert_fixup_key(struct btree_insert_trans *trans,
struct btree_trans_entry *insert,
struct journal_res *res)
{
- struct btree *b = insert->iter->nodes[0];
- struct btree_node_iter *node_iter = &insert->iter->node_iters[0];
- const struct bkey_format *f = &b->keys.format;
- struct bkey_packed *k;
- int cmp;
-
- BUG_ON(insert->iter->level);
- EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
- (bkey_deleted(k)
- ? bkey_cmp_packed(f, k, &insert->k->k) > 0
- : bkey_cmp_packed(f, k, &insert->k->k) >= 0));
-
- while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys)) &&
- (cmp = bkey_cmp_packed(f, k, &insert->k->k)) <= 0) {
- if (!cmp && !bkey_deleted(k)) {
- k->type = KEY_TYPE_DELETED;
- btree_keys_account_key_drop(&b->keys.nr, k);
- }
+ struct btree_iter *iter = insert->iter;
- bch_btree_node_iter_advance(node_iter, &b->keys);
- }
+ BUG_ON(iter->level);
+
+ bch_btree_journal_key(iter, insert->k, res);
+ bch_btree_bset_insert_key(iter,
+ iter->nodes[0],
+ &iter->node_iters[0],
+ insert->k);
- bch_btree_insert_and_journal(insert->iter, insert->k, res);
trans->did_work = true;
return BTREE_INSERT_OK;
}
@@ -645,23 +632,32 @@ void bch_key_resize(struct bkey *k,
* that we have to unpack the key, modify the unpacked key - then this
* copies/repacks the unpacked to the original as necessary.
*/
-static bool __extent_save(struct bkey_packed *dst, struct bkey *src,
- const struct bkey_format *f)
+static bool __extent_save(struct btree_keys *b, struct btree_node_iter *iter,
+ struct bkey_packed *dst, struct bkey *src)
{
+ struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
+ bool ret;
+
+ BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(src));
if ((dst_unpacked = packed_to_bkey(dst))) {
dst_unpacked->k = *src;
- return true;
+ ret = true;
} else {
- return bkey_pack_key(dst, src, f);
+ ret = bkey_pack_key(dst, src, f);
}
+
+ if (iter)
+ bch_verify_key_order(b, iter, dst);
+
+ return ret;
}
-static void extent_save(struct bkey_packed *dst, struct bkey *src,
- const struct bkey_format *f)
+static void extent_save(struct btree_keys *b, struct btree_node_iter *iter,
+ struct bkey_packed *dst, struct bkey *src)
{
- BUG_ON(!__extent_save(dst, src, f));
+ BUG_ON(!__extent_save(b, iter, dst, src));
}
/*
@@ -788,7 +784,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
sort_key_next(iter, b, _r);
} else {
__bch_cut_front(l.k->p, r);
- extent_save(rk, r.k, f);
+ extent_save(b, NULL, rk, r.k);
}
extent_sort_sift(iter, b, _r - iter->data);
@@ -802,7 +798,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
bch_cut_back(bkey_start_pos(r.k), &tmp.k.k);
__bch_cut_front(r.k->p, l);
- extent_save(lk, l.k, f);
+ extent_save(b, NULL, lk, l.k);
extent_sort_sift(iter, b, 0);
@@ -810,7 +806,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
bkey_to_packed(&tmp.k));
} else {
bch_cut_back(bkey_start_pos(r.k), l.k);
- extent_save(lk, l.k, f);
+ extent_save(b, NULL, lk, l.k);
}
}
@@ -962,7 +958,7 @@ static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
* as @new.
*/
enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
- struct btree_iter *iter,
+ struct bpos committed_pos,
struct bpos next_pos,
struct bkey_s_c k,
const struct bkey_i *new)
@@ -971,10 +967,7 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
struct bch_replace_info, hook);
struct bkey_i *old = &replace->key;
- EBUG_ON(iter->btree_id != BTREE_ID_EXTENTS);
-
- /* iter->pos tracks what we've checked so far: */
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&new->k)) < 0);
+ EBUG_ON(bkey_cmp(committed_pos, bkey_start_pos(&new->k)) < 0);
/* must have something to compare against */
EBUG_ON(!bkey_val_u64s(&old->k));
@@ -992,6 +985,11 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
}
}
+static bool bch_extent_merge_inline(struct btree_iter *iter,
+ struct bkey_packed *,
+ struct bkey_packed *,
+ bool);
+
#define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC)
static enum btree_insert_ret extent_insert_should_stop(struct btree_insert_trans *trans,
@@ -1027,28 +1025,65 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_insert_trans
return BTREE_INSERT_OK;
}
+static void extent_do_insert(struct btree_iter *iter, struct bkey_i *insert,
+ struct journal_res *res, unsigned flags,
+ struct bucket_stats_cache_set *stats)
+{
+ struct btree_keys *b = &iter->nodes[0]->keys;
+ struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct bset_tree *t = bset_tree_last(b);
+ struct bset *i = t->data;
+ struct bkey_packed *prev, *where =
+ bch_btree_node_iter_bset_pos(node_iter, b, i) ?:
+ bset_bkey_last(i);
+
+ if (!(flags & BTREE_INSERT_NO_MARK_KEY))
+ bch_add_sectors(iter, bkey_i_to_s_c(insert),
+ bkey_start_offset(&insert->k),
+ insert->k.size, stats);
+
+ bch_btree_journal_key(iter, insert, res);
+
+ if ((prev = bkey_prev(t, where)) &&
+ bch_extent_merge_inline(iter, prev, bkey_to_packed(insert), true))
+ return;
+
+ if (where != bset_bkey_last(i) &&
+ bch_extent_merge_inline(iter, bkey_to_packed(insert), where, false))
+ return;
+
+ bch_btree_bset_insert(iter, iter->nodes[0], node_iter, insert);
+}
+
static void extent_insert_committed(struct btree_insert_trans *trans,
struct btree_trans_entry *insert,
+ struct bpos committed_pos,
struct journal_res *res, unsigned flags,
struct bucket_stats_cache_set *stats)
{
- struct btree_iter *iter = insert->iter;
- struct bkey_i *k = insert->k, *split;
- EBUG_ON(bkey_cmp(k->k.p, iter->pos) < 0);
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&k->k)) < 0);
+ EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k->k), insert->iter->pos));
+ EBUG_ON(bkey_cmp(insert->k->k.p, committed_pos) < 0);
+ EBUG_ON(bkey_cmp(committed_pos, insert->iter->pos) < 0);
+
+ if (bkey_cmp(committed_pos, insert->iter->pos) > 0) {
+ struct btree_iter *iter = insert->iter;
+ struct btree_keys *b = &iter->nodes[0]->keys;
+ struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct bkey_packed *k;
+ struct bkey_i *split;
+
+ EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
- if (bkey_cmp(iter->pos, bkey_start_pos(&k->k)) > 0) {
- EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
+ split = bch_key_split(committed_pos, insert->k);
+ extent_do_insert(insert->iter, split, res, flags, stats);
- split = bch_key_split(iter->pos, k);
+ bch_btree_iter_set_pos(iter, committed_pos);
- if (!(flags & BTREE_INSERT_NO_MARK_KEY))
- bch_add_sectors(iter, bkey_i_to_s_c(split),
- bkey_start_offset(&split->k),
- split->k.size, stats);
+ while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
+ !btree_iter_pos_cmp_packed(&b->format, iter->pos, k, true))
+ bch_btree_node_iter_advance(node_iter, b);
- bch_btree_insert_and_journal(iter, split, res);
trans->did_work = true;
}
}
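
The EBUG_ON()s above pin down the ordering that the new committed_pos
threading maintains (restating the patch's own assertions, not new code):

	bkey_start_pos(&insert->k->k) == iter->pos
	                              <= committed_pos
	                              <= insert->k->k.p

iter->pos only advances, via bch_btree_iter_set_pos(), once the prefix up to
committed_pos has actually been inserted.
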
@@ -1057,6 +1092,7 @@ static enum extent_insert_hook_ret
__extent_insert_advance_pos(struct btree_insert_trans *trans,
struct btree_trans_entry *insert,
struct extent_insert_hook *hook,
+ struct bpos *committed_pos,
struct bpos next_pos,
struct bkey_s_c k,
struct journal_res *res, unsigned flags,
@@ -1069,7 +1105,7 @@ __extent_insert_advance_pos(struct btree_insert_trans *trans,
k.k->version > insert->k->k.version)
ret = BTREE_HOOK_NO_INSERT;
else if (hook)
- ret = hook->fn(hook, insert->iter, next_pos, k, insert->k);
+ ret = hook->fn(hook, *committed_pos, next_pos, k, insert->k);
else
ret = BTREE_HOOK_DO_INSERT;
@@ -1078,19 +1114,28 @@ __extent_insert_advance_pos(struct btree_insert_trans *trans,
switch (ret) {
case BTREE_HOOK_DO_INSERT:
break;
- case BTREE_HOOK_NO_INSERT:
- extent_insert_committed(trans, insert, res, flags, stats);
+ case BTREE_HOOK_NO_INSERT: {
+ struct btree_iter *iter = insert->iter;
+ struct btree_keys *b = &iter->nodes[0]->keys;
+ struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct bkey_packed *k;
+
+ extent_insert_committed(trans, insert, *committed_pos,
+ res, flags, stats);
__bch_cut_front(next_pos, bkey_i_to_s(insert->k));
+
+ bch_btree_iter_set_pos(iter, next_pos);
+
+ while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
+ !btree_iter_pos_cmp_packed(&b->format, iter->pos, k, true))
+ bch_btree_node_iter_advance(node_iter, b);
break;
+ }
case BTREE_HOOK_RESTART_TRANS:
return ret;
}
- /*
- * Don't update iter->pos until after calling the hook,
- * because the hook fn may use it:
- */
- bch_btree_iter_set_pos(insert->iter, next_pos);
+ *committed_pos = next_pos;
return ret;
}
@@ -1102,21 +1147,22 @@ static enum extent_insert_hook_ret
extent_insert_advance_pos(struct btree_insert_trans *trans,
struct btree_trans_entry *insert,
struct extent_insert_hook *hook,
+ struct bpos *committed_pos,
struct bkey_s_c k,
struct journal_res *res, unsigned flags,
struct bucket_stats_cache_set *stats)
{
struct btree *b = insert->iter->nodes[0];
- struct bpos next_pos = k.k
- ? bpos_min(insert->k->k.p, k.k->p)
- : bpos_min(insert->k->k.p, b->key.k.p);
+ struct bpos next_pos =
+ bpos_min(insert->k->k.p, k.k ? k.k->p : b->key.k.p);
/* hole? */
- if (k.k && bkey_cmp(insert->iter->pos, bkey_start_pos(k.k)) < 0) {
- bool have_uncommitted = bkey_cmp(insert->iter->pos,
+ if (k.k && bkey_cmp(*committed_pos, bkey_start_pos(k.k)) < 0) {
+ bool have_uncommitted = bkey_cmp(*committed_pos,
bkey_start_pos(&insert->k->k)) > 0;
switch (__extent_insert_advance_pos(trans, insert, hook,
+ committed_pos,
bkey_start_pos(k.k),
bkey_s_c_null,
res, flags, stats)) {
@@ -1137,10 +1183,11 @@ extent_insert_advance_pos(struct btree_insert_trans *trans,
}
/* avoid redundant calls to hook fn: */
- if (!bkey_cmp(insert->iter->pos, next_pos))
+ if (!bkey_cmp(*committed_pos, next_pos))
return BTREE_HOOK_DO_INSERT;
- return __extent_insert_advance_pos(trans, insert, hook, next_pos,
+ return __extent_insert_advance_pos(trans, insert, hook,
+ committed_pos, next_pos,
k, res, flags, stats);
}
@@ -1202,6 +1249,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
unsigned nr_done = 0;
u64 start_time = local_clock();
enum btree_insert_ret ret = BTREE_INSERT_OK;
+ struct bpos committed_pos = iter->pos;
EBUG_ON(iter->level);
EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
@@ -1212,9 +1260,9 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
* been inserted, and also to keep @iter->pos consistent with
* @insert->k and the node iterator that we're advancing:
*/
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
+ EBUG_ON(bkey_cmp(committed_pos, bkey_start_pos(&insert->k->k)));
- while (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+ while (bkey_cmp(committed_pos, insert->k->k.p) < 0 &&
(ret = extent_insert_should_stop(trans, insert, res,
start_time, nr_done)) == BTREE_INSERT_OK &&
(_k = bch_btree_node_iter_peek_overlapping(node_iter,
@@ -1228,6 +1276,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
*/
if (k.k->size)
switch (extent_insert_advance_pos(trans, insert, hook,
+ &committed_pos,
k.s_c, res, flags,
&stats)) {
case BTREE_HOOK_DO_INSERT:
@@ -1244,7 +1293,8 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
case BCH_EXTENT_OVERLAP_FRONT:
/* insert and k share the start, invalidate in k */
bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
- extent_save(_k, k.k, f);
+ BUG_ON(bkey_deleted(k.k));
+ extent_save(&b->keys, node_iter, _k, k.k);
break;
case BCH_EXTENT_OVERLAP_BACK:
@@ -1252,7 +1302,8 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
bch_cut_subtract_back(iter,
bkey_start_pos(&insert->k->k),
k, &stats);
- extent_save(_k, k.k, f);
+ BUG_ON(bkey_deleted(k.k));
+ extent_save(&b->keys, node_iter, _k, k.k);
/*
* As the auxiliary tree is indexed by the end of the
@@ -1260,7 +1311,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
* auxiliary tree.
*/
bch_bset_fix_invalidated_key(&b->keys, _k);
- bch_btree_node_iter_advance(node_iter, &b->keys);
+ bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
break;
case BCH_EXTENT_OVERLAP_ALL: {
@@ -1272,7 +1323,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
bch_drop_subtract(iter, k, &stats);
k.k->p = bkey_start_pos(&insert->k->k);
- if (!__extent_save(_k, k.k, f)) {
+ if (!__extent_save(&b->keys, node_iter, _k, k.k)) {
/*
* Couldn't repack: we aren't necessarily able
* to repack if the new key is outside the range
@@ -1280,27 +1331,28 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
* @insert:
*/
k.k->p = orig_pos;
- extent_save(_k, k.k, f);
+ extent_save(&b->keys, node_iter, _k, k.k);
if (extent_insert_advance_pos(trans, insert,
- hook, k.s_c,
- res, flags,
- &stats) ==
+ hook, &committed_pos,
+ k.s_c, res, flags,
+ &stats) ==
BTREE_HOOK_RESTART_TRANS) {
ret = BTREE_INSERT_NEED_TRAVERSE;
goto stop;
}
- extent_insert_committed(trans, insert, res, flags, &stats);
+ extent_insert_committed(trans, insert, committed_pos,
+ res, flags, &stats);
/*
* We split and inserted upto at k.k->p - that
* has to coincide with iter->pos, so that we
* don't have anything more we have to insert
* until we recheck our journal reservation:
*/
- EBUG_ON(bkey_cmp(iter->pos, k.k->p));
+ EBUG_ON(bkey_cmp(committed_pos, k.k->p));
} else {
bch_bset_fix_invalidated_key(&b->keys, _k);
- bch_btree_node_iter_advance(node_iter, &b->keys);
+ bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
}
break;
@@ -1323,10 +1375,12 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
*/
bkey_reassemble(&split.k, k.s_c);
bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
+ BUG_ON(bkey_deleted(&split.k.k));
__bch_cut_front(bkey_start_pos(&insert->k->k), k);
bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
- extent_save(_k, k.k, f);
+ BUG_ON(bkey_deleted(k.k));
+ extent_save(&b->keys, node_iter, _k, k.k);
bch_btree_bset_insert(iter, b, node_iter, &split.k);
break;
@@ -1334,17 +1388,18 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
}
}
- if (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+ if (bkey_cmp(committed_pos, insert->k->k.p) < 0 &&
ret == BTREE_INSERT_OK &&
- extent_insert_advance_pos(trans, insert, hook, bkey_s_c_null, res,
- flags, &stats) == BTREE_HOOK_RESTART_TRANS)
+ extent_insert_advance_pos(trans, insert, hook, &committed_pos,
+ bkey_s_c_null, res, flags,
+ &stats) == BTREE_HOOK_RESTART_TRANS)
ret = BTREE_INSERT_NEED_TRAVERSE;
stop:
- extent_insert_committed(trans, insert, res, flags, &stats);
+ extent_insert_committed(trans, insert, committed_pos, res, flags, &stats);
bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
- if (insert->k->k.size && !bkey_cmp(iter->pos, b->key.k.p))
+ if (insert->k->k.size && !bkey_cmp(committed_pos, b->key.k.p))
ret = BTREE_INSERT_NEED_TRAVERSE;
return ret;
@@ -1886,48 +1941,50 @@ static enum merge_result bch_extent_merge(struct btree_keys *bk,
return BCH_MERGE_MERGE;
}
-static bool extent_i_save(struct bkey_packed *dst, struct bkey_i *src,
- const struct bkey_format *f)
+static void extent_i_save(struct btree_keys *b, struct btree_node_iter *iter,
+ struct bkey_packed *dst, struct bkey_i *src)
{
- BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
+ struct bkey_format *f = &b->format;
- if (!__extent_save(dst, &src->k, f))
- return false;
+ BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
+ extent_save(b, iter, dst, &src->k);
memcpy(bkeyp_val(f, dst), &src->v, bkey_val_bytes(&src->k));
- return true;
}
-static bool extent_merge_one_overlapping(struct btree_keys *b,
- struct bkey *m, struct bpos new_pos,
- struct bkey_packed *k, struct bkey uk,
- bool check, bool could_pack)
+static bool extent_merge_one_overlapping(struct btree_iter *iter,
+ struct bpos new_pos,
+ struct bkey_packed *k, struct bkey uk,
+ bool check, bool could_pack)
{
+ struct btree *b = iter->nodes[0];
+ struct btree_node_iter *node_iter = &iter->node_iters[0];
+
BUG_ON(!bkey_deleted(k));
if (check) {
- if (bkey_packed(k) && !could_pack)
- return false;
+ return !bkey_packed(k) || could_pack;
} else {
uk.p = new_pos;
- extent_save(k, &uk, &b->format);
- bch_bset_fix_invalidated_key(b, k);
+ extent_save(&b->keys, node_iter, k, &uk);
+ bch_bset_fix_invalidated_key(&b->keys, k);
+ bch_btree_node_iter_fix(iter, b, node_iter, k, true);
+ return true;
}
-
- return true;
}
-static bool extent_merge_do_overlapping(struct btree_keys *b,
- struct btree_node_iter *iter,
- struct bkey *m,
- bool back_merge, bool check)
+static bool extent_merge_do_overlapping(struct btree_iter *iter,
+ struct bkey *m, bool back_merge)
{
+ struct btree_keys *b = &iter->nodes[0]->keys;
+ struct btree_node_iter *node_iter = &iter->node_iters[0];
const struct bkey_format *f = &b->format;
struct bset_tree *t;
struct bkey_packed *k;
struct bkey uk;
- struct bpos new_pos = back_merge ? bkey_start_pos(m) : m->p;
+ struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m);
bool could_pack = bkey_pack_pos((void *) &uk, new_pos, f);
+ bool check = true;
/*
* @m is the new merged extent:
@@ -1938,6 +1995,7 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
*
* But in the other bsets, we have to check for and fix such extents:
*/
+do_fixup:
for (t = b->set; t < b->set + b->nsets; t++) {
if (!t->data->u64s)
continue;
@@ -1946,7 +2004,7 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
* if we don't find this bset in the iterator we already got to
* the end of that bset, so start searching from the end.
*/
- k = bch_btree_node_iter_bset_pos(iter, b, t->data) ?:
+ k = bch_btree_node_iter_bset_pos(node_iter, b, t->data) ?:
bkey_prev(t, bset_bkey_last(t->data));
if (back_merge) {
@@ -1963,7 +2021,7 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
if (bkey_cmp(uk.p, m->p) >= 0)
continue;
- if (!extent_merge_one_overlapping(b, m, new_pos,
+ if (!extent_merge_one_overlapping(iter, new_pos,
k, uk, check, could_pack))
return false;
}
@@ -1978,13 +2036,18 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
bkey_start_pos(m)) <= 0)
continue;
- if (!extent_merge_one_overlapping(b, m, new_pos,
+ if (!extent_merge_one_overlapping(iter, new_pos,
k, uk, check, could_pack))
return false;
}
}
}
+ if (check) {
+ check = false;
+ goto do_fixup;
+ }
+
return true;
}
@@ -1996,18 +2059,19 @@ static bool extent_merge_do_overlapping(struct btree_keys *b,
*
* Also unpacks and repacks.
*/
-static bool bch_extent_merge_inline(struct btree_keys *b,
- struct btree_node_iter *iter,
+static bool bch_extent_merge_inline(struct btree_iter *iter,
struct bkey_packed *l,
struct bkey_packed *r,
bool back_merge)
{
+ struct btree_keys *b = &iter->nodes[0]->keys;
+ struct btree_node_iter *node_iter = &iter->node_iters[0];
const struct bkey_format *f = &b->format;
struct bkey_packed *m;
BKEY_PADDED(k) li;
BKEY_PADDED(k) ri;
struct bkey_i *mi;
- bool ret;
+ struct bkey tmp;
/*
* We need to save copies of both l and r, because we might get a
@@ -2026,42 +2090,55 @@ static bool bch_extent_merge_inline(struct btree_keys *b,
case BCH_MERGE_NOMERGE:
return false;
case BCH_MERGE_PARTIAL:
- if (!extent_merge_do_overlapping(b, iter, &li.k.k,
- back_merge, true))
+ if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &mi->k, f))
return false;
- if (!extent_i_save(m, mi, f))
+ if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
return false;
+ extent_i_save(b, node_iter, m, mi);
+
+ /*
+ * Update iterator to reflect what we just inserted - otherwise,
+ * the iter_fix() call is going to put us _before_ the key we
+ * just partially merged with:
+ */
+ if (back_merge) {
+ struct bkey_packed *k;
+
+ bch_btree_iter_set_pos(iter, li.k.k.p);
+ while ((k = bch_btree_node_iter_peek_all(node_iter, b)) &&
+ !btree_iter_pos_cmp_packed(&b->format, iter->pos, k, true))
+ bch_btree_node_iter_advance(node_iter, b);
+ }
+
+ bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
+ m, true);
+
if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
else
bkey_copy(packed_to_bkey(r), &ri.k);
-
- ret = false;
- break;
+ return false;
case BCH_MERGE_MERGE:
- if (!extent_merge_do_overlapping(b, iter, &li.k.k,
- back_merge, true))
+ if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &li.k.k, f))
return false;
- if (!extent_i_save(m, &li.k, f))
+ if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
return false;
- ret = true;
- break;
+ extent_i_save(b, node_iter, m, &li.k);
+ bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
+ m, true);
+ return true;
default:
BUG();
}
-
- extent_merge_do_overlapping(b, iter, &li.k.k, back_merge, false);
- return ret;
}
static const struct btree_keys_ops bch_extent_ops = {
.key_normalize = bch_ptr_normalize,
.key_merge = bch_extent_merge,
- .key_merge_inline = bch_extent_merge_inline,
.is_extents = true,
};
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 23418ccb04f9..7bab66a30c97 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -52,8 +52,8 @@ bch_extent_pick_ptr(struct cache_set *c, struct bkey_s_c k,
}
enum extent_insert_hook_ret
-bch_extent_cmpxchg(struct extent_insert_hook *, struct btree_iter *,
- struct bpos, struct bkey_s_c, const struct bkey_i *);
+bch_extent_cmpxchg(struct extent_insert_hook *, struct bpos, struct bpos,
+ struct bkey_s_c, const struct bkey_i *);
enum btree_insert_ret
bch_insert_fixup_extent(struct btree_insert_trans *,
diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c
index 60bc059de187..a71a4cdd45d3 100644
--- a/drivers/md/bcache/fs-io.c
+++ b/drivers/md/bcache/fs-io.c
@@ -100,14 +100,14 @@ static inline void i_size_dirty_get(struct bch_inode_info *ei)
static enum extent_insert_hook_ret
i_sectors_hook_fn(struct extent_insert_hook *hook,
- struct btree_iter *iter,
+ struct bpos committed_pos,
struct bpos next_pos,
struct bkey_s_c k,
const struct bkey_i *insert)
{
struct i_sectors_hook *h = container_of(hook,
struct i_sectors_hook, hook);
- s64 sectors = next_pos.offset - iter->pos.offset;
+ s64 sectors = next_pos.offset - committed_pos.offset;
int sign = bkey_extent_is_allocation(&insert->k) -
(k.k && bkey_extent_is_allocation(k.k));
@@ -207,7 +207,7 @@ struct bchfs_extent_trans_hook {
static enum extent_insert_hook_ret
bchfs_extent_update_hook(struct extent_insert_hook *hook,
- struct btree_iter *iter,
+ struct bpos committed_pos,
struct bpos next_pos,
struct bkey_s_c k,
const struct bkey_i *insert)
@@ -218,7 +218,7 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
struct inode *inode = &ei->vfs_inode;
int sign = bkey_extent_is_allocation(&insert->k) -
(k.k && bkey_extent_is_allocation(k.k));
- s64 sectors = (s64) (next_pos.offset - iter->pos.offset) * sign;
+ s64 sectors = (s64) (next_pos.offset - committed_pos.offset) * sign;
u64 offset = min(next_pos.offset << 9, h->op->new_i_size);
BUG_ON((next_pos.offset << 9) > round_up(offset, PAGE_SIZE));