diff options
-rw-r--r-- | drivers/md/bcache/btree.c | 125 |
1 files changed, 50 insertions, 75 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 2b1d4be19f23..d366d4ca2543 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2032,6 +2032,8 @@ static int btree_split(struct btree *b, uint64_t start_time = local_clock(); struct bkey_packed *k; enum btree_insert_status status; + unsigned u64s_to_insert = b->level + ? bch_keylist_nkeys(insert_keys) : 0; int ret; BUG_ON(!parent && (b != btree_node_root(b))); @@ -2051,68 +2053,10 @@ static int btree_split(struct btree *b, } n1 = btree_node_alloc_replacement(b); - bch_verify_btree_keys_accounting(&n1->keys); set1 = btree_bset_first(n1); - /* - * For updates to interior nodes, we've got to do the insert before we - * split because the stuff we're inserting has to be inserted - * atomically. Post split, the keys might have to go in different nodes - * and the split would no longer be atomic. - * - * But for updates to leaf nodes (in the extent btree, anyways) - we - * can't update the new replacement node while the old node is still - * visible. Reason being as we do the update we're updating garbage - * collection information on the fly, possibly causing a bucket to - * become unreferenced and available to the allocator to reuse - we - * don't want that to happen while other threads can still use the old - * version of the btree node. - */ - if (b->level) { - six_unlock_write(&n1->lock); - - btree_iter_node_set(iter, n1); /* set temporarily for insert */ - status = bch_btree_insert_keys(n1, iter, insert_keys, - replace, persistent, 0); - BUG_ON(status != BTREE_INSERT_INSERTED); - - iter->nodes[b->level] = b; /* still have b locked */ - - six_lock_write(&n1->lock); - - /* - * There might be duplicate (deleted) keys after the - * bch_btree_insert_keys() call - we need to remove them before - * we split, as it would be rather bad if we picked a duplicate - * for the pivot. - * - * Additionally, inserting might overwrite a bunch of existing - * keys (i.e. a big discard when there were a bunch of small - * extents previously) - we might not want to split after the - * insert. Splitting a node that's too small to be split would - * be bad (if the node had only one key, we wouldn't be able to - * assign the new node a key different from the original node) - */ - k = set1->start; - while (k != bset_bkey_last(set1)) - if (bkey_deleted(k)) { - set1->u64s -= k->u64s; - memmove(k, bkey_next(k), - (void *) bset_bkey_last(set1) - - (void *) k); - } else - k = bkey_next(k); - } - bch_verify_btree_keys_accounting(&n1->keys); - - /* - * Note that on recursive parent_keys == insert_keys, so we can't start - * adding new keys to parent_keys before emptying it out (by doing the - * insert, which we just did above) - */ - if (__set_blocks(n1->data, - n1->data->keys.u64s, + n1->data->keys.u64s + u64s_to_insert, block_bytes(n1->c)) > btree_blocks(iter->c) * 3 / 4) { size_t nr_packed = 0, nr_unpacked = 0; @@ -2172,12 +2116,40 @@ static int btree_split(struct btree *b, n2->key.k.p = b->key.k.p; + n1->keys.set->size = 0; + n2->keys.set->size = 0; + six_unlock_write(&n1->lock); six_unlock_write(&n2->lock); bch_verify_btree_keys_accounting(&n1->keys); bch_verify_btree_keys_accounting(&n2->keys); + /* + * For updates to interior nodes, we've got to do the insert + * before we split because the stuff we're inserting has to be + * inserted atomically. Post split, the keys might have to go in + * different nodes and the split would no longer be atomic. + */ + if (b->level) { + btree_iter_node_set(iter, n1); + status = bch_btree_insert_keys(n1, iter, insert_keys, + replace, persistent, 0); + BUG_ON(status == BTREE_INSERT_NEED_SPLIT); + + btree_iter_node_set(iter, n2); + status = bch_btree_insert_keys(n2, iter, insert_keys, + replace, persistent, 0); + BUG_ON(status == BTREE_INSERT_NEED_SPLIT); + BUG_ON(!bch_keylist_empty(insert_keys)); + iter->nodes[b->level] = b; /* still have b locked */ + } + + /* + * Note that on recursive parent_keys == insert_keys, so we + * can't start adding new keys to parent_keys before emptying it + * out (by doing the insert, which we just did above) + */ bch_keylist_add(parent_keys, &n1->key); bch_keylist_add(parent_keys, &n2->key); @@ -2194,8 +2166,17 @@ static int btree_split(struct btree *b, six_unlock_write(&b->lock); } else { trace_bcache_btree_node_compact(b, set1->u64s); - six_unlock_write(&n1->lock); + + if (b->level) { + btree_iter_node_set(iter, n1); + status = bch_btree_insert_keys(n1, iter, insert_keys, + replace, persistent, 0); + BUG_ON(status != BTREE_INSERT_INSERTED); + BUG_ON(!bch_keylist_empty(insert_keys)); + iter->nodes[b->level] = b; /* still have b locked */ + } + bch_keylist_add(parent_keys, &n1->key); } @@ -2243,26 +2224,16 @@ static int btree_split(struct btree *b, /* Update iterator, and finish insert now that new nodes are visible: */ BUG_ON(iter->nodes[b->level] != b); - six_unlock_intent(&b->lock); - btree_iter_node_set(iter, n1); - if (!n1->level) - bch_btree_insert_keys(n1, iter, insert_keys, - replace, persistent, - flags); - - if (n2 && - bkey_cmp(iter->pos, n1->key.k.p) > 0) { + if (n2 && bkey_cmp(iter->pos, n1->key.k.p) > 0) { six_unlock_intent(&n1->lock); btree_iter_node_set(iter, n2); - - if (!n2->level) - bch_btree_insert_keys(n2, iter, insert_keys, - replace, persistent, - flags); } else if (n2) { six_unlock_intent(&n2->lock); + btree_iter_node_set(iter, n1); + } else { + btree_iter_node_set(iter, n1); } bch_time_stats_update(&iter->c->btree_split_time, start_time); @@ -2393,10 +2364,14 @@ traverse: bch_btree_iter_unlock(iter); if (bch_keylist_empty(insert_keys) || - (flags & BTREE_INSERT_ATOMIC) || ret == -EROFS) break; + if (flags & BTREE_INSERT_ATOMIC) { + ret = ret ?: -EINTR; + break; + } + bch_btree_iter_set_pos(iter, bkey_start_pos(&bch_keylist_front(insert_keys)->k)); |