author     Kent Overstreet <kent.overstreet@gmail.com>  2016-11-06 08:00:44 -0900
committer  Kent Overstreet <kent.overstreet@gmail.com>  2017-01-18 21:41:07 -0900
commit     d0844f5e00edab7a90a0ddcf612e5e76c458b9f4 (patch)
tree       a8568d9faa02944baabe45ea0f861927b84a6a9f
parent     b2dfeb671b53f608ab7299aee1bd5958c82ba367 (diff)
bcache: Optimize btree node writes
-rw-r--r--  drivers/md/bcache/bkey.h          |   9
-rw-r--r--  drivers/md/bcache/bset.c          |  52
-rw-r--r--  drivers/md/bcache/bset.h          |  16
-rw-r--r--  drivers/md/bcache/btree_gc.c      |   2
-rw-r--r--  drivers/md/bcache/btree_io.c      | 269
-rw-r--r--  drivers/md/bcache/btree_io.h      |  10
-rw-r--r--  drivers/md/bcache/btree_update.c  |  44
-rw-r--r--  drivers/md/bcache/btree_update.h  |  17
-rw-r--r--  drivers/md/bcache/extents.c       |   6
9 files changed, 234 insertions(+), 191 deletions(-)
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h
index b562ac04c4d9..1466e890dad1 100644
--- a/drivers/md/bcache/bkey.h
+++ b/drivers/md/bcache/bkey.h
@@ -73,6 +73,9 @@ static inline void set_bkey_deleted(struct bkey *k)
 
 #define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED)
 
+#define bkey_whiteout(_k) \
+	((_k)->type == KEY_TYPE_DELETED || (_k)->type == KEY_TYPE_DISCARD)
+
 static inline void __memcpy_u64s(void *dst, const void *src,
 				 unsigned u64s)
 {
@@ -354,6 +357,12 @@ static inline size_t bkeyp_val_bytes(const struct bkey_format *format,
 	return bkeyp_val_u64s(format, k) * sizeof(u64);
 }
 
+static inline void set_bkeyp_val_u64s(const struct bkey_format *format,
+				      struct bkey_packed *k, unsigned val_u64s)
+{
+	k->u64s = bkeyp_key_u64s(format, k) + val_u64s;
+}
+
 #define bkeyp_val(_format, _k)						\
 	((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k)))
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 0b581b9203f8..f06b5eeb5d7e 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -145,7 +145,7 @@ void __bch_verify_btree_nr_keys(struct btree_keys *b)
 		for (k = t->data->start;
 		     k != bset_bkey_last(t->data);
 		     k = bkey_next(k))
-			if (!bkey_packed_is_whiteout(b, k))
+			if (!bkey_whiteout(k))
 				btree_keys_account_key_add(&nr, t - b->set, k);
 
 	BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
@@ -1116,7 +1116,7 @@ void bch_bset_insert(struct btree_keys *b,
 	if (bkey_pack_key(&packed, &insert->k, f))
 		src = &packed;
 
-	if (!bkey_is_whiteout(&insert->k))
+	if (!bkey_whiteout(&insert->k))
 		btree_keys_account_key_add(&b->nr, t - b->set, src);
 
 	if (src->u64s != clobber_u64s) {
@@ -1612,53 +1612,7 @@ struct bkey_s_c bch_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
 }
 EXPORT_SYMBOL(bch_btree_node_iter_peek_unpack);
 
-bool bch_maybe_compact_deleted_keys(struct btree_keys *b)
-{
-	struct bset_tree *t, *rebuild_from = NULL;
-	bool last_set_aux_tree_ro = bset_has_ro_aux_tree(bset_tree_last(b));
-
-	for_each_bset(b, t) {
-		struct bset *i = t->data;
-		struct bkey_packed *k, *n, *out = i->start;
-
-		if (b->nr.bset_u64s[t - b->set] * 4 > le16_to_cpu(i->u64s) * 3)
-			continue;
-
-		/*
-		 * We cannot drop deleted keys from the last bset, if it hasn't
-		 * been written yet - we may need that whiteout on disk:
-		 *
-		 * XXX unless they're extents, if we fix assertions elsewhere
-		 */
-		if (t == bset_tree_last(b) && !last_set_aux_tree_ro)
-			break;
-
-		for (k = i->start; k != bset_bkey_last(i); k = n) {
-			n = bkey_next(k);
-
-			if (!bkey_packed_is_whiteout(b, k)) {
-				bkey_copy(out, k);
-				out = bkey_next(out);
-			}
-		}
-
-		i->u64s = cpu_to_le16((u64 *) out - i->_data);
-		bch_bset_set_no_aux_tree(b, t);
-
-		if (!rebuild_from)
-			rebuild_from = t;
-	}
-
-	if (!rebuild_from)
-		return false;
-
-	for (t = rebuild_from; t < b->set + b->nsets; t++)
-		bch_bset_build_aux_tree(b, t,
-				t == bset_tree_last(b) &&
-				!last_set_aux_tree_ro);
-
-	return true;
-}
+/* Mergesort */
 
 void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
 {
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index aba5ae3f99ed..361c8dafd39a 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -564,20 +564,6 @@ struct bkey_s_c bch_btree_node_iter_peek_unpack(struct btree_node_iter *,
 
 /* Accounting: */
 
-static inline bool bkey_is_whiteout(const struct bkey *k)
-{
-	return bkey_deleted(k) ||
-		(k->type == KEY_TYPE_DISCARD && !k->version);
-}
-
-static inline bool bkey_packed_is_whiteout(const struct btree_keys *b,
-					   const struct bkey_packed *k)
-{
-	return bkey_deleted(k) ||
-		(k->type == KEY_TYPE_DISCARD &&
-		 !bkey_unpack_key(&b->format, k).version);
-}
-
 static inline void btree_keys_account_key(struct btree_nr_keys *n,
 					  unsigned bset,
 					  struct bkey_packed *k,
@@ -597,8 +583,6 @@ static inline void btree_keys_account_key(struct btree_nr_keys *n,
 #define btree_keys_account_key_drop(_nr, _bset_idx, _k)	\
 	btree_keys_account_key(_nr, _bset_idx, _k, -1)
 
-bool bch_maybe_compact_deleted_keys(struct btree_keys *);
-
 struct bset_stats {
 	struct {
 		size_t nr, bytes;
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index b25398826f42..e0b08e137094 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -591,7 +591,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
 		bch_btree_build_aux_trees(n);
 		six_unlock_write(&n->lock);
 
-		bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, NULL, -1);
+		bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, -1);
 	}
 
 	/*
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index 8345c733edfb..a467fd4c4421 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -127,12 +127,82 @@ static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
 	return ret;
 }
 
+bool bch_maybe_compact_whiteouts(struct cache_set *c, struct btree *b)
+{
+	struct bset_tree *t, *rebuild_from = NULL;
+
+	for_each_bset(&b->keys, t) {
+		struct bset *i = t->data;
+		struct bkey_packed *k, *n, *out = i->start;
+		unsigned live_u64s = b->keys.nr.bset_u64s[t - b->keys.set];
+
+		if (live_u64s * 4 > le16_to_cpu(i->u64s) * 3)
+			continue;
+
+		if (!bset_written(b, i))
+			continue;
+
+		for (k = i->start; k != bset_bkey_last(i); k = n) {
+			n = bkey_next(k);
+
+			if (!bkey_whiteout(k)) {
+				bkey_copy(out, k);
+				out = bkey_next(out);
+			} else if (!bset_written(b, i)) {
+				/*
+				 * We cannot drop whiteouts that haven't been
+				 * written out - we can drop the value, though:
+				 *
+				 * XXX we could drop 0 size extents, if we fix
+				 * assertions elsewhere
+				 */
+				set_bkeyp_val_u64s(&b->keys.format, k, 0);
+				bkey_copy(out, k);
+				out = bkey_next(out);
+			}
+		}
+
+		i->u64s = cpu_to_le16((u64 *) out - i->_data);
+		bch_bset_set_no_aux_tree(&b->keys, t);
+
+		if (!rebuild_from)
+			rebuild_from = t;
+	}
+
+	if (!rebuild_from)
+		return false;
+
+	for (t = rebuild_from; t < b->keys.set + b->keys.nsets; t++) {
+		/*
+		 * Compacting a set that hasn't been written out yet might mean
+		 * we need to shift the set after it down:
+		 */
+		if (!bset_written(b, t->data) &&
+		    t != bset_tree_last(&b->keys)) {
+			struct btree_node_entry *dst = (void *) b->data +
+				(bset_end_sector(c, b, t->data) << 9);
+			struct btree_node_entry *src = container_of(t[1].data,
+					struct btree_node_entry, keys);
+
+			if (dst != src) {
+				memmove(dst, src,
+					(void *) bset_bkey_last(&src->keys) -
+					(void *) src);
+				t[1].data = &dst->keys;
+			}
+		}
+	}
+
+	bch_btree_build_aux_trees(b);
+	bch_verify_btree_nr_keys(&b->keys);
+	return true;
+}
+
 static inline int sort_keys_cmp(struct btree_keys *b,
 				struct bkey_packed *l,
 				struct bkey_packed *r)
 {
 	return bkey_cmp_packed(&b->format, l, r) ?:
-		(int) bkey_deleted(r) - (int) bkey_deleted(l);
+		(int) bkey_whiteout(r) - (int) bkey_whiteout(l);
 }
 
 static unsigned sort_keys(struct bkey_packed *dst,
@@ -148,7 +218,7 @@ static unsigned sort_keys(struct bkey_packed *dst,
 		    !bkey_cmp_packed(&iter->b->format, in, next))
 			continue;
 
-		if (filter_whiteouts && bkey_deleted(in))
+		if (filter_whiteouts && bkey_whiteout(in))
 			continue;
 
 		bkey_copy(out, in);
@@ -178,7 +248,7 @@ static unsigned sort_extents(struct bkey_packed *dst,
 		if (bkey_deleted(in))
 			continue;
 
-		if (filter_whiteouts && bkey_packed_is_whiteout(iter->b, in))
+		if (filter_whiteouts && bkey_whiteout(in))
 			continue;
 
 		bkey_copy(out, in);
@@ -189,7 +259,9 @@ static unsigned sort_extents(struct bkey_packed *dst,
 }
 
 static void btree_node_sort(struct cache_set *c, struct btree *b,
-			    struct btree_iter *iter, unsigned from,
+			    struct btree_iter *iter,
+			    unsigned start_idx,
+			    unsigned end_idx,
 			    bool is_write_locked)
 {
 	struct btree_node *out;
@@ -197,13 +269,14 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
 	struct bset_tree *t;
 	bool used_mempool = false, filter_whiteouts;
 	u64 start_time;
-	unsigned i, u64s, order, shift = b->keys.nsets - from - 1;
-	bool sorting_entire_node = from == 0;
+	unsigned i, u64s, order, shift = end_idx - start_idx - 1;
+	bool sorting_entire_node = start_idx == 0 &&
+		end_idx == b->keys.nsets;
 
 	sort_iter_init(&sort_iter, &b->keys);
 
-	for (t = b->keys.set + from;
-	     t < b->keys.set + b->keys.nsets;
+	for (t = b->keys.set + start_idx;
+	     t < b->keys.set + end_idx;
 	     t++) {
 		u64s += le16_to_cpu(t->data->u64s);
 		sort_iter_add(&sort_iter, t->data->start,
@@ -218,7 +291,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
 
 	start_time = local_clock();
 
-	filter_whiteouts = bset_written(b, b->keys.set[from].data);
+	filter_whiteouts = bset_written(b, b->keys.set[start_idx].data);
 
 	u64s = btree_node_is_extents(b)
 		? sort_extents(out->keys.start, &sort_iter, filter_whiteouts)
@@ -229,18 +302,18 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
 	BUG_ON((void *) bset_bkey_last(&out->keys) >
 	       (void *) out + (PAGE_SIZE << order));
 
-	if (!from)
+	if (sorting_entire_node)
 		bch_time_stats_update(&c->btree_sort_time, start_time);
 
 	if (!is_write_locked)
 		__btree_node_lock_write(b, iter);
 
 	/* Make sure we preserve bset journal_seq: */
-	for (t = b->keys.set + from + 1;
-	     t < b->keys.set + b->keys.nsets;
+	for (t = b->keys.set + start_idx + 1;
+	     t < b->keys.set + end_idx;
 	     t++)
-		b->keys.set[from].data->journal_seq =
-			max(b->keys.set[from].data->journal_seq,
+		b->keys.set[start_idx].data->journal_seq =
+			max(b->keys.set[start_idx].data->journal_seq,
 			    t->data->journal_seq);
 
 	if (sorting_entire_node) {
@@ -258,19 +331,19 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
 		swap(out, b->data);
 		b->keys.set->data = &b->data->keys;
 	} else {
-		b->keys.set[from].data->u64s = out->keys.u64s;
-		memcpy_u64s(b->keys.set[from].data->start,
+		b->keys.set[start_idx].data->u64s = out->keys.u64s;
+		memcpy_u64s(b->keys.set[start_idx].data->start,
 			    out->keys.start,
 			    le16_to_cpu(out->keys.u64s));
 	}
 
-	for (i = from + 1; i < b->keys.nsets; i++)
-		b->keys.nr.bset_u64s[from] +=
+	for (i = start_idx + 1; i < end_idx; i++)
+		b->keys.nr.bset_u64s[start_idx] +=
 			b->keys.nr.bset_u64s[i];
 
 	b->keys.nsets -= shift;
 
-	for (i = from + 1; i < b->keys.nsets; i++) {
+	for (i = start_idx + 1; i < b->keys.nsets; i++) {
 		b->keys.nr.bset_u64s[i] = b->keys.nr.bset_u64s[i + shift];
 		b->keys.set[i] = b->keys.set[i + shift];
 	}
@@ -278,7 +351,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
 	for (i = b->keys.nsets; i < MAX_BSETS; i++)
 		b->keys.nr.bset_u64s[i] = 0;
 
-	bch_bset_set_no_aux_tree(&b->keys, &b->keys.set[from]);
+	bch_bset_set_no_aux_tree(&b->keys, &b->keys.set[start_idx]);
 
 	if (!is_write_locked)
 		__btree_node_unlock_write(b, iter);
@@ -302,7 +375,7 @@ static struct btree_nr_keys sort_repack(struct bset *dst,
 	memset(&nr, 0, sizeof(nr));
 
 	while ((in = bch_btree_node_iter_next_all(src_iter, src))) {
-		if (filter_whiteouts && bkey_packed_is_whiteout(src, in))
+		if (filter_whiteouts && bkey_whiteout(in))
 			continue;
 
 		if (bch_bkey_transform(out_f, out, bkey_packed(in)
@@ -337,7 +410,7 @@ static struct btree_nr_keys sort_repack_merge(struct cache_set *c,
 	memset(&nr, 0, sizeof(nr));
 
 	while ((k = bch_btree_node_iter_next_all(iter, src))) {
-		if (filter_whiteouts && bkey_packed_is_whiteout(src, k))
+		if (filter_whiteouts && bkey_whiteout(k))
 			continue;
 
 		/*
@@ -434,35 +507,27 @@ void bch_btree_sort_into(struct cache_set *c,
 static bool btree_node_compact(struct cache_set *c, struct btree *b,
 			       struct btree_iter *iter)
 {
-	unsigned crit = SORT_CRIT;
-	unsigned crit_factor = int_sqrt(btree_pages(c));
-	int i = 0;
-
-	/* Don't sort if nothing to do */
-	if (b->keys.nsets == 1)
-		return false;
+	unsigned unwritten_idx;
+	bool ret = false;
 
-	/* If not a leaf node, always sort */
-	if (b->level)
-		goto sort;
-
-	for (i = b->keys.nsets - 2; i >= 0; --i) {
-		crit *= crit_factor;
+	for (unwritten_idx = 0;
+	     unwritten_idx < b->keys.nsets;
+	     unwritten_idx++)
+		if (!bset_written(b, b->keys.set[unwritten_idx].data))
+			break;
 
-		if (le16_to_cpu(b->keys.set[i].data->u64s) < crit)
-			goto sort;
+	if (b->keys.nsets - unwritten_idx > 1) {
+		btree_node_sort(c, b, iter, unwritten_idx,
+				b->keys.nsets, false);
+		ret = true;
 	}
 
-	/* Sort if we'd overflow */
-	if (b->keys.nsets == MAX_BSETS) {
-		i = 0;
-		goto sort;
+	if (unwritten_idx > 1) {
+		btree_node_sort(c, b, iter, 0, unwritten_idx, false);
+		ret = true;
 	}
 
-	return false;
-sort:
-	btree_node_sort(c, b, iter, i, false);
-	return true;
+	return ret;
 }
 
 void bch_btree_build_aux_trees(struct btree *b)
@@ -487,7 +552,9 @@ void bch_btree_build_aux_trees(struct btree *b)
 void bch_btree_init_next(struct cache_set *c, struct btree *b,
 			 struct btree_iter *iter)
 {
+	struct bset *i;
 	bool did_sort;
+	unsigned offset;
 
 	BUG_ON(iter && iter->nodes[b->level] != b);
 
@@ -499,8 +566,17 @@ void bch_btree_init_next(struct cache_set *c, struct btree *b,
 
 	__btree_node_lock_write(b, iter);
 
-	if (b->written < c->sb.btree_node_size) {
-		struct btree_node_entry *bne = write_block(b);
+	/*
+	 * If the last bset has been written, or if it's gotten too big - start
+	 * a new bset to insert into:
+	 */
+	i = btree_bset_last(b);
+	offset = next_bset_offset(c, b);
+
+	if (offset < c->sb.btree_node_size &&
+	    (bset_written(b, i) ||
+	     le16_to_cpu(i->u64s) * sizeof(u64) > BTREE_WRITE_SET_BUFFER)) {
+		struct btree_node_entry *bne = (void *) b->data + (offset << 9);
 
 		bch_bset_init_next(&b->keys, &bne->keys);
 	}
@@ -612,7 +688,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
 
 		if (BSET_SEPARATE_WHITEOUTS(i) &&
 		    !seen_non_whiteout &&
-		    !bkey_packed_is_whiteout(&b->keys, k)) {
+		    !bkey_whiteout(k)) {
 			*whiteout_u64s = k->_data - i->_data;
 			seen_non_whiteout = true;
 		}
@@ -928,7 +1004,8 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
 {
 	struct bio *bio;
 	struct bch_write_bio *wbio;
-	struct bset *i;
+	struct bset_tree *t;
+	struct bset *last_set_to_write = NULL;
 	BKEY_PADDED(key) k;
 	struct bkey_s_extent e;
 	struct bch_extent_ptr *ptr;
@@ -965,38 +1042,64 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
 #endif
 	trace_bcache_btree_write(c, b);
 
-	i = btree_bset_last(b);
 	BUG_ON(b->written >= c->sb.btree_node_size);
-	BUG_ON(b->written && !i->u64s);
-	BUG_ON(btree_bset_first(b)->seq != i->seq);
-	BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
+	BUG_ON(bset_written(b, btree_bset_last(b)));
+	BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb));
+	BUG_ON(memcmp(&b->data->format, &b->keys.format,
+		      sizeof(b->keys.format)));
 
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
-	i->version = cpu_to_le16(BCACHE_BSET_VERSION);
+	for_each_bset(&b->keys, t)
+		if (!bset_written(b, t->data))
+			break;
 
-	SET_BSET_CSUM_TYPE(i, c->opts.metadata_checksum);
+	if (t != bset_tree_last(&b->keys)) {
+		/*
+		 * See if we can get a write lock and compact these bsets
+		 * together:
+		 */
+	}
 
-	if (!b->written) {
-		BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb));
+	for_each_bset(&b->keys, t) {
+		struct bset *i = t->data;
 
-		b->data->format = b->keys.format;
-		data = b->data;
-		b->data->csum = cpu_to_le64(btree_csum_set(b, b->data));
-		sectors_to_write = __set_blocks(b->data,
-						le16_to_cpu(b->data->keys.u64s),
-						block_bytes(c)) << c->block_bits;
+		if (bset_written(b, i))
+			continue;
 
-	} else {
-		struct btree_node_entry *bne = write_block(b);
+		BUG_ON(i->seq != b->data->keys.seq);
+		BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
+		BUG_ON(i != btree_bset_last(b) &&
+		       bset_end_sector(c, b, i) << 9 !=
+		       bset_byte_offset(b, container_of(t[1].data,
+					struct btree_node_entry, keys)));
 
-		data = bne;
-		bne->csum = cpu_to_le64(btree_csum_set(b, bne));
-		sectors_to_write = __set_blocks(bne,
-						le16_to_cpu(bne->keys.u64s),
-						block_bytes(c)) << c->block_bits;
+		/*
+		 * We may have merely initialized a bset but not written
+		 * anything to it yet:
+		 */
+		if (i != btree_bset_first(b) && !i->u64s)
+			continue;
+
+		last_set_to_write = i;
+
+		i->version = cpu_to_le16(BCACHE_BSET_VERSION);
+		SET_BSET_CSUM_TYPE(i, c->opts.metadata_checksum);
+
+		if (t == b->keys.set) {
+			b->data->csum = cpu_to_le64(btree_csum_set(b, b->data));
+		} else {
+			struct btree_node_entry *bne = container_of(i,
+					struct btree_node_entry, keys);
+
+			bne->csum = cpu_to_le64(btree_csum_set(b, bne));
+		}
 	}
 
+	BUG_ON(!last_set_to_write);
+
+	sectors_to_write = bset_end_sector(c, b, last_set_to_write) -
+		b->written;
+	data = write_block(b);
+
 	BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size);
 
 	/*
@@ -1070,7 +1173,6 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
 void bch_btree_node_write(struct cache_set *c, struct btree *b,
 			  struct closure *parent,
 			  enum six_lock_type lock_type_held,
-			  struct btree_iter *iter,
 			  int idx_to_write)
 {
 	BUG_ON(lock_type_held == SIX_LOCK_write);
@@ -1080,7 +1182,7 @@ void bch_btree_node_write(struct cache_set *c, struct btree *b,
 	if (lock_type_held == SIX_LOCK_intent ||
 	    six_trylock_convert(&b->lock, SIX_LOCK_read, SIX_LOCK_intent)) {
-		bch_btree_init_next(c, b, iter);
+		bch_btree_init_next(c, b, NULL);
 
 		if (lock_type_held == SIX_LOCK_read)
 			six_lock_downgrade(&b->lock);
@@ -1093,31 +1195,10 @@ static void bch_btree_node_write_dirty(struct cache_set *c, struct btree *b,
 	six_lock_read(&b->lock);
 	BUG_ON(b->level);
 
-	bch_btree_node_write(c, b, parent, SIX_LOCK_read, NULL, -1);
+	bch_btree_node_write(c, b, parent, SIX_LOCK_read, -1);
 	six_unlock_read(&b->lock);
 }
 
-/* buffer up 4k before writing out a bset */
-#define BTREE_WRITE_SET_BUFFER		(4 << 10)
-
-/*
- * Write leaf nodes if the unwritten bset is getting too big:
- */
-void bch_btree_node_write_lazy(struct cache_set *c, struct btree *b,
-			       struct btree_iter *iter)
-{
-	struct btree_node_entry *bne =
-		container_of(btree_bset_last(b),
-			     struct btree_node_entry, keys);
-	unsigned long bytes = __set_bytes(bne, le16_to_cpu(bne->keys.u64s));
-
-	if ((max(round_up(bytes, block_bytes(iter->c)),
-		 PAGE_SIZE) - bytes < 48 ||
-	     bytes > BTREE_WRITE_SET_BUFFER) &&
-	    !btree_node_write_in_flight(b))
-		bch_btree_node_write(c, b, NULL, SIX_LOCK_intent, iter, -1);
-}
-
 /*
  * Write all dirty btree nodes to disk, including roots
  */
diff --git a/drivers/md/bcache/btree_io.h b/drivers/md/bcache/btree_io.h
index 701e86d484a4..55c021acd9d9 100644
--- a/drivers/md/bcache/btree_io.h
+++ b/drivers/md/bcache/btree_io.h
@@ -19,6 +19,8 @@ static inline void btree_node_io_lock(struct btree *b)
 			    TASK_UNINTERRUPTIBLE);
 }
 
+bool bch_maybe_compact_whiteouts(struct cache_set *, struct btree *);
+
 void bch_btree_sort_into(struct cache_set *, struct btree *, struct btree *);
 
 void bch_btree_build_aux_trees(struct btree *);
@@ -37,13 +39,13 @@ void bch_btree_complete_write(struct cache_set *, struct btree *,
 void __bch_btree_node_write(struct cache_set *, struct btree *,
 			    struct closure *, int);
 void bch_btree_node_write(struct cache_set *, struct btree *,
-			  struct closure *, enum six_lock_type,
-			  struct btree_iter *, int);
-void bch_btree_node_write_lazy(struct cache_set *, struct btree *,
-			       struct btree_iter *);
+			  struct closure *, enum six_lock_type, int);
 
 void bch_btree_flush(struct cache_set *);
 void bch_btree_node_flush_journal_entries(struct cache_set *, struct btree *,
 					  struct closure *);
 
+/* buffer up 4k before writing out a bset */
+#define BTREE_WRITE_SET_BUFFER		(4 << 10)
+
 #endif /* _BCACHE_BTREE_IO_H */
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index cdb11a8734d0..a4e65a896f21 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -34,7 +34,7 @@ void __bch_btree_calc_format(struct bkey_format_state *s, struct btree *b)
 		for (k = t->data->start;
 		     k != bset_bkey_last(t->data);
 		     k = bkey_next(k))
-			if (!bkey_packed_is_whiteout(&b->keys, k)) {
+			if (!bkey_whiteout(k)) {
 				uk = bkey_unpack_key(&b->keys.format, k);
 				bch_bkey_format_add_key(s, &uk);
 			}
@@ -605,7 +605,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
 
 	b = __btree_root_alloc(c, 0, id, reserve);
 
-	bch_btree_node_write(c, b, writes, SIX_LOCK_intent, NULL, -1);
+	bch_btree_node_write(c, b, writes, SIX_LOCK_intent, -1);
 
 	bch_btree_set_root_initial(c, b, reserve);
 	btree_open_bucket_put(c, b);
@@ -674,7 +674,7 @@ void bch_btree_bset_insert_key(struct btree_iter *iter,
 	if (k && !bkey_cmp_packed(f, k, &insert->k)) {
 		t = bch_bkey_to_bset(&b->keys, k);
 
-		if (!bkey_packed_is_whiteout(&b->keys, k))
+		if (!bkey_whiteout(k))
 			btree_keys_account_key_drop(&b->keys.nr,
 						    t - b->keys.set, k);
 
@@ -800,16 +800,18 @@ static void verify_keys_sorted(struct keylist *l)
 static void btree_node_lock_for_insert(struct btree *b, struct btree_iter *iter)
 {
 	struct cache_set *c = iter->c;
+	struct bset *i;
 relock:
 	btree_node_lock_write(b, iter);
+	i = btree_bset_last(b);
 
 	/*
-	 * If the last bset has been written, initialize a new one - check after
-	 * taking the write lock because it can be written with only a read
-	 * lock:
+	 * If the last bset has been written, or if it's gotten too big - start
+	 * a new bset to insert into:
 	 */
-	if (b->written != c->sb.btree_node_size &&
-	    bset_written(b, btree_bset_last(b))) {
+	if (next_bset_offset(c, b) < c->sb.btree_node_size &&
+	    (bset_written(b, i) ||
+	     le16_to_cpu(i->u64s) * sizeof(u64) > BTREE_WRITE_SET_BUFFER)) {
 		btree_node_unlock_write(b, iter);
 		bch_btree_init_next(c, b, iter);
 		goto relock;
@@ -914,8 +916,7 @@ retry:
 		list_del(&as->write_blocked_list);
 
 		if (list_empty(&b->write_blocked))
-			bch_btree_node_write(c, b, NULL, SIX_LOCK_read,
-					     NULL, -1);
+			bch_btree_node_write(c, b, NULL, SIX_LOCK_read, -1);
 
 		six_unlock_read(&b->lock);
 		break;
@@ -1159,7 +1160,7 @@ bch_btree_insert_keys_interior(struct btree *b,
 	btree_node_lock_for_insert(b, iter);
 
 	if (bch_keylist_u64s(insert_keys) >
-	    bch_btree_keys_u64s_remaining(iter->c, b)) {
+	    bch_btree_keys_u64s_remaining(c, b)) {
 		btree_node_unlock_write(b, iter);
 		return BTREE_INSERT_BTREE_NODE_FULL;
 	}
@@ -1184,7 +1185,7 @@ bch_btree_insert_keys_interior(struct btree *b,
 		bch_keylist_pop_front(insert_keys);
 	}
 
-	btree_interior_update_updated_btree(iter->c, as, b);
+	btree_interior_update_updated_btree(c, as, b);
 
 	for_each_linked_btree_node(iter, b, linked)
 		bch_btree_node_iter_peek(&linked->node_iters[b->level],
@@ -1193,7 +1194,7 @@ bch_btree_insert_keys_interior(struct btree *b,
 
 	bch_btree_iter_verify(iter, b);
 
-	if (bch_maybe_compact_deleted_keys(&b->keys))
+	if (bch_maybe_compact_whiteouts(c, b))
 		bch_btree_iter_reinit_node(iter, b);
 
 	btree_node_unlock_write(b, iter);
@@ -1375,7 +1376,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
 		six_unlock_write(&n2->lock);
 		six_unlock_write(&n1->lock);
 
-		bch_btree_node_write(c, n2, &as->cl, SIX_LOCK_intent, NULL, -1);
+		bch_btree_node_write(c, n2, &as->cl, SIX_LOCK_intent, -1);
 
 		/*
 		 * Note that on recursive parent_keys == insert_keys, so we
@@ -1395,8 +1396,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
 			btree_split_insert_keys(iter, n3, &as->parent_keys,
 						reserve);
-			bch_btree_node_write(c, n3, &as->cl, SIX_LOCK_intent,
-					     NULL, -1);
+			bch_btree_node_write(c, n3, &as->cl, SIX_LOCK_intent, -1);
 		}
 	} else {
 		trace_bcache_btree_node_compact(c, b, b->keys.nr.live_u64s);
@@ -1407,7 +1407,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
 		bch_keylist_add(&as->parent_keys, &n1->key);
 	}
 
-	bch_btree_node_write(c, n1, &as->cl, SIX_LOCK_intent, NULL, -1);
+	bch_btree_node_write(c, n1, &as->cl, SIX_LOCK_intent, -1);
 
 	/* New nodes all written, now make them visible: */
 
@@ -1706,7 +1706,7 @@ retry:
 	bch_keylist_add(&as->parent_keys, &delete);
 	bch_keylist_add(&as->parent_keys, &n->key);
 
-	bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, NULL, -1);
+	bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, -1);
 
 	bch_btree_insert_node(parent, iter, &as->parent_keys, reserve, as);
 
@@ -1768,7 +1768,7 @@ btree_insert_key(struct btree_insert *trans,
 	b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
 
 	if (u64s_added > live_u64s_added &&
-	    bch_maybe_compact_deleted_keys(&b->keys))
+	    bch_maybe_compact_whiteouts(iter->c, b))
 		bch_btree_iter_reinit_node(iter, b);
 
 	trace_bcache_btree_insert_key(c, b, insert->k);
@@ -1944,10 +1944,6 @@ unlock:
 		if (!same_leaf_as_prev(trans, i)) {
 			foreground_maybe_merge(i->iter, btree_prev_sib);
 			foreground_maybe_merge(i->iter, btree_next_sib);
-
-			if (btree_node_relock(i->iter, 0))
-				bch_btree_node_write_lazy(c, i->iter->nodes[0],
-							  i->iter);
 		}
 out:
 	/* make sure we didn't lose an error: */
@@ -2238,7 +2234,7 @@ int bch_btree_node_rewrite(struct btree_iter *iter, struct btree *b,
 
 	trace_bcache_btree_gc_rewrite_node(c, b);
 
-	bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, NULL, -1);
+	bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, -1);
 
 	if (parent) {
 		bch_btree_insert_node(parent, iter,
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index 4311dd17f82a..4846c4790b4f 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -186,6 +186,23 @@ static inline bool bset_unwritten(struct btree *b, struct bset *i)
 	return (void *) i > write_block(b);
 }
 
+static inline unsigned bset_end_sector(struct cache_set *c, struct btree *b,
+				       struct bset *i)
+{
+	return round_up(bset_byte_offset(b, bset_bkey_last(i)),
+			block_bytes(c)) >> 9;
+}
+
+static inline unsigned next_bset_offset(struct cache_set *c, struct btree *b)
+{
+	unsigned offset = max_t(unsigned, b->written,
+				bset_end_sector(c, b, btree_bset_last(b)));
+
+	EBUG_ON(offset > c->sb.btree_node_size);
+	return offset;
+}
+
 static inline size_t bch_btree_keys_u64s_remaining(struct cache_set *c,
 						   struct btree *b)
 {
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 5419b1f09738..bd3670331102 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -58,7 +58,7 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter,
 	struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
 	struct bkey_packed *k = __btree_node_offset_to_key(b, l->k);
 
-	if (bkey_packed_is_whiteout(b, k))
+	if (bkey_whiteout(k))
 		return true;
 
 	if (iter->used < 2)
@@ -741,7 +741,7 @@ static void extent_sort_append(struct cache_set *c,
 	struct bkey_format *f = &b->format;
 	BKEY_PADDED(k) tmp;
 
-	if (bkey_packed_is_whiteout(b, k))
+	if (bkey_whiteout(k))
 		return;
 
 	bkey_unpack(&tmp.k, f, k);
@@ -1416,7 +1416,7 @@ bch_insert_fixup_extent(struct btree_insert *trans,
 		struct bpos orig_pos = k.k->p;
 
 		/* The insert key completely covers k, invalidate k */
-		if (!bkey_is_whiteout(k.k))
+		if (!bkey_whiteout(k.k))
 			btree_keys_account_key_drop(&b->keys.nr,
 						    t - b->keys.set, _k);