diff options
-rw-r--r-- | drivers/md/bcache/bcache.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/bkey_methods.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/bkey_methods.h | 21 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 357 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 129 | ||||
-rw-r--r-- | drivers/md/bcache/btree_cache.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/btree_gc.c | 9 | ||||
-rw-r--r-- | drivers/md/bcache/btree_io.c | 92 | ||||
-rw-r--r-- | drivers/md/bcache/btree_iter.c | 5 | ||||
-rw-r--r-- | drivers/md/bcache/btree_types.h | 14 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.c | 61 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.h | 4 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/dirent.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/dirent.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 44 | ||||
-rw-r--r-- | drivers/md/bcache/extents.h | 11 | ||||
-rw-r--r-- | drivers/md/bcache/inode.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/inode.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 14 | ||||
-rw-r--r-- | drivers/md/bcache/xattr.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/xattr.h | 1 |
23 files changed, 370 insertions, 425 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 599896689bc1..3f604fcd8560 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -755,7 +755,7 @@ struct cache_set { */ mempool_t fill_iter; - struct bset_sort_state sort; + mempool_t btree_sort_pool; struct journal journal; unsigned btree_flush_delay; diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c index 47db7a2ba04a..9755cc60557f 100644 --- a/drivers/md/bcache/bkey_methods.c +++ b/drivers/md/bcache/bkey_methods.c @@ -8,7 +8,7 @@ #include "inode.h" #include "xattr.h" -static const struct bkey_ops *bch_bkey_ops[] = { +const struct bkey_ops *bch_bkey_ops[] = { [BKEY_TYPE_EXTENTS] = &bch_bkey_extent_ops, [BKEY_TYPE_INODES] = &bch_bkey_inode_ops, [BKEY_TYPE_DIRENTS] = &bch_bkey_dirent_ops, diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h index 03ca92e28a29..f8c7f42aba7e 100644 --- a/drivers/md/bcache/bkey_methods.h +++ b/drivers/md/bcache/bkey_methods.h @@ -26,9 +26,25 @@ static inline bool btree_type_has_ptrs(enum bkey_type type) } struct cache_set; +struct btree_keys; struct btree; struct bkey; +enum merge_result { + BCH_MERGE_NOMERGE, + + /* + * The keys were mergeable, but would have overflowed size - so instead + * l was changed to the maximum size, and both keys were modified: + */ + BCH_MERGE_PARTIAL, + BCH_MERGE_MERGE, +}; + +typedef bool (*key_filter_fn)(struct btree_keys *, struct bkey_s); +typedef enum merge_result (*key_merge_fn)(struct btree_keys *, + struct bkey_i *, struct bkey_i *); + struct bkey_ops { /* Returns reason for being invalid if invalid, else NULL: */ const char * (*key_invalid)(const struct cache_set *, @@ -38,7 +54,8 @@ struct bkey_ops { void (*val_to_text)(struct cache_set *, char *, size_t, struct bkey_s_c); void (*swab)(const struct bkey_format *, struct bkey_packed *); - + key_filter_fn key_normalize; + key_merge_fn key_merge; bool is_extents; }; @@ -53,6 +70,8 @@ void bch_bkey_val_to_text(struct cache_set *, enum bkey_type, void bch_bkey_swab(enum bkey_type, const struct bkey_format *, struct bkey_packed *); +extern const struct bkey_ops *bch_bkey_ops[]; + #undef DEF_BTREE_ID #endif /* _BCACHE_BKEY_METHODS_H */ diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 553f8c4f04ab..04a940487077 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -72,16 +72,25 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) n = bkey_unpack_key(f, _n); - if (bkey_cmp(bkey_start_pos(&n), k.p) < 0) + if (bkey_cmp(bkey_start_pos(&n), k.p) < 0) { printk(KERN_ERR "Key skipped backwards\n"); - else if (!b->ops->is_extents && - !bkey_deleted(&k) && - !bkey_cmp(n.p, k.p)) + continue; + } + + /* + * Weird check for duplicate non extent keys: extents are + * deleted iff they have 0 size, so if it has zero size and it's + * not deleted these aren't extents: + */ + if (((!k.size && !bkey_deleted(&k)) || + (!n.size && !bkey_deleted(&n))) && + !bkey_deleted(&k) && + !bkey_cmp(n.p, k.p)) printk(KERN_ERR "Duplicate keys\n"); } } -void bch_dump_bucket(struct btree_keys *b) +void bch_dump_btree_node(struct btree_keys *b) { unsigned i; @@ -157,7 +166,7 @@ static void bch_btree_node_iter_next_check(struct btree_node_iter *iter, struct bkey nu = bkey_unpack_key(f, n); char buf1[80], buf2[80]; - bch_dump_bucket(b); + bch_dump_btree_node(b); bch_bkey_to_text(buf1, sizeof(buf1), &ku); bch_bkey_to_text(buf2, sizeof(buf2), &nu); panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2); @@ -207,10 +216,10 @@ void bch_verify_key_order(struct btree_keys *b, k = bkey_prev_all(t, where); if (k && - keys_out_of_order(f, k, where, b->ops->is_extents)) { + keys_out_of_order(f, k, where, iter->is_extents)) { char buf1[100], buf2[100]; - bch_dump_bucket(b); + bch_dump_btree_node(b); uk = bkey_unpack_key(f, k); bch_bkey_to_text(buf1, sizeof(buf1), &uk); bch_bkey_to_text(buf2, sizeof(buf2), &uw); @@ -220,7 +229,7 @@ void bch_verify_key_order(struct btree_keys *b, k = bkey_next(where); BUG_ON(k != bset_bkey_last(t->data) && - keys_out_of_order(f, where, k, b->ops->is_extents)); + keys_out_of_order(f, where, k, iter->is_extents)); for (t = b->set; t <= b->set + b->nsets; t++) { if (!t->data->u64s) @@ -244,7 +253,7 @@ void bch_verify_key_order(struct btree_keys *b, k = bkey_next(k)) { uk = bkey_unpack_key(f, k); - if (b->ops->is_extents) { + if (iter->is_extents) { BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 || bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0)); } else { @@ -392,8 +401,7 @@ err: } EXPORT_SYMBOL(bch_btree_keys_alloc); -void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, - bool *expensive_debug_checks) +void bch_btree_keys_init(struct btree_keys *b, bool *expensive_debug_checks) { struct bkey_format_state s; unsigned i; @@ -401,7 +409,6 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, bch_bkey_format_init(&s); b->format = bch_bkey_format_done(&s); - b->ops = ops; b->nsets = 0; memset(&b->nr, 0, sizeof(b->nr)); #ifdef CONFIG_BCACHE_DEBUG @@ -410,7 +417,7 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, for (i = 0; i < MAX_BSETS; i++) { b->set[i].data = NULL; b->set[i].size = 0; - b->set[i].extra = BSET_AUX_TREE_NONE_VAL; + b->set[i].extra = BSET_NO_AUX_TREE_VAL; } } EXPORT_SYMBOL(bch_btree_keys_init); @@ -742,7 +749,7 @@ static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t) while (++t < b->set + MAX_BSETS) { t->size = 0; - t->extra = BSET_AUX_TREE_NONE_VAL; + t->extra = BSET_NO_AUX_TREE_VAL; t->tree = NULL; t->prev = NULL; } @@ -754,49 +761,72 @@ static unsigned bset_tree_capacity(struct btree_keys *b, struct bset_tree *t) return b->set->tree + btree_keys_cachelines(b) - t->tree; } -static void bch_bset_build_unwritten_tree(struct btree_keys *b) +static void bch_bset_lookup_table_add_entries(struct btree_keys *b, + struct bset_tree *t) { - struct bset_tree *t = bset_tree_last(b); + struct bkey_packed *k; + + BUG_ON(!bset_has_rw_aux_tree(t)); + BUG_ON(t->size > bset_tree_capacity(b, t)); + for (k = table_to_bkey(t, t->size - 1); + k != bset_bkey_last(t->data); + k = bkey_next(k)) + while (bkey_to_cacheline(t, k) >= t->size) { + if (t->size == bset_tree_capacity(b, t)) + return; + + t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k); + t->size++; + } +} + +static void bch_bset_build_rw_aux_tree(struct btree_keys *b, struct bset_tree *t) +{ bset_alloc_tree(b, t); - if (bset_tree_capacity(b, t)) { - t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start); - t->size = 1; - t->extra = BSET_UNWRITTEN_TABLE_VAL; - } else { - t->extra = BSET_AUX_TREE_NONE_VAL; + if (!bset_tree_capacity(b, t)) { + t->extra = BSET_NO_AUX_TREE_VAL; + return; } + + t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start); + t->size = 1; + t->extra = BSET_RW_AUX_TREE_VAL; + + bch_bset_lookup_table_add_entries(b, t); } void bch_bset_init_first(struct btree_keys *b, struct bset *i) { + struct bset_tree *t = &b->set[0]; + BUG_ON(b->nsets); - b->set[0].data = i; + t->data = i; memset(i, 0, sizeof(*i)); get_random_bytes(&i->seq, sizeof(i->seq)); SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); - bch_bset_build_unwritten_tree(b); + bch_bset_build_rw_aux_tree(b, t); } -EXPORT_SYMBOL(bch_bset_init_first); void bch_bset_init_next(struct btree_keys *b, struct bset *i) { + struct bset_tree *t; BUG_ON(b->nsets + 1 == MAX_BSETS); - b->set[++b->nsets].data = i; + t = &b->set[++b->nsets]; + t->data = i; memset(i, 0, sizeof(*i)); i->seq = b->set->data->seq; SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); - bch_bset_build_unwritten_tree(b); + bch_bset_build_rw_aux_tree(b, t); } -EXPORT_SYMBOL(bch_bset_init_next); -void bch_bset_build_written_tree(struct btree_keys *b, - struct bset_tree *t) +void bch_bset_build_ro_aux_tree(struct btree_keys *b, + struct bset_tree *t) { struct bkey_packed *prev = NULL, *k = t->data->start; unsigned j, cacheline = 1; @@ -808,7 +838,7 @@ void bch_bset_build_written_tree(struct btree_keys *b, retry: if (t->size < 2) { t->size = 0; - t->extra = BSET_AUX_TREE_NONE_VAL; + t->extra = BSET_NO_AUX_TREE_VAL; return; } @@ -844,7 +874,6 @@ retry: j = inorder_next(j, t->size)) make_bfloat(&b->format, t, j); } -EXPORT_SYMBOL(bch_bset_build_written_tree); static struct bkey_packed *__bkey_prev(struct bset_tree *t, struct bkey_packed *k) { @@ -865,9 +894,17 @@ static struct bkey_packed *__bkey_prev(struct bset_tree *t, struct bkey_packed * } - p = bset_has_aux_tree(t) - ? tree_to_bkey(t, inorder_to_tree(j, t)) - : table_to_bkey(t, j); + switch (bset_aux_tree_type(t)) { + case BSET_NO_AUX_TREE: + p = t->data->start; + break; + case BSET_RO_AUX_TREE: + p = tree_to_bkey(t, inorder_to_tree(j, t)); + break; + case BSET_RW_AUX_TREE: + p = table_to_bkey(t, j); + break; + } } while (p >= k); return p; @@ -919,7 +956,7 @@ void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bset_tree *t, { unsigned inorder, j = 1; - if (!bset_has_aux_tree(t)) + if (bset_aux_tree_type(t) != BSET_RO_AUX_TREE) return; inorder = bkey_to_cacheline(t, k); @@ -972,9 +1009,9 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, int shift = new_u64s - clobber_u64s; unsigned j; - BUG_ON(bset_has_aux_tree(t)); + BUG_ON(bset_has_ro_aux_tree(t)); - if (bset_aux_tree_type(t) != BSET_HAS_UNWRITTEN_TABLE) + if (!bset_has_rw_aux_tree(t)) return; /* Did we just truncate? */ @@ -1024,21 +1061,7 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, t->prev[j] = new_offset; } - BUG_ON(t->size > bset_tree_capacity(b, t)); - - if (t->size == bset_tree_capacity(b, t)) - return; - - /* Possibly add a new entry to the end of the lookup table */ - - for (k = table_to_bkey(t, t->size - 1); - k != bset_bkey_last(t->data); - k = bkey_next(k)) - if (t->size == bkey_to_cacheline(t, k)) { - t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k); - t->size++; - return; - } + bch_bset_lookup_table_add_entries(b, t); } static void bch_bset_verify_lookup_table(struct btree_keys *b, @@ -1050,9 +1073,9 @@ static void bch_bset_verify_lookup_table(struct btree_keys *b, if (!btree_keys_expensive_checks(b)) return; - BUG_ON(bset_has_aux_tree(t)); + BUG_ON(bset_has_ro_aux_tree(t)); - if (bset_aux_tree_type(t) != BSET_HAS_UNWRITTEN_TABLE) + if (!bset_has_rw_aux_tree(t)) return; BUG_ON(t->size < 1); @@ -1258,13 +1281,13 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b, */ switch (bset_aux_tree_type(t)) { - case BSET_AUX_TREE_NONE: + case BSET_NO_AUX_TREE: m = t->data->start; break; - case BSET_HAS_UNWRITTEN_TABLE: + case BSET_RW_AUX_TREE: m = bset_search_write_set(f, t, search, lossy_packed_search); break; - case BSET_HAS_AUX_TREE: + case BSET_RO_AUX_TREE: /* * Each node in the auxiliary search tree covers a certain range * of bits, and keys above and below the set it covers might @@ -1373,7 +1396,7 @@ void bch_btree_node_iter_push(struct btree_node_iter *iter, */ void bch_btree_node_iter_init(struct btree_node_iter *iter, struct btree_keys *b, struct bpos search, - bool strictly_greater) + bool strictly_greater, bool is_extents) { struct bset_tree *t; struct bkey_packed p, *packed_search, *lossy_packed_search; @@ -1399,7 +1422,7 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter, BUG(); } - __bch_btree_node_iter_init(iter, b); + __bch_btree_node_iter_init(iter, is_extents); for (t = b->set; t <= b->set + b->nsets; t++) __bch_btree_node_iter_push(iter, b, @@ -1413,11 +1436,12 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter, EXPORT_SYMBOL(bch_btree_node_iter_init); void bch_btree_node_iter_init_from_start(struct btree_node_iter *iter, - struct btree_keys *b) + struct btree_keys *b, + bool is_extents) { struct bset_tree *t; - __bch_btree_node_iter_init(iter, b); + __bch_btree_node_iter_init(iter, is_extents); for (t = b->set; t <= b->set + b->nsets; t++) __bch_btree_node_iter_push(iter, b, @@ -1425,7 +1449,6 @@ void bch_btree_node_iter_init_from_start(struct btree_node_iter *iter, bset_bkey_last(t->data)); bch_btree_node_iter_sort(iter, b); } -EXPORT_SYMBOL(bch_btree_node_iter_init_from_start); struct bkey_packed *bch_btree_node_iter_bset_pos(struct btree_node_iter *iter, struct btree_keys *b, @@ -1585,70 +1608,42 @@ EXPORT_SYMBOL(bch_btree_node_iter_peek_unpack); /* Mergesort */ -void bch_bset_sort_state_free(struct bset_sort_state *state) -{ - mempool_exit(&state->pool); -} - -int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order, - struct time_stats *time) -{ - state->page_order = page_order; - state->crit_factor = int_sqrt(1 << page_order); - state->time = time; - - if (mempool_init_page_pool(&state->pool, 1, page_order)) - return -ENOMEM; - - return 0; -} -EXPORT_SYMBOL(bch_bset_sort_state_init); - /* No repacking: */ -static struct btree_nr_keys btree_mergesort_simple(struct btree_keys *b, - unsigned from, - struct bset *bset, - struct btree_node_iter *iter) +static struct btree_nr_keys btree_mergesort_simple(struct bset *dst, + struct btree_keys *src, + struct btree_node_iter *src_iter) { - struct bkey_packed *in, *out = bset->start; - struct btree_nr_keys ret = b->nr; - unsigned i; + struct bkey_packed *in, *out = bset_bkey_last(dst); + struct btree_nr_keys nr; + + memset(&nr, 0, sizeof(nr)); - while ((in = bch_btree_node_iter_next_all(iter, b))) { - if (!bkey_packed_is_whiteout(b, in)) { + while ((in = bch_btree_node_iter_next_all(src_iter, src))) { + if (!bkey_packed_is_whiteout(src, in)) { bkey_copy(out, in); + btree_keys_account_key_add(&nr, 0, out); out = bkey_next(out); } } - bset->u64s = cpu_to_le16((u64 *) out - bset->_data); - - for (i = from + 1; i < MAX_BSETS; i++) { - ret.bset_u64s[from] += ret.bset_u64s[i]; - ret.bset_u64s[i] = 0; - } - - return ret; + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); + return nr; } /* Sort + repack in a new format: */ -static struct btree_nr_keys btree_mergesort(struct btree_keys *dst, - struct bset *dst_set, +static struct btree_nr_keys btree_mergesort(struct bset *dst, struct btree_keys *src, - struct btree_node_iter *iter, - ptr_filter_fn filter) + struct btree_node_iter *src_iter, + struct bkey_format *in_f, + struct bkey_format *out_f, + key_filter_fn filter) { - struct bkey_format *in_f = &src->format; - struct bkey_format *out_f = &dst->format; - struct bkey_packed *in, *out = bset_bkey_last(dst_set); + struct bkey_packed *in, *out = bset_bkey_last(dst); struct btree_nr_keys nr; - BUG_ON(filter); - EBUG_ON(filter && !dst->ops->is_extents); - memset(&nr, 0, sizeof(nr)); - while ((in = bch_btree_node_iter_next_all(iter, src))) { + while ((in = bch_btree_node_iter_next_all(src_iter, src))) { if (bkey_packed_is_whiteout(src, in)) continue; @@ -1660,30 +1655,25 @@ static struct btree_nr_keys btree_mergesort(struct btree_keys *dst, btree_keys_account_key_add(&nr, 0, out); out = bkey_next(out); - - BUG_ON((void *) out > - (void *) dst_set + (PAGE_SIZE << dst->page_order)); } - dst_set->u64s = cpu_to_le16((u64 *) out - dst_set->_data); + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); return nr; } -/* Sort, repack, and merge extents */ -static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst, - struct bset *dst_set, +/* Sort, repack, and merge: */ +static struct btree_nr_keys btree_mergesort_extents(struct bset *dst, struct btree_keys *src, struct btree_node_iter *iter, - ptr_filter_fn filter) + struct bkey_format *in_f, + struct bkey_format *out_f, + key_filter_fn filter, + key_merge_fn merge) { - struct bkey_format *in_f = &src->format; - struct bkey_format *out_f = &dst->format; struct bkey_packed *k, *prev = NULL, *out; struct btree_nr_keys nr; BKEY_PADDED(k) tmp; - EBUG_ON(!dst->ops->is_extents); - memset(&nr, 0, sizeof(nr)); while ((k = bch_btree_node_iter_next_all(iter, src))) { @@ -1702,8 +1692,8 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst, /* prev is always unpacked, for key merging: */ if (prev && - src->ops->key_merge && - bch_bkey_try_merge(src, (void *) prev, &tmp.k)) + merge && + merge(src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE) continue; /* @@ -1716,13 +1706,10 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst, btree_keys_account_key_add(&nr, 0, prev); prev = bkey_next(prev); } else { - prev = bset_bkey_last(dst_set); + prev = bset_bkey_last(dst); } bkey_copy(prev, &tmp.k); - - BUG_ON((void *) bkey_next(prev) > - (void *) dst_set + (PAGE_SIZE << dst->page_order)); } if (prev) { @@ -1730,102 +1717,34 @@ static struct btree_nr_keys btree_mergesort_extents(struct btree_keys *dst, btree_keys_account_key_add(&nr, 0, prev); out = bkey_next(prev); } else { - out = bset_bkey_last(dst_set); + out = bset_bkey_last(dst); } - dst_set->u64s = cpu_to_le16((u64 *) out - dst_set->_data); + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); return nr; } -struct btree_nr_keys bch_sort_bsets(struct bset *dst, struct btree_keys *b, - unsigned from, struct btree_node_iter *iter, - btree_keys_sort_fn sort, - struct bset_sort_state *state) +struct btree_nr_keys bch_sort_bsets(struct bset *dst, + struct btree_keys *src, + struct btree_node_iter *src_iter, + struct bkey_format *in_f, + struct bkey_format *out_f, + key_filter_fn filter, + key_merge_fn merge) { - u64 start_time = local_clock(); - struct btree_node_iter _iter; - struct btree_nr_keys nr; - - if (!iter) { - struct bset_tree *t; - - iter = &_iter; - __bch_btree_node_iter_init(iter, b); - - for (t = b->set + from; t <= b->set + b->nsets; t++) - __bch_btree_node_iter_push(iter, b, - t->data->start, - bset_bkey_last(t->data)); - bch_btree_node_iter_sort(iter, b); - } - - dst->u64s = 0; - - /* - * If we're only doing a partial sort (start != 0), then we can't merge - * extents because that might produce extents that overlap with 0 size - * extents in bsets we aren't sorting: - */ - if (sort) - nr = sort(b, dst, iter); - else if (b->ops->is_extents && !from) - nr = btree_mergesort_extents(b, dst, b, iter, NULL); - else - nr = btree_mergesort_simple(b, from, dst, iter); - - if (!from) - bch_time_stats_update(state->time, start_time); - - return nr; -} - -/** - * bch_btree_sort_into - sort with a specified output, instead of allocating - * temporary space - * - * does not create the auxiliary search tree - */ -void bch_btree_sort_into(struct btree_keys *dst, - struct btree_keys *src, - ptr_filter_fn filter, - struct bset_sort_state *state) -{ - u64 start_time = local_clock(); - struct btree_node_iter iter; - struct btree_nr_keys nr; - - bch_btree_node_iter_init_from_start(&iter, src); - - if (!dst->ops->is_extents) - nr = btree_mergesort(dst, dst->set->data, - src, &iter, filter); + if (merge) + return btree_mergesort_extents(dst, src, src_iter, + in_f, out_f, filter, merge); + else if (memcmp(in_f, out_f, sizeof(*in_f))) + return btree_mergesort(dst, src, src_iter, in_f, out_f, filter); else - nr = btree_mergesort_extents(dst, dst->set->data, - src, &iter, filter); - - BUG_ON(__set_bytes(dst->set->data, - le16_to_cpu(dst->set->data->u64s)) > - (PAGE_SIZE << dst->page_order)); - - bch_time_stats_update(state->time, start_time); - - dst->nr.live_u64s += nr.live_u64s; - dst->nr.bset_u64s[0] += nr.bset_u64s[0]; - dst->nr.packed_keys += nr.packed_keys; - dst->nr.unpacked_keys += nr.unpacked_keys; - - dst->nsets = 0; - /* No auxiliary search tree yet */ - dst->set->size = 0; - dst->set->extra = BSET_AUX_TREE_NONE_VAL; - - bch_verify_btree_nr_keys(dst); + return btree_mergesort_simple(dst, src, src_iter); } bool bch_maybe_compact_deleted_keys(struct btree_keys *b) { struct bset_tree *t, *rebuild_from = NULL; - bool last_set_has_aux_tree = bset_has_aux_tree(bset_tree_last(b)); + bool last_set_aux_tree_ro = bset_has_ro_aux_tree(bset_tree_last(b)); unsigned idx; for (idx = 0; idx <= b->nsets; idx++) { @@ -1841,7 +1760,7 @@ bool bch_maybe_compact_deleted_keys(struct btree_keys *b) * * XXX unless they're extents, if we fix assertions elsewhere */ - if (idx == b->nsets && !last_set_has_aux_tree) + if (idx == b->nsets && !last_set_aux_tree_ro) break; for (k = i->start; k != bset_bkey_last(i); k = n) { @@ -1863,10 +1782,10 @@ bool bch_maybe_compact_deleted_keys(struct btree_keys *b) return false; for (t = rebuild_from; t <= b->set + b->nsets; t++) { - if (t == bset_tree_last(b) && !last_set_has_aux_tree) - bch_bset_build_unwritten_tree(b); + if (t == bset_tree_last(b) && !last_set_aux_tree_ro) + bch_bset_build_rw_aux_tree(b, t); else - bch_bset_build_written_tree(b, t); + bch_bset_build_ro_aux_tree(b, t); } return true; @@ -1884,7 +1803,7 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats) stats->sets[type].nr++; stats->sets[type].bytes += le16_to_cpu(t->data->u64s) * sizeof(u64); - if (bset_has_aux_tree(t)) { + if (bset_has_ro_aux_tree(t)) { stats->floats += t->size - 1; for (j = 1; j < t->size; j++) @@ -1915,7 +1834,7 @@ int bch_bkey_print_bfloat(struct btree_keys *b, struct bkey_packed *k, if (!size) return 0; - if (!bset_has_aux_tree(t)) + if (!bset_has_ro_aux_tree(t)) goto out; j = inorder_to_tree(bkey_to_cacheline(t, k), t); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 6e33fe6b5b98..48bd4c84ef3a 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include "bkey.h" +#include "bkey_methods.h" #include "util.h" /* for time_stats */ /* @@ -148,34 +149,6 @@ struct btree_node_iter; struct btree_node_iter_set; struct bkey_float; -typedef bool (*ptr_filter_fn)(struct btree_keys *, struct bkey_s); - -typedef bool (*iter_cmp_fn)(struct btree_node_iter_set, - struct btree_node_iter_set); - -enum merge_result { - BCH_MERGE_NOMERGE, - - /* - * The keys were mergeable, but would have overflowed size - so instead - * l was changed to the maximum size, and both keys were modified: - */ - BCH_MERGE_PARTIAL, - BCH_MERGE_MERGE, -}; - -struct btree_keys_ops { - ptr_filter_fn key_normalize; - enum merge_result (*key_merge)(struct btree_keys *, - struct bkey_i *, struct bkey_i *); - - /* - * Only used for deciding whether to use bkey_start_pos(k) or just the - * key itself in a couple places - */ - bool is_extents; -}; - #define MAX_BSETS 3U struct bset_tree { @@ -211,27 +184,27 @@ struct bset_tree { }; enum bset_aux_tree_type { - BSET_AUX_TREE_NONE, - BSET_HAS_AUX_TREE, - BSET_HAS_UNWRITTEN_TABLE, + BSET_NO_AUX_TREE, + BSET_RO_AUX_TREE, + BSET_RW_AUX_TREE, }; #define BSET_TREE_NR_TYPES 3 -#define BSET_AUX_TREE_NONE_VAL (UINT_MAX) -#define BSET_UNWRITTEN_TABLE_VAL (UINT_MAX - 1) +#define BSET_NO_AUX_TREE_VAL (UINT_MAX) +#define BSET_RW_AUX_TREE_VAL (UINT_MAX - 1) static inline enum bset_aux_tree_type bset_aux_tree_type(struct bset_tree *t) { switch (t->extra) { - case BSET_AUX_TREE_NONE_VAL: - return BSET_AUX_TREE_NONE; - case BSET_UNWRITTEN_TABLE_VAL: + case BSET_NO_AUX_TREE_VAL: + return BSET_NO_AUX_TREE; + case BSET_RW_AUX_TREE_VAL: EBUG_ON(!t->size); - return BSET_HAS_UNWRITTEN_TABLE; + return BSET_RW_AUX_TREE; default: EBUG_ON(!t->size); - return BSET_HAS_AUX_TREE; + return BSET_RO_AUX_TREE; } } @@ -250,7 +223,6 @@ struct btree_nr_keys { }; struct btree_keys { - const struct btree_keys_ops *ops; u8 nsets; u8 page_order; @@ -287,9 +259,14 @@ static inline struct bset_tree *bset_tree_last(struct btree_keys *b) return b->set + b->nsets; } -static inline bool bset_has_aux_tree(struct bset_tree *t) +static inline bool bset_has_ro_aux_tree(struct bset_tree *t) +{ + return bset_aux_tree_type(t) == BSET_RO_AUX_TREE; +} + +static inline bool bset_has_rw_aux_tree(struct bset_tree *t) { - return bset_aux_tree_type(t) == BSET_HAS_AUX_TREE; + return bset_aux_tree_type(t) == BSET_RW_AUX_TREE; } #define __set_bytes(_i, _u64s) (sizeof(*(_i)) + (_u64s) * sizeof(u64)) @@ -313,12 +290,11 @@ static inline struct bset *bset_next_set(struct btree_keys *b, void bch_btree_keys_free(struct btree_keys *); int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t); -void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *, - bool *); +void bch_btree_keys_init(struct btree_keys *, bool *); void bch_bset_init_first(struct btree_keys *, struct bset *); void bch_bset_init_next(struct btree_keys *, struct bset *); -void bch_bset_build_written_tree(struct btree_keys *, struct bset_tree *); +void bch_bset_build_ro_aux_tree(struct btree_keys *, struct bset_tree *); void bch_bset_fix_invalidated_key(struct btree_keys *, struct bset_tree *, struct bkey_packed *); @@ -384,20 +360,6 @@ struct bset_tree *bch_bkey_to_bset(struct btree_keys *, struct bkey_packed *); struct bkey_packed *bkey_prev_all(struct bset_tree *, struct bkey_packed *); struct bkey_packed *bkey_prev(struct bset_tree *, struct bkey_packed *); -/* - * Tries to merge l and r: l should be lower than r - * Returns true if we were able to merge. If we did merge, l will be the merged - * key, r will be untouched. - */ -static inline bool bch_bkey_try_merge(struct btree_keys *b, - struct bkey_i *l, - struct bkey_i *r) -{ - return b->ops->key_merge - ? b->ops->key_merge(b, l, r) == BCH_MERGE_MERGE - : false; -} - enum bch_extent_overlap { BCH_EXTENT_OVERLAP_FRONT, BCH_EXTENT_OVERLAP_BACK, @@ -436,19 +398,19 @@ struct btree_node_iter { }; static inline void __bch_btree_node_iter_init(struct btree_node_iter *iter, - struct btree_keys *b) + bool is_extents) { iter->used = 0; - iter->is_extents = b->ops->is_extents; + iter->is_extents = is_extents; } void bch_btree_node_iter_push(struct btree_node_iter *, struct btree_keys *, const struct bkey_packed *, const struct bkey_packed *); void bch_btree_node_iter_init(struct btree_node_iter *, struct btree_keys *, - struct bpos, bool); + struct bpos, bool, bool); void bch_btree_node_iter_init_from_start(struct btree_node_iter *, - struct btree_keys *); + struct btree_keys *, bool); struct bkey_packed *bch_btree_node_iter_bset_pos(struct btree_node_iter *, struct btree_keys *, struct bset *); @@ -569,8 +531,8 @@ struct bkey_packed *bch_btree_node_iter_prev(struct btree_node_iter *, * Iterates over all _live_ keys - skipping deleted (and potentially * overlapping) keys */ -#define for_each_btree_node_key(b, k, iter) \ - for (bch_btree_node_iter_init_from_start((iter), (b)); \ +#define for_each_btree_node_key(b, k, iter, _is_extents) \ + for (bch_btree_node_iter_init_from_start((iter), (b), (_is_extents));\ ((k) = bch_btree_node_iter_peek(iter, b)); \ bch_btree_node_iter_advance(iter, b)) @@ -578,8 +540,8 @@ struct bkey_s_c bch_btree_node_iter_peek_unpack(struct btree_node_iter *, struct btree_keys *, struct bkey *); -#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \ - for (bch_btree_node_iter_init_from_start((iter), (b)); \ +#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\ + for (bch_btree_node_iter_init_from_start((iter), (b), (_is_extents));\ (k = bch_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ bch_btree_node_iter_advance(iter, b)) @@ -620,31 +582,13 @@ static inline void btree_keys_account_key(struct btree_nr_keys *n, /* Sorting */ -struct bset_sort_state { - mempool_t pool; - - unsigned page_order; - unsigned crit_factor; - - struct time_stats *time; -}; - -typedef struct btree_nr_keys (*btree_keys_sort_fn)(struct btree_keys *, - struct bset *, - struct btree_node_iter *); - -void bch_bset_sort_state_free(struct bset_sort_state *); -int bch_bset_sort_state_init(struct bset_sort_state *, unsigned, - struct time_stats *times); - -struct btree_nr_keys bch_sort_bsets(struct bset *, struct btree_keys *, - unsigned, struct btree_node_iter *, - btree_keys_sort_fn, - struct bset_sort_state *); - -void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *); -void bch_btree_sort_into(struct btree_keys *, struct btree_keys *, - ptr_filter_fn, struct bset_sort_state *); +struct btree_nr_keys bch_sort_bsets(struct bset *, + struct btree_keys *, + struct btree_node_iter *, + struct bkey_format *, + struct bkey_format *, + key_filter_fn, + key_merge_fn); bool bch_maybe_compact_deleted_keys(struct btree_keys *); @@ -666,7 +610,7 @@ int bch_bkey_print_bfloat(struct btree_keys *, struct bkey_packed *, /* Debug stuff */ void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); -void bch_dump_bucket(struct btree_keys *); +void bch_dump_btree_node(struct btree_keys *); void bch_dump_btree_node_iter(struct btree_keys *, struct btree_node_iter *); #ifdef CONFIG_BCACHE_DEBUG @@ -686,7 +630,6 @@ static inline void bch_verify_key_order(struct btree_keys *b, struct bkey_packed *where) {} #endif - static inline void bch_verify_btree_nr_keys(struct btree_keys *b) { if (btree_keys_expensive_checks(b)) diff --git a/drivers/md/bcache/btree_cache.c b/drivers/md/bcache/btree_cache.c index 6959e4699592..06a9921afbcd 100644 --- a/drivers/md/bcache/btree_cache.c +++ b/drivers/md/bcache/btree_cache.c @@ -119,11 +119,6 @@ int mca_hash_insert(struct cache_set *c, struct btree *b, b->level = level; b->btree_id = id; - bch_btree_keys_init(&b->keys, b->level - ? &bch_btree_interior_node_ops - : bch_btree_ops[id], - &c->expensive_debug_checks); - ret = rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash, bch_btree_cache_params); if (ret) @@ -527,6 +522,7 @@ out: b->keys.set[0].data = NULL; b->sib_u64s[0] = 0; b->sib_u64s[1] = 0; + bch_btree_keys_init(&b->keys, &c->expensive_debug_checks); bch_time_stats_update(&c->mca_alloc_time, start_time); diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c index 1735ea39d397..f59563f5b2ef 100644 --- a/drivers/md/bcache/btree_gc.c +++ b/drivers/md/bcache/btree_gc.c @@ -148,7 +148,9 @@ static bool btree_gc_mark_node(struct cache_set *c, struct btree *b) struct bkey_s_c k; u8 stale = 0; - for_each_btree_node_key_unpack(&b->keys, k, &iter, &unpacked) { + for_each_btree_node_key_unpack(&b->keys, k, &iter, + btree_node_is_extents(b), + &unpacked) { bkey_debugcheck(c, b, k); stale = max(stale, btree_mark_key(c, b, k)); } @@ -849,8 +851,9 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id) struct bkey unpacked; struct bkey_s_c k; - for_each_btree_node_key_unpack(&b->keys, k, - &node_iter, &unpacked) + for_each_btree_node_key_unpack(&b->keys, k, &node_iter, + btree_node_is_extents(b), + &unpacked) btree_mark_key(c, b, k); } diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c index 035ff2a7caff..c82cf93a01f8 100644 --- a/drivers/md/bcache/btree_io.c +++ b/drivers/md/bcache/btree_io.c @@ -18,21 +18,39 @@ static void btree_node_sort(struct cache_set *c, struct btree *b, struct btree_iter *iter, unsigned from, - struct btree_node_iter *node_iter, - btree_keys_sort_fn sort, bool is_write_locked) + struct btree_node_iter *sort_iter, + sort_fix_overlapping_fn sort_fn, + bool is_write_locked) { struct btree_node *out; - bool used_mempool = false; - unsigned order = b->keys.page_order; struct btree_nr_keys nr; + struct btree_node_iter _sort_iter; + bool used_mempool = false; + u64 start_time; + unsigned order; - if (from) { + if (!sort_iter) { struct bset_tree *t; - unsigned u64s = 0; + + sort_iter = &_sort_iter; + __bch_btree_node_iter_init(sort_iter, btree_node_is_extents(b)); for (t = b->keys.set + from; - t <= b->keys.set + b->keys.nsets; t++) - u64s += le16_to_cpu(t->data->u64s); + t <= b->keys.set + b->keys.nsets; + t++) + bch_btree_node_iter_push(sort_iter, &b->keys, + t->data->start, + bset_bkey_last(t->data)); + } + + if (!from) { + order = b->keys.page_order; + } else { + struct btree_node_iter_set *set; + unsigned u64s = 0; + + btree_node_iter_for_each(sort_iter, set) + u64s += set->end - set->k; order = get_order(__set_bytes(b->data, u64s)); } @@ -41,13 +59,42 @@ static void btree_node_sort(struct cache_set *c, struct btree *b, if (!out) { struct page *outp; - outp = mempool_alloc(&c->sort.pool, GFP_NOIO); + outp = mempool_alloc(&c->btree_sort_pool, GFP_NOIO); out = page_address(outp); used_mempool = true; } - nr = bch_sort_bsets(&out->keys, &b->keys, from, - node_iter, sort, &c->sort); + start_time = local_clock(); + + out->keys.u64s = 0; + + /* + * The nr_keys accounting for number of packed/unpacked keys isn't + * broken out by bset, which means we can't merge extents unless we're + * sorting the entire node (we'd have to recalculate nr_keys for the + * entire node). Also, extent merging is problematic if we're not + * sorting the entire node, since we'd end up with extents overlapping + * with 0 length whiteouts in other bsets we didn't sort. + */ + if (sort_fn) + nr = sort_fn(&out->keys, &b->keys, sort_iter); + else if (!from) + nr = bch_sort_bsets(&out->keys, &b->keys, sort_iter, + &b->keys.format, + &b->keys.format, + NULL, + btree_node_ops(b)->key_merge); + else + nr = bch_sort_bsets(&out->keys, &b->keys, sort_iter, + &b->keys.format, + &b->keys.format, + NULL, NULL); + + BUG_ON((void *) bset_bkey_last(&out->keys) > + (void *) out + (PAGE_SIZE << order)); + + if (!from) + bch_time_stats_update(&c->btree_sort_time, start_time); if (!is_write_locked) __btree_node_lock_write(b, iter); @@ -66,22 +113,30 @@ static void btree_node_sort(struct cache_set *c, struct btree *b, out->keys.u64s = cpu_to_le16(u64s); swap(out, b->data); b->keys.set->data = &b->data->keys; + b->keys.nr = nr; } else { + unsigned i; + b->keys.set[from].data->u64s = out->keys.u64s; memcpy(b->keys.set[from].data->start, out->keys.start, (void *) bset_bkey_last(&out->keys) - (void *) out->keys.start); + + for (i = from + 1; i < MAX_BSETS; i++) { + b->keys.nr.bset_u64s[from] += + b->keys.nr.bset_u64s[i]; + b->keys.nr.bset_u64s[i] = 0; + } } b->keys.nsets = from; - b->keys.nr = nr; - bch_bset_build_written_tree(&b->keys, &b->keys.set[from]); + bch_bset_build_ro_aux_tree(&b->keys, &b->keys.set[from]); if (!is_write_locked) __btree_node_unlock_write(b, iter); if (used_mempool) - mempool_free(virt_to_page(out), &c->sort.pool); + mempool_free(virt_to_page(out), &c->btree_sort_pool); else free_pages((unsigned long) out, order); @@ -98,6 +153,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b, struct btree_iter *iter) { unsigned crit = SORT_CRIT; + unsigned crit_factor = int_sqrt(btree_pages(c)); int i = 0; /* Don't sort if nothing to do */ @@ -109,7 +165,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b, goto sort; for (i = b->keys.nsets - 1; i >= 0; --i) { - crit *= c->sort.crit_factor; + crit *= crit_factor; if (le16_to_cpu(b->keys.set[i].data->u64s) < crit) goto sort; @@ -123,7 +179,7 @@ static bool btree_node_compact(struct cache_set *c, struct btree *b, nosort: __btree_node_lock_write(b, iter); - bch_bset_build_written_tree(&b->keys, bset_tree_last(&b->keys)); + bch_bset_build_ro_aux_tree(&b->keys, bset_tree_last(&b->keys)); __btree_node_unlock_write(b, iter); return false; sort: @@ -278,7 +334,7 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b, int ret; iter = mempool_alloc(&c->fill_iter, GFP_NOIO); - __bch_btree_node_iter_init(iter, &b->keys); + __bch_btree_node_iter_init(iter, btree_node_is_extents(b)); err = "dynamic fault"; if (bch_meta_read_fault("btree")) @@ -377,7 +433,7 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b, goto err; btree_node_sort(c, b, NULL, 0, iter, - b->keys.ops->is_extents + btree_node_is_extents(b) ? bch_extent_sort_fix_overlapping : bch_key_sort_fix_overlapping, true); diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c index 197778addb02..315beca5f51d 100644 --- a/drivers/md/bcache/btree_iter.c +++ b/drivers/md/bcache/btree_iter.c @@ -557,7 +557,8 @@ static inline void __btree_iter_init(struct btree_iter *iter, struct btree *b) { bch_btree_node_iter_init(&iter->node_iters[b->level], &b->keys, - iter->pos, iter->is_extents); + iter->pos, iter->is_extents, + btree_node_is_extents(b)); /* Skip to first non whiteout: */ if (b->level) @@ -1106,7 +1107,7 @@ void __bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c, unsigned locks_want, unsigned depth) { iter->level = depth; - iter->is_extents = btree_id == BTREE_ID_EXTENTS; + iter->is_extents = bch_bkey_ops[btree_id]->is_extents; iter->nodes_locked = 0; iter->nodes_intent_locked = 0; iter->locks_want = min(locks_want, BTREE_MAX_DEPTH); diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h index 04ea88560af2..1274b3b8c7de 100644 --- a/drivers/md/bcache/btree_types.h +++ b/drivers/md/bcache/btree_types.h @@ -138,11 +138,21 @@ static inline enum bkey_type btree_node_type(struct btree *b) return b->level ? BKEY_TYPE_BTREE : b->btree_id; } +static inline const struct bkey_ops *btree_node_ops(struct btree *b) +{ + return bch_bkey_ops[btree_node_type(b)]; +} + static inline bool btree_node_has_ptrs(struct btree *b) { return btree_type_has_ptrs(btree_node_type(b)); } +static inline bool btree_node_is_extents(struct btree *b) +{ + return btree_node_type(b) == BKEY_TYPE_EXTENTS; +} + /* * Optional hook that will be called just prior to a btree node update, when * we're holding the write lock and we know what key is about to be overwritten: @@ -182,4 +192,8 @@ enum btree_gc_coalesce_fail_reason { BTREE_GC_COALESCE_FAIL_FORMAT_FITS, }; +typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *, + struct btree_keys *, + struct btree_node_iter *); + #endif /* _BCACHE_BTREE_TYPES_H */ diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index e352bb2f42e5..9e71e6ed3139 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -306,6 +306,37 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c, return b; } +static void bch_btree_sort_into(struct cache_set *c, + struct btree *dst, + struct btree *src) +{ + struct btree_nr_keys nr; + struct btree_node_iter iter; + u64 start_time = local_clock(); + + BUG_ON(dst->keys.nsets); + + dst->keys.set[0].extra = BSET_NO_AUX_TREE_VAL; + + bch_btree_node_iter_init_from_start(&iter, &src->keys, + btree_node_is_extents(src)); + + nr = bch_sort_bsets(dst->keys.set->data, + &src->keys, &iter, + &src->keys.format, + &dst->keys.format, + btree_node_ops(src)->key_normalize, + btree_node_ops(src)->key_merge); + bch_time_stats_update(&c->btree_sort_time, start_time); + + dst->keys.nr.live_u64s += nr.live_u64s; + dst->keys.nr.bset_u64s[0] += nr.bset_u64s[0]; + dst->keys.nr.packed_keys += nr.packed_keys; + dst->keys.nr.unpacked_keys += nr.unpacked_keys; + + bch_verify_btree_nr_keys(&dst->keys); +} + struct btree *__btree_node_alloc_replacement(struct cache_set *c, struct btree *b, struct bkey_format format, @@ -320,9 +351,8 @@ struct btree *__btree_node_alloc_replacement(struct cache_set *c, n->data->format = format; n->keys.format = format; - bch_btree_sort_into(&n->keys, &b->keys, - b->keys.ops->key_normalize, - &c->sort); + bch_btree_sort_into(c, n, b); + btree_node_reset_sib_u64s(n); n->key.k.p = b->key.k.p; @@ -1071,7 +1101,7 @@ static void btree_node_interior_verify(struct btree *b) BUG_ON(!b->level); - bch_btree_node_iter_init(&iter, &b->keys, b->key.k.p, false); + bch_btree_node_iter_init(&iter, &b->keys, b->key.k.p, false, false); #if 1 BUG_ON(!(k = bch_btree_node_iter_peek(&iter, &b->keys)) || bkey_cmp_left_packed(f, k, b->key.k.p)); @@ -1097,7 +1127,7 @@ static void btree_node_interior_verify(struct btree *b) goto err; return; err: - bch_dump_bucket(&b->keys); + bch_dump_btree_node(&b->keys); printk(KERN_ERR "last key %llu:%llu %s\n", b->key.k.p.inode, b->key.k.p.offset, msg); BUG(); @@ -1238,9 +1268,9 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n le16_to_cpu(set2->u64s)); n1->keys.set->size = 0; - n1->keys.set->extra = BSET_AUX_TREE_NONE_VAL; + n1->keys.set->extra = BSET_NO_AUX_TREE_VAL; n2->keys.set->size = 0; - n2->keys.set->extra = BSET_AUX_TREE_NONE_VAL; + n2->keys.set->extra = BSET_NO_AUX_TREE_VAL; btree_node_reset_sib_u64s(n1); btree_node_reset_sib_u64s(n2); @@ -1276,10 +1306,9 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b, struct bkey_packed *p; struct bset *i; - BUG_ON(!b->level); - BUG_ON(b->keys.ops->is_extents); + BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); - bch_btree_node_iter_init(&node_iter, &b->keys, k->k.p, false); + bch_btree_node_iter_init(&node_iter, &b->keys, k->k.p, false, false); while (!bch_keylist_empty(keys)) { k = bch_keylist_front(keys); @@ -1660,12 +1689,8 @@ retry: n->keys.format = new_f; n->key.k.p = next->key.k.p; - bch_btree_sort_into(&n->keys, &prev->keys, - b->keys.ops->key_normalize, - &c->sort); - bch_btree_sort_into(&n->keys, &next->keys, - b->keys.ops->key_normalize, - &c->sort); + bch_btree_sort_into(c, n, prev); + bch_btree_sort_into(c, n, next); six_unlock_write(&n->lock); bkey_init(&delete.k); @@ -1722,7 +1747,7 @@ btree_insert_key(struct btree_insert *trans, int old_live_u64s = b->keys.nr.live_u64s; int live_u64s_added, u64s_added; - ret = !b->keys.ops->is_extents + ret = !btree_node_is_extents(b) ? bch_insert_fixup_key(trans, insert, res) : bch_insert_fixup_extent(trans, insert, res); @@ -2132,7 +2157,7 @@ int bch_btree_delete_range(struct cache_set *c, enum btree_id id, delete.k.p = iter.pos; delete.k.version = version; - if (iter.nodes[0]->keys.ops->is_extents) { + if (iter.is_extents) { /* * The extents btree is special - KEY_TYPE_DISCARD is * used for deletions, not KEY_TYPE_DELETED. This is an diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h index 277a99476d07..3bb6c0ca16b9 100644 --- a/drivers/md/bcache/btree_update.h +++ b/drivers/md/bcache/btree_update.h @@ -199,9 +199,9 @@ static inline size_t bch_btree_keys_u64s_remaining(struct cache_set *c, * insert into could be written out from under us) */ static inline bool bch_btree_node_insert_fits(struct cache_set *c, - struct btree *b, unsigned u64s) + struct btree *b, unsigned u64s) { - if (b->keys.ops->is_extents) { + if (btree_node_is_extents(b)) { /* The insert key might split an existing key * (bch_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case: */ diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index b143f98bb3fb..93a5f00f92f3 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -59,11 +59,7 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b) v->written = 0; v->level = b->level; v->btree_id = b->btree_id; - v->keys.ops = b->keys.ops; - bch_btree_keys_init(&v->keys, v->level - ? &bch_btree_interior_node_ops - : bch_btree_ops[v->btree_id], - &v->c->expensive_debug_checks); + bch_btree_keys_init(&v->keys, &v->c->expensive_debug_checks); pick = bch_btree_pick_ptr(c, b); if (IS_ERR_OR_NULL(pick.ca)) diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c index 888a011a0d9c..b7fc5ca92f32 100644 --- a/drivers/md/bcache/dirent.c +++ b/drivers/md/bcache/dirent.c @@ -138,9 +138,6 @@ static void bch_dirent_to_text(struct cache_set *c, char *buf, } } -const struct btree_keys_ops bch_dirent_ops = { -}; - const struct bkey_ops bch_bkey_dirent_ops = { .key_invalid = bch_dirent_invalid, .val_to_text = bch_dirent_to_text, diff --git a/drivers/md/bcache/dirent.h b/drivers/md/bcache/dirent.h index 63b4aa07f432..d6597da2949b 100644 --- a/drivers/md/bcache/dirent.h +++ b/drivers/md/bcache/dirent.h @@ -1,7 +1,6 @@ #ifndef _BCACHE_DIRENT_H #define _BCACHE_DIRENT_H -extern const struct btree_keys_ops bch_dirent_ops; extern const struct bkey_ops bch_bkey_dirent_ops; struct qstr; diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index a9a39ea67cf3..4be04a208a8c 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -22,6 +22,8 @@ #include <trace/events/bcache.h> static bool __bch_extent_normalize(struct cache_set *, struct bkey_s, bool); +static enum merge_result bch_extent_merge(struct btree_keys *, + struct bkey_i *, struct bkey_i *); static void sort_key_next(struct btree_node_iter *iter, struct btree_keys *b, @@ -76,11 +78,11 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter, __btree_node_offset_to_key(b, r->k)); } -struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b, - struct bset *bset, +struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *dst, + struct btree_keys *b, struct btree_node_iter *iter) { - struct bkey_packed *out = bset->start; + struct bkey_packed *out = dst->start; struct btree_nr_keys nr; memset(&nr, 0, sizeof(nr)); @@ -101,7 +103,7 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b, heap_sift(iter, 0, key_sort_cmp); } - bset->u64s = cpu_to_le16((u64 *) out - bset->_data); + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); return nr; } @@ -590,9 +592,6 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b) return (struct extent_pick_ptr) { .ca = NULL, }; } -const struct btree_keys_ops bch_btree_interior_node_ops = { -}; - const struct bkey_ops bch_bkey_btree_ops = { .key_invalid = bch_btree_ptr_invalid, .key_debugcheck = btree_ptr_debugcheck, @@ -769,7 +768,7 @@ static void extent_sort_append(struct btree_keys *b, bkey_unpack(&tmp.k, f, k); if (*prev && - bch_bkey_try_merge(b, (void *) *prev, &tmp.k)) + bch_extent_merge(b, (void *) *prev, &tmp.k)) return; if (*prev) { @@ -784,8 +783,8 @@ static void extent_sort_append(struct btree_keys *b, bkey_copy(*prev, &tmp.k); } -struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b, - struct bset *bset, +struct btree_nr_keys bch_extent_sort_fix_overlapping(struct bset *dst, + struct btree_keys *b, struct btree_node_iter *iter) { struct bkey_format *f = &b->format; @@ -803,7 +802,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b, lk = __btree_node_offset_to_key(b, _l->k); if (iter->used == 1) { - extent_sort_append(b, &nr, bset->start, &prev, lk); + extent_sort_append(b, &nr, dst->start, &prev, lk); extent_sort_next(iter, b, _l); continue; } @@ -820,7 +819,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b, /* If current key and next key don't overlap, just append */ if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { - extent_sort_append(b, &nr, bset->start, &prev, lk); + extent_sort_append(b, &nr, dst->start, &prev, lk); extent_sort_next(iter, b, _l); continue; } @@ -865,7 +864,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b, extent_sort_sift(iter, b, 0); - extent_sort_append(b, &nr, bset->start, &prev, + extent_sort_append(b, &nr, dst->start, &prev, bkey_to_packed(&tmp.k)); } else { bch_cut_back(bkey_start_pos(r.k), l.k); @@ -878,10 +877,10 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b, btree_keys_account_key_add(&nr, 0, prev); out = bkey_next(prev); } else { - out = bset->start; + out = dst->start; } - bset->u64s = cpu_to_le16((u64 *) out - bset->_data); + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); return nr; } @@ -2385,23 +2384,12 @@ static bool bch_extent_merge_inline(struct btree_iter *iter, } } -static const struct btree_keys_ops bch_extent_ops = { - .key_normalize = bch_ptr_normalize, - .key_merge = bch_extent_merge, - .is_extents = true, -}; - const struct bkey_ops bch_bkey_extent_ops = { .key_invalid = bch_extent_invalid, .key_debugcheck = bch_extent_debugcheck, .val_to_text = bch_extent_to_text, .swab = bch_ptr_swab, + .key_normalize = bch_ptr_normalize, + .key_merge = bch_extent_merge, .is_extents = true, }; - -const struct btree_keys_ops *bch_btree_ops[] = { - [BTREE_ID_EXTENTS] = &bch_extent_ops, - [BTREE_ID_INODES] = &bch_inode_ops, - [BTREE_ID_DIRENTS] = &bch_dirent_ops, - [BTREE_ID_XATTRS] = &bch_xattr_ops, -}; diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index ec8ff0078f08..f3eb2a73e167 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -11,11 +11,11 @@ struct btree_iter; struct btree_insert; struct btree_insert_entry; -struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *, - struct bset *, +struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *, + struct btree_keys *, struct btree_node_iter *); -struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *, - struct bset *, +struct btree_nr_keys bch_extent_sort_fix_overlapping(struct bset *, + struct btree_keys *, struct btree_node_iter *); enum btree_insert_ret @@ -26,9 +26,6 @@ bch_insert_fixup_key(struct btree_insert *, extern const struct bkey_ops bch_bkey_btree_ops; extern const struct bkey_ops bch_bkey_extent_ops; -extern const struct btree_keys_ops bch_btree_interior_node_ops; -extern const struct btree_keys_ops *bch_btree_ops[]; - struct cache_set; struct journal_res; diff --git a/drivers/md/bcache/inode.c b/drivers/md/bcache/inode.c index 9b2a4806805e..d36de43cdfc3 100644 --- a/drivers/md/bcache/inode.c +++ b/drivers/md/bcache/inode.c @@ -100,9 +100,6 @@ static void bch_inode_to_text(struct cache_set *c, char *buf, } } -const struct btree_keys_ops bch_inode_ops = { -}; - const struct bkey_ops bch_bkey_inode_ops = { .key_invalid = bch_inode_invalid, .val_to_text = bch_inode_to_text, diff --git a/drivers/md/bcache/inode.h b/drivers/md/bcache/inode.h index a89064a37861..d8b28c78c277 100644 --- a/drivers/md/bcache/inode.h +++ b/drivers/md/bcache/inode.h @@ -1,7 +1,6 @@ #ifndef _BCACHE_INODE_H #define _BCACHE_INODE_H -extern const struct btree_keys_ops bch_inode_ops; extern const struct bkey_ops bch_bkey_inode_ops; ssize_t bch_inode_status(char *, size_t, const struct bkey *); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e6935106e980..c834c57b0274 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -888,7 +888,6 @@ static void cache_set_free(struct cache_set *c) cancel_work_sync(&c->bio_submit_work); cancel_work_sync(&c->read_retry_work); - bch_bset_sort_state_free(&c->sort); bch_btree_cache_free(c); bch_journal_free(&c->journal); bch_io_clock_exit(&c->io_clock[WRITE]); @@ -897,6 +896,7 @@ static void cache_set_free(struct cache_set *c) bdi_destroy(&c->bdi); free_percpu(c->bucket_stats_lock.lock); free_percpu(c->bucket_stats_percpu); + mempool_exit(&c->btree_sort_pool); mempool_exit(&c->bio_bounce_pages); bioset_exit(&c->bio_write); bioset_exit(&c->bio_read_split); @@ -1162,13 +1162,13 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, PAGE_SECTORS, 0) || !(c->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache_set)) || !(c->bucket_stats_lock.lock = alloc_percpu(*c->bucket_stats_lock.lock)) || + mempool_init_page_pool(&c->btree_sort_pool, 1, + ilog2(btree_pages(c))) || bdi_setup_and_register(&c->bdi, "bcache") || bch_io_clock_init(&c->io_clock[READ]) || bch_io_clock_init(&c->io_clock[WRITE]) || bch_journal_alloc(&c->journal) || bch_btree_cache_alloc(c) || - bch_bset_sort_state_init(&c->sort, ilog2(btree_pages(c)), - &c->btree_sort_time) || bch_compress_init(c)) goto err; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 3c61e57caa46..1c5f9f6f9689 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -496,12 +496,12 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf) "failed prev: %zu\n" "failed overflow: %zu\n", nodes, - stats.sets[BSET_HAS_AUX_TREE].nr, - stats.sets[BSET_HAS_AUX_TREE].bytes, - stats.sets[BSET_HAS_UNWRITTEN_TABLE].nr, - stats.sets[BSET_HAS_UNWRITTEN_TABLE].bytes, - stats.sets[BSET_AUX_TREE_NONE].nr, - stats.sets[BSET_AUX_TREE_NONE].bytes, + stats.sets[BSET_RO_AUX_TREE].nr, + stats.sets[BSET_RO_AUX_TREE].bytes, + stats.sets[BSET_RW_AUX_TREE].nr, + stats.sets[BSET_RW_AUX_TREE].bytes, + stats.sets[BSET_NO_AUX_TREE].nr, + stats.sets[BSET_NO_AUX_TREE].bytes, stats.floats, stats.failed_unpacked, stats.failed_prev, @@ -524,7 +524,7 @@ lock_root: six_lock_read(&b->lock); } while (b != c->btree_roots[BTREE_ID_EXTENTS].b); - for_each_btree_node_key(&b->keys, k, &iter) + for_each_btree_node_key(&b->keys, k, &iter, btree_node_is_extents(b)) bytes += bkey_bytes(k); six_unlock_read(&b->lock); diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c index b5686e472ab9..002994eae758 100644 --- a/drivers/md/bcache/xattr.c +++ b/drivers/md/bcache/xattr.c @@ -157,9 +157,6 @@ static void bch_xattr_to_text(struct cache_set *c, char *buf, } } -const struct btree_keys_ops bch_xattr_ops = { -}; - const struct bkey_ops bch_bkey_xattr_ops = { .key_invalid = bch_xattr_invalid, .val_to_text = bch_xattr_to_text, diff --git a/drivers/md/bcache/xattr.h b/drivers/md/bcache/xattr.h index 675529d328d0..5e40f13fd11e 100644 --- a/drivers/md/bcache/xattr.h +++ b/drivers/md/bcache/xattr.h @@ -1,7 +1,6 @@ #ifndef _BCACHE_XATTR_H #define _BCACHE_XATTR_H -extern const struct btree_keys_ops bch_xattr_ops; extern const struct bkey_ops bch_bkey_xattr_ops; struct dentry; |