diff options
-rw-r--r-- | drivers/md/bcache/btree_types.h | 14 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.c | 28 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.h | 5 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 124 | ||||
-rw-r--r-- | drivers/md/bcache/extents.h | 10 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/io_types.h | 8 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 3 | ||||
-rw-r--r-- | include/trace/events/bcache.h | 9 |
9 files changed, 123 insertions, 81 deletions
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h index 0385f7107a4d..4f335f56aa67 100644 --- a/drivers/md/bcache/btree_types.h +++ b/drivers/md/bcache/btree_types.h @@ -134,4 +134,18 @@ static inline bool btree_node_has_ptrs(struct btree *b) return btree_type_has_ptrs(btree_node_type(b)); } +/* + * Optional hook that will be called just prior to a btree node update, when + * we're holding the write lock and we know what key is about to be overwritten: + */ + +struct btree_iter; +struct btree_insert_hook { + void (*fn)(struct btree_insert_hook *, + struct btree_iter *iter, + struct bkey_s_c k, + struct bkey_i *insert, + struct journal_res *res); +}; + #endif /* _BCACHE_BTREE_TYPES_H */ diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index 9c81b9d7516d..593b5c821f97 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -738,7 +738,7 @@ void bch_btree_insert_and_journal(struct btree_iter *iter, static void btree_insert_key(struct btree_iter *iter, struct btree *b, struct btree_node_iter *node_iter, struct keylist *insert_keys, - struct bch_replace_info *replace, + struct btree_insert_hook *hook, struct journal_res *res, unsigned flags) { @@ -751,7 +751,6 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b, if (b->level) { BUG_ON(res->ref); - BUG_ON(replace); bch_insert_fixup_btree_ptr(iter, b, insert, node_iter); bch_keylist_dequeue(insert_keys); @@ -759,7 +758,7 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b, BUG_ON(iter->nodes[0] != b || &iter->node_iters[0] != node_iter); - bch_insert_fixup_key(iter, insert, replace, res); + bch_insert_fixup_key(iter, insert, hook, res); bch_keylist_dequeue(insert_keys); } else { BUG_ON(iter->nodes[0] != b || @@ -771,7 +770,7 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b, if (bkey_cmp(insert->k.p, b->key.k.p) > 0) bch_cut_back(b->key.k.p, &insert->k); - bch_insert_fixup_extent(iter, insert, replace, res, flags); + bch_insert_fixup_extent(iter, insert, hook, res, flags); bch_cut_front(iter->pos, orig); if (orig->k.size == 0) @@ -780,7 +779,7 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b, bch_count_data_verify(&b->keys, oldsize); - trace_bcache_btree_insert_key(b, insert, replace != NULL); + trace_bcache_btree_insert_key(b, insert); } enum btree_insert_status { @@ -1139,7 +1138,7 @@ static enum btree_insert_status bch_btree_insert_keys_leaf(struct btree *b, struct btree_iter *iter, struct keylist *insert_keys, - struct bch_replace_info *replace, + struct btree_insert_hook *hook, u64 *journal_seq, unsigned flags) { @@ -1196,7 +1195,7 @@ bch_btree_insert_keys_leaf(struct btree *b, break; btree_insert_key(iter, b, &iter->node_iters[b->level], - insert_keys, replace, &res, flags); + insert_keys, hook, &res, flags); } btree_node_unlock_write(b, iter); @@ -1524,7 +1523,7 @@ err: * * @iter: btree iterator * @insert_keys: list of keys to insert - * @replace: old key for compare exchange (+ stats) + * @hook: insert callback * @persistent: if not null, @persistent will wait on journal write * @flags: BTREE_INSERT_NOFAIL_IF_STALE * @@ -1594,7 +1593,7 @@ static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags) * bch_btree_insert_at - insert bkeys starting at a given btree node * @iter: btree iterator * @insert_keys: list of keys to insert - * @replace: old key for compare exchange (+ stats) + * @hook: insert callback * @persistent: if not null, @persistent will wait on journal write * @flags: BTREE_INSERT_ATOMIC | BTREE_INSERT_NOFAIL_IF_STALE * @@ -1625,7 +1624,7 @@ static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags) */ int bch_btree_insert_at(struct btree_iter *iter, struct keylist *insert_keys, - struct bch_replace_info *replace, + struct btree_insert_hook *hook, u64 *journal_seq, unsigned flags) { int ret = -EINTR; @@ -1644,7 +1643,7 @@ int bch_btree_insert_at(struct btree_iter *iter, iter->pos)); switch (bch_btree_insert_keys_leaf(iter->nodes[0], iter, - insert_keys, replace, + insert_keys, hook, journal_seq, flags)) { case BTREE_INSERT_OK: ret = 0; @@ -1871,10 +1870,10 @@ int bch_btree_insert_check_key(struct btree_iter *iter, * @c: pointer to struct cache_set * @id: btree to insert into * @insert_keys: list of keys to insert - * @replace: old key for compare exchange (+ stats) + * @hook: insert callback */ int bch_btree_insert(struct cache_set *c, enum btree_id id, - struct keylist *keys, struct bch_replace_info *replace, + struct keylist *keys, struct btree_insert_hook *hook, u64 *journal_seq, int flags) { struct btree_iter iter; @@ -1887,8 +1886,7 @@ int bch_btree_insert(struct cache_set *c, enum btree_id id, if (unlikely(ret)) goto out; - ret = bch_btree_insert_at(&iter, keys, replace, - journal_seq, flags); + ret = bch_btree_insert_at(&iter, keys, hook, journal_seq, flags); out: ret2 = bch_btree_iter_unlock(&iter); return ret ?: ret2; diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h index 0310147f73e3..6552cdf5b4f6 100644 --- a/drivers/md/bcache/btree_update.h +++ b/drivers/md/bcache/btree_update.h @@ -8,7 +8,6 @@ struct cache_set; struct bkey_format_state; struct bkey_format; struct btree; -struct bch_replace_info; struct btree_reserve { unsigned nr; @@ -174,7 +173,7 @@ int bch_btree_insert_node(struct btree *, struct btree_iter *, #define BTREE_INSERT_NOFAIL_IF_STALE (1 << 2) int bch_btree_insert_at(struct btree_iter *, struct keylist *, - struct bch_replace_info *, u64 *, unsigned); + struct btree_insert_hook *, u64 *, unsigned); struct btree_insert_multi { struct btree_iter *iter; @@ -186,7 +185,7 @@ int bch_btree_insert_at_multi(struct btree_insert_multi[], unsigned, int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *); int bch_btree_insert(struct cache_set *, enum btree_id, struct keylist *, - struct bch_replace_info *, u64 *, int flags); + struct btree_insert_hook *, u64 *, int flags); int bch_btree_update(struct cache_set *, enum btree_id, struct bkey_i *, u64 *); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index ff50f66a6a0e..1cf07f2a654e 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -107,7 +107,7 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b, void bch_insert_fixup_key(struct btree_iter *iter, struct bkey_i *insert, - struct bch_replace_info *replace, + struct btree_insert_hook *hook, struct journal_res *res) { struct btree *b = iter->nodes[0]; @@ -117,7 +117,7 @@ void bch_insert_fixup_key(struct btree_iter *iter, int cmp; BUG_ON(iter->level); - BUG_ON(replace); + BUG_ON(hook); EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) && (bkey_deleted(k) ? bkey_cmp_packed(f, k, &insert->k) > 0 @@ -861,7 +861,7 @@ static void bch_drop_subtract(struct btree_iter *iter, struct bkey_s k) * splitting done in bch_extent_insert_fixup, preserving such * caching is difficult. */ -static bool bkey_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r) +static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r) { struct bkey_s_c_extent le, re; const struct bch_extent_ptr *lp, *rp; @@ -937,14 +937,17 @@ static bool bkey_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r) * On return, there is room in @res for at least one more key of the same size * as @new. */ -static bool bkey_cmpxchg(struct btree_iter *iter, - struct bkey_s_c k, - struct bch_replace_info *replace, - struct bkey_i *new, - struct journal_res *res) +void bch_extent_cmpxchg(struct btree_insert_hook *hook, + struct btree_iter *iter, + struct bkey_s_c k, + struct bkey_i *new, + struct journal_res *res) { + struct bch_replace_info *replace = container_of(hook, + struct bch_replace_info, hook); struct bkey_i *old = &replace->key; - bool ret; + + BUG_ON(iter->btree_id != BTREE_ID_EXTENTS); /* must have something to compare against */ BUG_ON(!bkey_val_u64s(&old->k)); @@ -954,6 +957,11 @@ static bool bkey_cmpxchg(struct btree_iter *iter, bkey_cmp(bkey_start_pos(&new->k), bkey_start_pos(&old->k)) < 0); + if (!k.k) { + bch_cut_subtract_back(iter, iter->pos, bkey_i_to_s(new)); + return; + } + /* * first, check if there was a hole - part of the new key that we * haven't checked against any existing key @@ -983,8 +991,7 @@ static bool bkey_cmpxchg(struct btree_iter *iter, bch_btree_iter_set_pos(iter, bkey_start_pos(k.k)); } - ret = bkey_cmpxchg_cmp(k, bkey_i_to_s_c(old)); - if (!ret) { + if (!bch_extent_cmpxchg_cmp(k, bkey_i_to_s_c(old))) { /* failed: */ replace->failures += 1; @@ -1010,13 +1017,9 @@ static bool bkey_cmpxchg(struct btree_iter *iter, bch_cut_subtract_front(iter, k.k->p, bkey_i_to_s(new)); } else replace->successes += 1; - - /* advance @iter->pos past the part of @k overlapping @new */ - bch_btree_iter_set_pos(iter, bkey_cmp(k.k->p, new->k.p) < 0 - ? k.k->p : new->k.p); - return ret; } +#if 0 /* We are trying to insert a key with an older version than the existing one */ static void handle_existing_key_newer(struct btree_iter *iter, struct bkey_i *insert, @@ -1065,6 +1068,7 @@ static void handle_existing_key_newer(struct btree_iter *iter, break; } } +#endif #define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC) @@ -1109,7 +1113,7 @@ static void handle_existing_key_newer(struct btree_iter *iter, */ void bch_insert_fixup_extent(struct btree_iter *iter, struct bkey_i *insert, - struct bch_replace_info *replace, + struct btree_insert_hook *hook, struct journal_res *res, unsigned flags) { @@ -1117,10 +1121,10 @@ void bch_insert_fixup_extent(struct btree_iter *iter, struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; const struct bkey_format *f = &b->keys.format; - struct bpos insert_end = insert->k.p; struct bkey_packed *_k; struct bkey_tup tup; struct bkey_s k; + struct bpos next_pos; BKEY_PADDED(k) split; unsigned nr_done = 0; u64 start_time = local_clock(); @@ -1160,8 +1164,6 @@ void bch_insert_fixup_extent(struct btree_iter *iter, /* We raced - a dirty pointer was stale */ bch_btree_iter_set_pos(iter, insert->k.p); insert->k.size = 0; - if (replace != NULL) - replace->failures += 1; return; } @@ -1204,9 +1206,28 @@ void bch_insert_fixup_extent(struct btree_iter *iter, * need to split */ bch_cut_subtract_back(iter, iter->pos, bkey_i_to_s(insert)); - goto out; + if (!insert->k.size) + return; + break; } - +#if 0 + /* + * Currently broken w.r.t. cmpxchg: - + * handle_existing_key_newer() can't insert @insert itself + * (which it does when @k splits @insert) because bkey_cmpxchg() + * hasn't seen @insert yet, and might not want to insert it: + * + * need to change handle_existing_key_newer() to just drop the + * tail half of the split until later, and have + * btree_insert_key() be responsible for inserting it later - + * but tricky: + */ + if (k.k->size && insert->k.version && + insert->k.version < k.k->version) { + handle_existing_key_newer(iter, insert, k.k, res); + continue; + } +#endif /* * We might overlap with 0 size extents; we can't skip these * because if they're in the set we're inserting to we have to @@ -1214,22 +1235,29 @@ void bch_insert_fixup_extent(struct btree_iter *iter, * inserting. But we don't want to check them for replace * operations. */ - if (!replace) - bch_btree_iter_set_pos(iter, - bkey_cmp(k.k->p, insert->k.p) < 0 - ? k.k->p : insert->k.p); - else if (k.k->size && - !bkey_cmpxchg(iter, k.s_c, replace, - insert, res)) - continue; + if (k.k->size) { + next_pos = bkey_cmp(k.k->p, insert->k.p) < 0 + ? k.k->p : insert->k.p; - if (k.k->size && insert->k.version && - insert->k.version < k.k->version) { - handle_existing_key_newer(iter, insert, k.k, res); - continue; + if (hook) + hook->fn(hook, iter, k.s_c, insert, res); + + /* + * Don't update iter->pos until after calling the hook, + * because the hook fn may use it: + */ + bch_btree_iter_set_pos(iter, next_pos); + + if (!insert->k.size) + return; + + /* insert and k might not overlap after calling hook fn: */ + if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0 || + bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0) + continue; } - /* k is the key currently in the tree, 'insert' the new key */ + /* k is the key currently in the tree, 'insert' is the new key */ switch (bch_extent_overlap(&insert->k, k.k)) { case BCH_EXTENT_OVERLAP_FRONT: @@ -1293,23 +1321,17 @@ void bch_insert_fixup_extent(struct btree_iter *iter, } } - /* Was there a hole? */ - if (bkey_cmp(iter->pos, insert->k.p) < 0) { - /* - * Holes not allowed for cmpxchg operations, so chop off - * whatever we're not inserting: - */ - if (replace != NULL) - bch_cut_subtract_back(iter, iter->pos, bkey_i_to_s(insert)); + next_pos = insert->k.p; - /* - * We did get to the end of @insert, so update iter->pos: - */ - bch_btree_iter_set_pos(iter, insert_end); - } -out: - if (insert->k.size) - bch_btree_insert_and_journal(iter, insert, res); + if (hook) + hook->fn(hook, iter, bkey_s_c_null, insert, res); + + bch_btree_iter_set_pos(iter, next_pos); + + if (!insert->k.size) + return; + + bch_btree_insert_and_journal(iter, insert, res); } static const char *bch_extent_invalid(const struct cache_set *c, diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index 5d1d55c4189c..082745c6854c 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -17,7 +17,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *, struct btree_node_iter *); void bch_insert_fixup_key(struct btree_iter *, struct bkey_i *, - struct bch_replace_info *, struct journal_res *); + struct btree_insert_hook *, struct journal_res *); extern const struct bkey_ops bch_bkey_btree_ops; extern const struct bkey_ops bch_bkey_extent_ops; @@ -47,8 +47,14 @@ bch_extent_pick_ptr(struct cache_set *c, struct bkey_s_c k, bch_extent_pick_ptr_avoiding(c, k, NULL, ret); } +void bch_extent_cmpxchg(struct btree_insert_hook *, + struct btree_iter *, + struct bkey_s_c, + struct bkey_i *, + struct journal_res *); + void bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *, - struct bch_replace_info *, + struct btree_insert_hook *, struct journal_res *, unsigned); void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index acc7f2df7731..7e551018e19c 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -435,7 +435,7 @@ static void bch_write_index(struct closure *cl) int ret; ret = bch_btree_insert(op->c, BTREE_ID_EXTENTS, &op->insert_keys, - op->replace ? &op->replace_info : NULL, + op->replace ? &op->replace_info.hook : NULL, op_journal_seq(op), BTREE_INSERT_NOFAIL); op->written += sectors_start - keylist_sectors(&op->insert_keys); @@ -1095,6 +1095,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c, op->replace = true; /* The caller can overwrite any replace_info fields */ memset(&op->replace_info, 0, sizeof(op->replace_info)); + op->replace_info.hook.fn = bch_extent_cmpxchg; bkey_reassemble(&op->replace_info.key, replace_key); } } diff --git a/drivers/md/bcache/io_types.h b/drivers/md/bcache/io_types.h index 495743950af8..26cec226621e 100644 --- a/drivers/md/bcache/io_types.h +++ b/drivers/md/bcache/io_types.h @@ -1,6 +1,7 @@ #ifndef _BCACHE_IO_TYPES_H #define _BCACHE_IO_TYPES_H +#include "btree_types.h" #include "keylist_types.h" #include <linux/llist.h> @@ -59,8 +60,11 @@ struct bch_write_bio { }; struct bch_replace_info { - unsigned successes; /* How many insertions succeeded */ - unsigned failures; /* How many insertions failed */ + struct btree_insert_hook hook; + /* How many insertions succeeded */ + unsigned successes; + /* How many insertions failed */ + unsigned failures; BKEY_PADDED(key); }; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index e066c6df89e3..1a0424961a40 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -115,11 +115,12 @@ static void write_dirty_finish(struct closure *cl) int ret; bkey_copy(&tmp.k, &io->replace.key); + io->replace.hook.fn = bch_extent_cmpxchg; bkey_extent_set_cached(&tmp.k.k, true); ret = bch_btree_insert(dc->disk.c, BTREE_ID_EXTENTS, &keylist_single(&tmp.k), - &io->replace, NULL, 0); + &io->replace.hook, NULL, 0); if (io->replace.successes == 0) trace_bcache_writeback_collision(&io->replace.key.k); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index fc417f35af67..3a710a91125f 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -619,8 +619,8 @@ DEFINE_EVENT(btree_node_op, bcache_btree_intent_lock_fail, ); TRACE_EVENT(bcache_btree_insert_key, - TP_PROTO(struct btree *b, struct bkey_i *k, unsigned op), - TP_ARGS(b, k, op), + TP_PROTO(struct btree *b, struct bkey_i *k), + TP_ARGS(b, k), TP_STRUCT__entry( __field(u64, b_bucket ) @@ -631,7 +631,6 @@ TRACE_EVENT(bcache_btree_insert_key, __field(u32, size ) __field(u8, level ) __field(u8, id ) - __field(u8, op ) ), TP_fast_assign( @@ -643,11 +642,9 @@ TRACE_EVENT(bcache_btree_insert_key, __entry->inode = k->k.p.inode; __entry->offset = k->k.p.offset; __entry->size = k->k.size; - __entry->op = op; ), - TP_printk("%s at bucket %llu(%u) id %u: %u:%llu %u:%llu len %u", - __entry->op ? "replace" : "insert", + TP_printk("bucket %llu(%u) id %u: %u:%llu %u:%llu len %u", __entry->b_bucket, __entry->level, __entry->id, __entry->b_inode, __entry->b_offset, __entry->inode, __entry->offset, __entry->size) |