summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-01-25 18:56:32 -0900
committerKent Overstreet <kent.overstreet@gmail.com>2016-10-07 12:35:25 -0800
commit84eb193e7389b9effc0e5f607c9999f5a16a77b4 (patch)
treef176d11fbc910b3ef2311101aa62a0d214dfb45f
parenta0691c1298f269e6543158c724b45b8ae05d6b52 (diff)
bcache: Turn the bkey_cmpxchg path into a generic insert hook
-rw-r--r--drivers/md/bcache/btree_types.h14
-rw-r--r--drivers/md/bcache/btree_update.c28
-rw-r--r--drivers/md/bcache/btree_update.h5
-rw-r--r--drivers/md/bcache/extents.c124
-rw-r--r--drivers/md/bcache/extents.h10
-rw-r--r--drivers/md/bcache/io.c3
-rw-r--r--drivers/md/bcache/io_types.h8
-rw-r--r--drivers/md/bcache/writeback.c3
-rw-r--r--include/trace/events/bcache.h9
9 files changed, 123 insertions, 81 deletions
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index 0385f7107a4d..4f335f56aa67 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -134,4 +134,18 @@ static inline bool btree_node_has_ptrs(struct btree *b)
return btree_type_has_ptrs(btree_node_type(b));
}
+/*
+ * Optional hook that will be called just prior to a btree node update, when
+ * we're holding the write lock and we know what key is about to be overwritten:
+ */
+
+struct btree_iter;
+struct btree_insert_hook {
+ void (*fn)(struct btree_insert_hook *,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bkey_i *insert,
+ struct journal_res *res);
+};
+
#endif /* _BCACHE_BTREE_TYPES_H */
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index 9c81b9d7516d..593b5c821f97 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -738,7 +738,7 @@ void bch_btree_insert_and_journal(struct btree_iter *iter,
static void btree_insert_key(struct btree_iter *iter, struct btree *b,
struct btree_node_iter *node_iter,
struct keylist *insert_keys,
- struct bch_replace_info *replace,
+ struct btree_insert_hook *hook,
struct journal_res *res,
unsigned flags)
{
@@ -751,7 +751,6 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b,
if (b->level) {
BUG_ON(res->ref);
- BUG_ON(replace);
bch_insert_fixup_btree_ptr(iter, b, insert, node_iter);
bch_keylist_dequeue(insert_keys);
@@ -759,7 +758,7 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b,
BUG_ON(iter->nodes[0] != b ||
&iter->node_iters[0] != node_iter);
- bch_insert_fixup_key(iter, insert, replace, res);
+ bch_insert_fixup_key(iter, insert, hook, res);
bch_keylist_dequeue(insert_keys);
} else {
BUG_ON(iter->nodes[0] != b ||
@@ -771,7 +770,7 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b,
if (bkey_cmp(insert->k.p, b->key.k.p) > 0)
bch_cut_back(b->key.k.p, &insert->k);
- bch_insert_fixup_extent(iter, insert, replace, res, flags);
+ bch_insert_fixup_extent(iter, insert, hook, res, flags);
bch_cut_front(iter->pos, orig);
if (orig->k.size == 0)
@@ -780,7 +779,7 @@ static void btree_insert_key(struct btree_iter *iter, struct btree *b,
bch_count_data_verify(&b->keys, oldsize);
- trace_bcache_btree_insert_key(b, insert, replace != NULL);
+ trace_bcache_btree_insert_key(b, insert);
}
enum btree_insert_status {
@@ -1139,7 +1138,7 @@ static enum btree_insert_status
bch_btree_insert_keys_leaf(struct btree *b,
struct btree_iter *iter,
struct keylist *insert_keys,
- struct bch_replace_info *replace,
+ struct btree_insert_hook *hook,
u64 *journal_seq,
unsigned flags)
{
@@ -1196,7 +1195,7 @@ bch_btree_insert_keys_leaf(struct btree *b,
break;
btree_insert_key(iter, b, &iter->node_iters[b->level],
- insert_keys, replace, &res, flags);
+ insert_keys, hook, &res, flags);
}
btree_node_unlock_write(b, iter);
@@ -1524,7 +1523,7 @@ err:
*
* @iter: btree iterator
* @insert_keys: list of keys to insert
- * @replace: old key for compare exchange (+ stats)
+ * @hook: insert callback
* @persistent: if not null, @persistent will wait on journal write
* @flags: BTREE_INSERT_NOFAIL_IF_STALE
*
@@ -1594,7 +1593,7 @@ static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags)
* bch_btree_insert_at - insert bkeys starting at a given btree node
* @iter: btree iterator
* @insert_keys: list of keys to insert
- * @replace: old key for compare exchange (+ stats)
+ * @hook: insert callback
* @persistent: if not null, @persistent will wait on journal write
* @flags: BTREE_INSERT_ATOMIC | BTREE_INSERT_NOFAIL_IF_STALE
*
@@ -1625,7 +1624,7 @@ static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags)
*/
int bch_btree_insert_at(struct btree_iter *iter,
struct keylist *insert_keys,
- struct bch_replace_info *replace,
+ struct btree_insert_hook *hook,
u64 *journal_seq, unsigned flags)
{
int ret = -EINTR;
@@ -1644,7 +1643,7 @@ int bch_btree_insert_at(struct btree_iter *iter,
iter->pos));
switch (bch_btree_insert_keys_leaf(iter->nodes[0], iter,
- insert_keys, replace,
+ insert_keys, hook,
journal_seq, flags)) {
case BTREE_INSERT_OK:
ret = 0;
@@ -1871,10 +1870,10 @@ int bch_btree_insert_check_key(struct btree_iter *iter,
* @c: pointer to struct cache_set
* @id: btree to insert into
* @insert_keys: list of keys to insert
- * @replace: old key for compare exchange (+ stats)
+ * @hook: insert callback
*/
int bch_btree_insert(struct cache_set *c, enum btree_id id,
- struct keylist *keys, struct bch_replace_info *replace,
+ struct keylist *keys, struct btree_insert_hook *hook,
u64 *journal_seq, int flags)
{
struct btree_iter iter;
@@ -1887,8 +1886,7 @@ int bch_btree_insert(struct cache_set *c, enum btree_id id,
if (unlikely(ret))
goto out;
- ret = bch_btree_insert_at(&iter, keys, replace,
- journal_seq, flags);
+ ret = bch_btree_insert_at(&iter, keys, hook, journal_seq, flags);
out: ret2 = bch_btree_iter_unlock(&iter);
return ret ?: ret2;
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index 0310147f73e3..6552cdf5b4f6 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -8,7 +8,6 @@ struct cache_set;
struct bkey_format_state;
struct bkey_format;
struct btree;
-struct bch_replace_info;
struct btree_reserve {
unsigned nr;
@@ -174,7 +173,7 @@ int bch_btree_insert_node(struct btree *, struct btree_iter *,
#define BTREE_INSERT_NOFAIL_IF_STALE (1 << 2)
int bch_btree_insert_at(struct btree_iter *, struct keylist *,
- struct bch_replace_info *, u64 *, unsigned);
+ struct btree_insert_hook *, u64 *, unsigned);
struct btree_insert_multi {
struct btree_iter *iter;
@@ -186,7 +185,7 @@ int bch_btree_insert_at_multi(struct btree_insert_multi[], unsigned,
int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
int bch_btree_insert(struct cache_set *, enum btree_id, struct keylist *,
- struct bch_replace_info *, u64 *, int flags);
+ struct btree_insert_hook *, u64 *, int flags);
int bch_btree_update(struct cache_set *, enum btree_id,
struct bkey_i *, u64 *);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index ff50f66a6a0e..1cf07f2a654e 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -107,7 +107,7 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
void bch_insert_fixup_key(struct btree_iter *iter,
struct bkey_i *insert,
- struct bch_replace_info *replace,
+ struct btree_insert_hook *hook,
struct journal_res *res)
{
struct btree *b = iter->nodes[0];
@@ -117,7 +117,7 @@ void bch_insert_fixup_key(struct btree_iter *iter,
int cmp;
BUG_ON(iter->level);
- BUG_ON(replace);
+ BUG_ON(hook);
EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
(bkey_deleted(k)
? bkey_cmp_packed(f, k, &insert->k) > 0
@@ -861,7 +861,7 @@ static void bch_drop_subtract(struct btree_iter *iter, struct bkey_s k)
* splitting done in bch_extent_insert_fixup, preserving such
* caching is difficult.
*/
-static bool bkey_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
+static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
{
struct bkey_s_c_extent le, re;
const struct bch_extent_ptr *lp, *rp;
@@ -937,14 +937,17 @@ static bool bkey_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
* On return, there is room in @res for at least one more key of the same size
* as @new.
*/
-static bool bkey_cmpxchg(struct btree_iter *iter,
- struct bkey_s_c k,
- struct bch_replace_info *replace,
- struct bkey_i *new,
- struct journal_res *res)
+void bch_extent_cmpxchg(struct btree_insert_hook *hook,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bkey_i *new,
+ struct journal_res *res)
{
+ struct bch_replace_info *replace = container_of(hook,
+ struct bch_replace_info, hook);
struct bkey_i *old = &replace->key;
- bool ret;
+
+ BUG_ON(iter->btree_id != BTREE_ID_EXTENTS);
/* must have something to compare against */
BUG_ON(!bkey_val_u64s(&old->k));
@@ -954,6 +957,11 @@ static bool bkey_cmpxchg(struct btree_iter *iter,
bkey_cmp(bkey_start_pos(&new->k),
bkey_start_pos(&old->k)) < 0);
+ if (!k.k) {
+ bch_cut_subtract_back(iter, iter->pos, bkey_i_to_s(new));
+ return;
+ }
+
/*
* first, check if there was a hole - part of the new key that we
* haven't checked against any existing key
@@ -983,8 +991,7 @@ static bool bkey_cmpxchg(struct btree_iter *iter,
bch_btree_iter_set_pos(iter, bkey_start_pos(k.k));
}
- ret = bkey_cmpxchg_cmp(k, bkey_i_to_s_c(old));
- if (!ret) {
+ if (!bch_extent_cmpxchg_cmp(k, bkey_i_to_s_c(old))) {
/* failed: */
replace->failures += 1;
@@ -1010,13 +1017,9 @@ static bool bkey_cmpxchg(struct btree_iter *iter,
bch_cut_subtract_front(iter, k.k->p, bkey_i_to_s(new));
} else
replace->successes += 1;
-
- /* advance @iter->pos past the part of @k overlapping @new */
- bch_btree_iter_set_pos(iter, bkey_cmp(k.k->p, new->k.p) < 0
- ? k.k->p : new->k.p);
- return ret;
}
+#if 0
/* We are trying to insert a key with an older version than the existing one */
static void handle_existing_key_newer(struct btree_iter *iter,
struct bkey_i *insert,
@@ -1065,6 +1068,7 @@ static void handle_existing_key_newer(struct btree_iter *iter,
break;
}
}
+#endif
#define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC)
@@ -1109,7 +1113,7 @@ static void handle_existing_key_newer(struct btree_iter *iter,
*/
void bch_insert_fixup_extent(struct btree_iter *iter,
struct bkey_i *insert,
- struct bch_replace_info *replace,
+ struct btree_insert_hook *hook,
struct journal_res *res,
unsigned flags)
{
@@ -1117,10 +1121,10 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
struct btree *b = iter->nodes[0];
struct btree_node_iter *node_iter = &iter->node_iters[0];
const struct bkey_format *f = &b->keys.format;
- struct bpos insert_end = insert->k.p;
struct bkey_packed *_k;
struct bkey_tup tup;
struct bkey_s k;
+ struct bpos next_pos;
BKEY_PADDED(k) split;
unsigned nr_done = 0;
u64 start_time = local_clock();
@@ -1160,8 +1164,6 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
/* We raced - a dirty pointer was stale */
bch_btree_iter_set_pos(iter, insert->k.p);
insert->k.size = 0;
- if (replace != NULL)
- replace->failures += 1;
return;
}
@@ -1204,9 +1206,28 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
* need to split
*/
bch_cut_subtract_back(iter, iter->pos, bkey_i_to_s(insert));
- goto out;
+ if (!insert->k.size)
+ return;
+ break;
}
-
+#if 0
+ /*
+ * Currently broken w.r.t. cmpxchg: -
+ * handle_existing_key_newer() can't insert @insert itself
+ * (which it does when @k splits @insert) because bkey_cmpxchg()
+ * hasn't seen @insert yet, and might not want to insert it:
+ *
+ * need to change handle_existing_key_newer() to just drop the
+ * tail half of the split until later, and have
+ * btree_insert_key() be responsible for inserting it later -
+ * but tricky:
+ */
+ if (k.k->size && insert->k.version &&
+ insert->k.version < k.k->version) {
+ handle_existing_key_newer(iter, insert, k.k, res);
+ continue;
+ }
+#endif
/*
* We might overlap with 0 size extents; we can't skip these
* because if they're in the set we're inserting to we have to
@@ -1214,22 +1235,29 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
* inserting. But we don't want to check them for replace
* operations.
*/
- if (!replace)
- bch_btree_iter_set_pos(iter,
- bkey_cmp(k.k->p, insert->k.p) < 0
- ? k.k->p : insert->k.p);
- else if (k.k->size &&
- !bkey_cmpxchg(iter, k.s_c, replace,
- insert, res))
- continue;
+ if (k.k->size) {
+ next_pos = bkey_cmp(k.k->p, insert->k.p) < 0
+ ? k.k->p : insert->k.p;
- if (k.k->size && insert->k.version &&
- insert->k.version < k.k->version) {
- handle_existing_key_newer(iter, insert, k.k, res);
- continue;
+ if (hook)
+ hook->fn(hook, iter, k.s_c, insert, res);
+
+ /*
+ * Don't update iter->pos until after calling the hook,
+ * because the hook fn may use it:
+ */
+ bch_btree_iter_set_pos(iter, next_pos);
+
+ if (!insert->k.size)
+ return;
+
+ /* insert and k might not overlap after calling hook fn: */
+ if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0 ||
+ bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0)
+ continue;
}
- /* k is the key currently in the tree, 'insert' the new key */
+ /* k is the key currently in the tree, 'insert' is the new key */
switch (bch_extent_overlap(&insert->k, k.k)) {
case BCH_EXTENT_OVERLAP_FRONT:
@@ -1293,23 +1321,17 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
}
}
- /* Was there a hole? */
- if (bkey_cmp(iter->pos, insert->k.p) < 0) {
- /*
- * Holes not allowed for cmpxchg operations, so chop off
- * whatever we're not inserting:
- */
- if (replace != NULL)
- bch_cut_subtract_back(iter, iter->pos, bkey_i_to_s(insert));
+ next_pos = insert->k.p;
- /*
- * We did get to the end of @insert, so update iter->pos:
- */
- bch_btree_iter_set_pos(iter, insert_end);
- }
-out:
- if (insert->k.size)
- bch_btree_insert_and_journal(iter, insert, res);
+ if (hook)
+ hook->fn(hook, iter, bkey_s_c_null, insert, res);
+
+ bch_btree_iter_set_pos(iter, next_pos);
+
+ if (!insert->k.size)
+ return;
+
+ bch_btree_insert_and_journal(iter, insert, res);
}
static const char *bch_extent_invalid(const struct cache_set *c,
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 5d1d55c4189c..082745c6854c 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -17,7 +17,7 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *,
struct btree_node_iter *);
void bch_insert_fixup_key(struct btree_iter *, struct bkey_i *,
- struct bch_replace_info *, struct journal_res *);
+ struct btree_insert_hook *, struct journal_res *);
extern const struct bkey_ops bch_bkey_btree_ops;
extern const struct bkey_ops bch_bkey_extent_ops;
@@ -47,8 +47,14 @@ bch_extent_pick_ptr(struct cache_set *c, struct bkey_s_c k,
bch_extent_pick_ptr_avoiding(c, k, NULL, ret);
}
+void bch_extent_cmpxchg(struct btree_insert_hook *,
+ struct btree_iter *,
+ struct bkey_s_c,
+ struct bkey_i *,
+ struct journal_res *);
+
void bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *,
- struct bch_replace_info *,
+ struct btree_insert_hook *,
struct journal_res *, unsigned);
void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index acc7f2df7731..7e551018e19c 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -435,7 +435,7 @@ static void bch_write_index(struct closure *cl)
int ret;
ret = bch_btree_insert(op->c, BTREE_ID_EXTENTS, &op->insert_keys,
- op->replace ? &op->replace_info : NULL,
+ op->replace ? &op->replace_info.hook : NULL,
op_journal_seq(op), BTREE_INSERT_NOFAIL);
op->written += sectors_start - keylist_sectors(&op->insert_keys);
@@ -1095,6 +1095,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
op->replace = true;
/* The caller can overwrite any replace_info fields */
memset(&op->replace_info, 0, sizeof(op->replace_info));
+ op->replace_info.hook.fn = bch_extent_cmpxchg;
bkey_reassemble(&op->replace_info.key, replace_key);
}
}
diff --git a/drivers/md/bcache/io_types.h b/drivers/md/bcache/io_types.h
index 495743950af8..26cec226621e 100644
--- a/drivers/md/bcache/io_types.h
+++ b/drivers/md/bcache/io_types.h
@@ -1,6 +1,7 @@
#ifndef _BCACHE_IO_TYPES_H
#define _BCACHE_IO_TYPES_H
+#include "btree_types.h"
#include "keylist_types.h"
#include <linux/llist.h>
@@ -59,8 +60,11 @@ struct bch_write_bio {
};
struct bch_replace_info {
- unsigned successes; /* How many insertions succeeded */
- unsigned failures; /* How many insertions failed */
+ struct btree_insert_hook hook;
+ /* How many insertions succeeded */
+ unsigned successes;
+ /* How many insertions failed */
+ unsigned failures;
BKEY_PADDED(key);
};
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index e066c6df89e3..1a0424961a40 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -115,11 +115,12 @@ static void write_dirty_finish(struct closure *cl)
int ret;
bkey_copy(&tmp.k, &io->replace.key);
+ io->replace.hook.fn = bch_extent_cmpxchg;
bkey_extent_set_cached(&tmp.k.k, true);
ret = bch_btree_insert(dc->disk.c, BTREE_ID_EXTENTS,
&keylist_single(&tmp.k),
- &io->replace, NULL, 0);
+ &io->replace.hook, NULL, 0);
if (io->replace.successes == 0)
trace_bcache_writeback_collision(&io->replace.key.k);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index fc417f35af67..3a710a91125f 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -619,8 +619,8 @@ DEFINE_EVENT(btree_node_op, bcache_btree_intent_lock_fail,
);
TRACE_EVENT(bcache_btree_insert_key,
- TP_PROTO(struct btree *b, struct bkey_i *k, unsigned op),
- TP_ARGS(b, k, op),
+ TP_PROTO(struct btree *b, struct bkey_i *k),
+ TP_ARGS(b, k),
TP_STRUCT__entry(
__field(u64, b_bucket )
@@ -631,7 +631,6 @@ TRACE_EVENT(bcache_btree_insert_key,
__field(u32, size )
__field(u8, level )
__field(u8, id )
- __field(u8, op )
),
TP_fast_assign(
@@ -643,11 +642,9 @@ TRACE_EVENT(bcache_btree_insert_key,
__entry->inode = k->k.p.inode;
__entry->offset = k->k.p.offset;
__entry->size = k->k.size;
- __entry->op = op;
),
- TP_printk("%s at bucket %llu(%u) id %u: %u:%llu %u:%llu len %u",
- __entry->op ? "replace" : "insert",
+ TP_printk("bucket %llu(%u) id %u: %u:%llu %u:%llu len %u",
__entry->b_bucket, __entry->level, __entry->id,
__entry->b_inode, __entry->b_offset,
__entry->inode, __entry->offset, __entry->size)