author		Kent Overstreet <kent.overstreet@gmail.com>	2016-04-15 15:24:24 -0800
committer	Kent Overstreet <kent.overstreet@gmail.com>	2017-01-18 21:39:08 -0900
commit		6f7fd88a05182d057e630ed6c13fdc11dfbf0c7b (patch)
tree		9733955304b2db0daad9d50eeff1b8abf5761bac
parent		89c4830407dfcf6292eb835c8c66ee6235e3b4a1 (diff)
bcache: make transactions work for extents
-rw-r--r--	drivers/md/bcache/btree_types.h		  1
-rw-r--r--	drivers/md/bcache/btree_update.c	153
-rw-r--r--	drivers/md/bcache/btree_update.h	 36
-rw-r--r--	drivers/md/bcache/dirent.c		  9
-rw-r--r--	drivers/md/bcache/extents.c		223
-rw-r--r--	drivers/md/bcache/extents.h		  8
-rw-r--r--	drivers/md/bcache/journal.h		 11
7 files changed, 254 insertions, 187 deletions
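
This patch turns bch_btree_insert_trans() into an interface that takes a single struct btree_insert_trans wrapping an array of struct btree_trans_entry (iterator + key + done flag), instead of a bare array and count, and threads the transaction through the whole insert path so that an extent insert hook can restart the transaction. A minimal sketch of the new calling convention, modeled on the dirent.c hunk below; iter_a/iter_b and key_a/key_b are hypothetical placeholders:

	struct btree_insert_trans trans = {
		.nr	 = 2,
		.entries = (struct btree_trans_entry[]) {
			{ &iter_a, &key_a->k_i, },
			{ &iter_b, &key_b->k_i, },
		},
	};
	int ret = bch_btree_insert_trans(&trans, NULL, NULL,
					 journal_seq, BTREE_INSERT_ATOMIC);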
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index b691ec49e14a..d6110c3f0188 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -157,6 +157,7 @@ struct bucket_stats_cache_set;
enum extent_insert_hook_ret {
BTREE_HOOK_DO_INSERT,
BTREE_HOOK_NO_INSERT,
+ BTREE_HOOK_RESTART_TRANS,
};
struct extent_insert_hook {
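
BTREE_HOOK_RESTART_TRANS is the new way for an insert hook to abort the current transaction so that it gets retried from the top. A hedged sketch of a hook returning it; the fn signature is inferred from the bch_extent_cmpxchg() prototype in extents.h, and need_restart stands in for whatever condition the hook actually checks:

	static enum extent_insert_hook_ret
	example_hook_fn(struct extent_insert_hook *hook,
			struct btree_iter *iter, struct bpos next_pos,
			struct bkey_s_c k, const struct bkey_i *insert)
	{
		if (need_restart)
			/* unwound as -EINTR out of bch_btree_insert_trans() */
			return BTREE_HOOK_RESTART_TRANS;

		return BTREE_HOOK_DO_INSERT;
	}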
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index 0ebd018d4b48..7c478fbf73d1 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -1495,91 +1495,98 @@ out_unlock:
 * btree_insert_key - insert one key into a leaf node
*/
static enum btree_insert_ret
-btree_insert_key(struct btree_iter *iter, struct bkey_i *insert,
+btree_insert_key(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
struct journal_res *res,
unsigned flags)
{
- struct btree *b = iter->nodes[0];
+ struct btree *b = insert->iter->nodes[0];
s64 oldsize = bch_count_data(&b->keys);
enum btree_insert_ret ret;
- bch_btree_node_iter_verify(&iter->node_iters[0], &b->keys);
+ bch_btree_node_iter_verify(&insert->iter->node_iters[0], &b->keys);
ret = !b->keys.ops->is_extents
- ? bch_insert_fixup_key(iter, insert, res)
- : bch_insert_fixup_extent(iter, insert, disk_res,
+ ? bch_insert_fixup_key(trans, insert, res)
+ : bch_insert_fixup_extent(trans, insert, disk_res,
hook, res, flags);
bch_count_data_verify(&b->keys, oldsize);
- trace_bcache_btree_insert_key(b, insert);
+ trace_bcache_btree_insert_key(b, insert->k);
return ret;
}
-static bool same_leaf_as_prev(struct btree_insert_trans *m,
- struct btree_insert_trans *i)
+static bool same_leaf_as_prev(struct btree_insert_trans *trans,
+ struct btree_trans_entry *i)
{
/*
* Because we sorted the transaction entries, if multiple iterators
* point to the same leaf node they'll always be adjacent now:
*/
- return i != m && i[0].iter->nodes[0] == i[-1].iter->nodes[0];
+ return i != trans->entries &&
+ i[0].iter->nodes[0] == i[-1].iter->nodes[0];
}
-static void multi_lock_write(struct btree_insert_trans *m, unsigned nr)
+#define trans_for_each_entry(trans, i) \
+ for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++)
+
+static void multi_lock_write(struct btree_insert_trans *trans)
{
- struct btree_insert_trans *i;
+ struct btree_trans_entry *i;
- for (i = m; i < m + nr; i++)
- if (!same_leaf_as_prev(m, i))
+ trans_for_each_entry(trans, i)
+ if (!same_leaf_as_prev(trans, i))
btree_node_lock_for_insert(i->iter->nodes[0], i->iter);
}
-static void multi_unlock_write(struct btree_insert_trans *m, unsigned nr)
+static void multi_unlock_write(struct btree_insert_trans *trans)
{
- struct btree_insert_trans *i;
+ struct btree_trans_entry *i;
- for (i = m; i < m + nr; i++)
- if (!same_leaf_as_prev(m, i))
+ trans_for_each_entry(trans, i)
+ if (!same_leaf_as_prev(trans, i))
btree_node_unlock_write(i->iter->nodes[0], i->iter);
}
-static int btree_trans_iter_cmp(const void *_l, const void *_r)
+static int btree_trans_entry_cmp(const void *_l, const void *_r)
{
- const struct btree_insert_trans *l = _l;
- const struct btree_insert_trans *r = _r;
+ const struct btree_trans_entry *l = _l;
+ const struct btree_trans_entry *r = _r;
return btree_iter_cmp(l->iter, r->iter);
}
/* Normal update interface: */
-int bch_btree_insert_trans(struct btree_insert_trans *m, unsigned nr,
+int bch_btree_insert_trans(struct btree_insert_trans *trans,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq, unsigned flags)
{
- struct cache_set *c = m[0].iter->c;
+ struct cache_set *c = trans->entries[0].iter->c;
struct journal_res res = { 0, 0 };
- struct btree_insert_trans *i;
+ struct btree_trans_entry *i;
struct btree_iter *split;
unsigned u64s;
int ret;
- for (i = m; i < m + nr; i++) {
+ trans_for_each_entry(trans, i) {
EBUG_ON(i->iter->level);
EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+ i->done = false;
}
+ trans->did_work = false;
- /* Sort transaction entries by iterator position, for lock ordering: */
- sort(m, nr, sizeof(m[0]), btree_trans_iter_cmp, NULL);
+ sort(trans->entries, trans->nr, sizeof(trans->entries[0]),
+ btree_trans_entry_cmp, NULL);
if (unlikely(!percpu_ref_tryget(&c->writes)))
return -EROFS;
- for (i = m; i < m + nr; i++) {
+ trans_for_each_entry(trans, i) {
i->iter->locks_want = 0;
if (unlikely(!bch_btree_iter_upgrade(i->iter))) {
ret = -EINTR;
@@ -1587,9 +1594,8 @@ int bch_btree_insert_trans(struct btree_insert_trans *m, unsigned nr,
}
}
retry:
- split = NULL;
u64s = 0;
- for (i = m; i < m + nr; i++)
+ trans_for_each_entry(trans, i)
if (!i->done)
u64s += jset_u64s(i->k->k.u64s);
@@ -1599,62 +1605,75 @@ retry:
if (ret)
goto err;
- multi_lock_write(m, nr);
+ multi_lock_write(trans);
u64s = 0;
- for (i = m; i < m + nr; i++) {
+ trans_for_each_entry(trans, i) {
/* Multiple inserts might go to same leaf: */
- if (!same_leaf_as_prev(m, i))
+ if (!same_leaf_as_prev(trans, i))
u64s = 0;
if (!i->done) {
u64s += i->k->k.u64s;
if (!bch_btree_node_insert_fits(c,
i->iter->nodes[0], u64s))
- goto split;
+ goto unlock_split;
}
}
- for (i = m; i < m + nr; i++)
- if (!i->done)
- switch (btree_insert_key(i->iter, i->k, disk_res,
- hook, &res, flags)) {
- case BTREE_INSERT_OK:
- i->done = true;
- break;
- case BTREE_INSERT_NEED_TRAVERSE:
- ret = -EINTR;
- goto err_unlock;
- case BTREE_INSERT_NEED_RESCHED:
- ret = -EAGAIN;
- goto err_unlock;
- case BTREE_INSERT_BTREE_NODE_FULL:
- goto split;
- case BTREE_INSERT_JOURNAL_RES_FULL:
- goto err_unlock;
- }
+ ret = 0;
+ split = NULL;
- multi_unlock_write(m, nr);
+ trans_for_each_entry(trans, i) {
+ if (i->done)
+ continue;
+ switch (btree_insert_key(trans, i, disk_res,
+ hook, &res, flags)) {
+ case BTREE_INSERT_OK:
+ i->done = true;
+ break;
+ case BTREE_INSERT_JOURNAL_RES_FULL:
+ case BTREE_INSERT_NEED_TRAVERSE:
+ ret = -EINTR;
+ break;
+ case BTREE_INSERT_NEED_RESCHED:
+ ret = -EAGAIN;
+ break;
+ case BTREE_INSERT_BTREE_NODE_FULL:
+ split = i->iter;
+ break;
+ }
+
+ if (!trans->did_work && (ret || split))
+ break;
+ }
+
+ multi_unlock_write(trans);
bch_journal_res_put(&c->journal, &res, journal_seq);
- for (i = m; i < m + nr; i++)
- if (!same_leaf_as_prev(m, i))
+ if (split)
+ goto split;
+ if (ret)
+ goto err;
+
+ trans_for_each_entry(trans, i)
+ if (!same_leaf_as_prev(trans, i))
bch_btree_node_write_lazy(i->iter->nodes[0], i->iter);
out:
percpu_ref_put(&c->writes);
return ret;
-split:
+unlock_split:
split = i->iter;
- multi_unlock_write(m, nr);
+ multi_unlock_write(trans);
/*
* have to drop journal res before splitting, because splitting means
* allocating new btree nodes, and holding a journal reservation
* potentially blocks the allocator:
*/
bch_journal_res_put(&c->journal, &res, journal_seq);
-
+split:
ret = bch_btree_split_leaf(split, flags);
if (ret)
goto err;
@@ -1665,12 +1684,9 @@ split:
* and is overwriting won't have changed)
*/
goto retry;
-err_unlock:
- multi_unlock_write(m, nr);
- bch_journal_res_put(&c->journal, &res, journal_seq);
err:
if (ret == -EAGAIN) {
- for (i = m; i < m + nr; i++)
+ trans_for_each_entry(trans, i)
bch_btree_iter_unlock(i->iter);
ret = -EINTR;
}
@@ -1685,7 +1701,7 @@ err:
* reservations:
*/
if (ret == -EINTR && !(flags & BTREE_INSERT_ATOMIC)) {
- for (i = m; i < m + nr; i++) {
+ trans_for_each_entry(trans, i) {
ret = bch_btree_iter_traverse(i->iter);
if (ret)
goto out;
@@ -1740,14 +1756,17 @@ int bch_btree_insert_at(struct btree_iter *iter,
u64 *journal_seq, unsigned flags)
{
struct btree_insert_trans m = {
- .iter = iter,
- .k = insert_key,
- .done = false,
+ .nr = 1,
+ .entries = &(struct btree_trans_entry) {
+ .iter = iter,
+ .k = insert_key,
+ .done = false,
+ },
};
- int ret = bch_btree_insert_trans(&m, 1, disk_res,
+ int ret = bch_btree_insert_trans(&m, disk_res,
hook, journal_seq, flags);
- BUG_ON(!ret != m.done);
+ BUG_ON(!ret != m.entries[0].done);
return ret;
}
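
Error handling in the insert loop changes shape: instead of jumping out mid-loop, each entry records ret or split, and the loop only breaks early while trans->did_work is still false, i.e. before anything has been inserted and journalled. With BTREE_INSERT_ATOMIC the resulting -EINTR is returned to the caller rather than retried internally; a sketch of the retry loop such a caller is assumed to run, with iterators re-traversed on the next pass:

	do {
		ret = bch_btree_insert_trans(&trans, NULL, hook,
					     journal_seq,
					     BTREE_INSERT_ATOMIC);
	} while (ret == -EINTR);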
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index dfdfcefb29f9..e91ec0fc5880 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -4,6 +4,7 @@
#include "btree_cache.h"
#include "btree_iter.h"
#include "buckets.h"
+#include "journal.h"
struct cache_set;
struct bkey_format_state;
@@ -242,13 +243,38 @@ int bch_btree_insert_list_at(struct btree_iter *, struct keylist *,
struct extent_insert_hook *, u64 *, unsigned);
struct btree_insert_trans {
- struct btree_iter *iter;
- struct bkey_i *k;
- /* true if entire key was inserted - can only be false for extents */
- bool done;
+ unsigned nr;
+ bool did_work;
+ struct btree_trans_entry {
+ struct btree_iter *iter;
+ struct bkey_i *k;
+ /*
+ * true if entire key was inserted - can only be false for
+ * extents
+ */
+ bool done;
+ } *entries;
};
-int bch_btree_insert_trans(struct btree_insert_trans[], unsigned,
+static inline bool journal_res_insert_fits(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
+ struct journal_res *res)
+{
+ struct cache_set *c = insert->iter->c;
+ unsigned u64s = 0;
+ struct btree_trans_entry *i;
+
+ /* If we're in journal replay we're not getting journal reservations: */
+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ return true;
+
+ for (i = insert; i < trans->entries + trans->nr; i++)
+ u64s += jset_u64s(i->k->k.u64s);
+
+ return u64s <= res->u64s;
+}
+
+int bch_btree_insert_trans(struct btree_insert_trans *,
struct disk_reservation *,
struct extent_insert_hook *,
u64 *, unsigned);
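
journal_res_insert_fits() moves here from journal.h and now sums the journal space needed by the current entry and every entry after it in the transaction, rather than checking a single key. A worked example, assuming sizeof(struct jset_entry) amounts to one u64 of header per key so that jset_u64s(n) == n + 1:

	/*
	 * Two remaining entries whose keys are 3 and 5 u64s wide:
	 *   jset_u64s(3) + jset_u64s(5) == 4 + 6 == 10
	 * so the insert fits only if res->u64s >= 10.
	 */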
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index 5815ad5befc5..228e25ea6ff9 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -324,9 +324,12 @@ int bch_dirent_rename(struct cache_set *c,
new_dst->v.d_inum = old_src_d.v->d_inum;
new_dst->v.d_type = old_src_d.v->d_type;
- ret = bch_btree_insert_trans((struct btree_insert_trans[]) {
- { &src_iter, &new_src->k_i, },
- { &dst_iter, &new_dst->k_i, }}, 2,
+ ret = bch_btree_insert_trans(&(struct btree_insert_trans) {
+ .nr = 2,
+ .entries = (struct btree_trans_entry[]) {
+ { &src_iter, &new_src->k_i, },
+ { &dst_iter, &new_dst->k_i, }
+ }},
NULL, NULL, journal_seq,
BTREE_INSERT_ATOMIC);
bch_btree_iter_unlock(&src_iter);
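
bch_dirent_rename() becomes the first multi-entry user: both dirents are updated in one atomic transaction, with the transaction and its two-entry array built as compound literals so nothing is allocated. The positional initializers rely on the trailing done flag being zero-initialized; written out with designators, the first entry is equivalent to this sketch:

	{ .iter = &src_iter, .k = &new_src->k_i, .done = false, }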
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 2c0a38612682..1c04c629cea4 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -106,24 +106,24 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
/* This returns true if insert should be inserted, false otherwise */
enum btree_insert_ret
-bch_insert_fixup_key(struct btree_iter *iter,
- struct bkey_i *insert,
+bch_insert_fixup_key(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
struct journal_res *res)
{
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree *b = insert->iter->nodes[0];
+ struct btree_node_iter *node_iter = &insert->iter->node_iters[0];
const struct bkey_format *f = &b->keys.format;
struct bkey_packed *k;
int cmp;
- BUG_ON(iter->level);
+ BUG_ON(insert->iter->level);
EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
(bkey_deleted(k)
- ? bkey_cmp_packed(f, k, &insert->k) > 0
- : bkey_cmp_packed(f, k, &insert->k) >= 0));
+ ? bkey_cmp_packed(f, k, &insert->k->k) > 0
+ : bkey_cmp_packed(f, k, &insert->k->k) >= 0));
while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys)) &&
- (cmp = bkey_cmp_packed(f, k, &insert->k)) <= 0) {
+ (cmp = bkey_cmp_packed(f, k, &insert->k->k)) <= 0) {
if (!cmp && !bkey_deleted(k)) {
k->type = KEY_TYPE_DELETED;
btree_keys_account_key_drop(&b->keys.nr, k);
@@ -132,7 +132,8 @@ bch_insert_fixup_key(struct btree_iter *iter,
bch_btree_node_iter_next_all(node_iter, &b->keys);
}
- bch_btree_insert_and_journal(iter, insert, res);
+ bch_btree_insert_and_journal(insert->iter, insert->k, res);
+ trans->did_work = true;
return BTREE_INSERT_OK;
}
@@ -979,15 +980,13 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
#define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC)
-static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
- struct bkey_i *insert,
+static enum btree_insert_ret extent_insert_should_stop(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
struct journal_res *res,
u64 start_time,
unsigned nr_done)
{
- struct cache_set *c = iter->c;
- struct btree *b = iter->nodes[0];
-
+ struct btree *b = insert->iter->nodes[0];
/*
* Check if we have sufficient space in both the btree node and the
* journal reservation:
@@ -1001,9 +1000,9 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
* doing a lot of work under the btree node write lock - bail out if
* we've been running for too long and readers are waiting on the lock:
*/
- if (!bch_btree_node_insert_fits(c, b, insert->k.u64s))
+ if (!bch_btree_node_insert_fits(insert->iter->c, b, insert->k->k.u64s))
return BTREE_INSERT_BTREE_NODE_FULL;
- else if (!journal_res_insert_fits(c, res, insert))
+ else if (!journal_res_insert_fits(trans, insert, res))
return BTREE_INSERT_JOURNAL_RES_FULL; /* XXX worth tracing */
else if (nr_done > 10 &&
time_after64(local_clock(), start_time +
@@ -1014,26 +1013,30 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
return BTREE_INSERT_OK;
}
-static void extent_insert_committed(struct btree_iter *iter,
- struct bkey_i *insert,
+static void extent_insert_committed(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
struct journal_res *res)
{
- EBUG_ON(bkey_cmp(insert->k.p, iter->pos) < 0);
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)) < 0);
+ struct btree_iter *iter = insert->iter;
+ struct bkey_i *k = insert->k;
- if (bkey_cmp(iter->pos, bkey_start_pos(&insert->k)) > 0) {
- EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+ EBUG_ON(bkey_cmp(k->k.p, iter->pos) < 0);
+ EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&k->k)) < 0);
+
+ if (bkey_cmp(iter->pos, bkey_start_pos(&k->k)) > 0) {
+ EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
bch_btree_insert_and_journal(iter,
- bch_key_split(iter->pos, insert), res);
+ bch_key_split(iter->pos, k), res);
+ trans->did_work = true;
}
}
static enum extent_insert_hook_ret
-__extent_insert_advance_pos(struct extent_insert_hook *hook,
- struct btree_iter *iter,
+__extent_insert_advance_pos(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
+ struct extent_insert_hook *hook,
struct bpos next_pos,
- struct bkey_i *insert,
struct bkey_s_c k,
struct journal_res *res,
struct bucket_stats_cache_set *stats)
@@ -1041,31 +1044,33 @@ __extent_insert_advance_pos(struct extent_insert_hook *hook,
enum extent_insert_hook_ret ret;
if (k.k && k.k->size &&
- insert->k.version &&
- k.k->version > insert->k.version)
+ insert->k->k.version &&
+ k.k->version > insert->k->k.version)
ret = BTREE_HOOK_NO_INSERT;
else if (hook)
- ret = hook->fn(hook, iter, next_pos, k, insert);
+ ret = hook->fn(hook, insert->iter, next_pos, k, insert->k);
else
ret = BTREE_HOOK_DO_INSERT;
- EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+ EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
switch (ret) {
case BTREE_HOOK_DO_INSERT:
break;
case BTREE_HOOK_NO_INSERT:
- extent_insert_committed(iter, insert, res);
- bch_cut_subtract_front(iter, next_pos,
- bkey_i_to_s(insert), stats);
+ extent_insert_committed(trans, insert, res);
+ bch_cut_subtract_front(insert->iter, next_pos,
+ bkey_i_to_s(insert->k), stats);
break;
+ case BTREE_HOOK_RESTART_TRANS:
+ return ret;
}
/*
* Don't update iter->pos until after calling the hook,
* because the hook fn may use it:
*/
- bch_btree_iter_set_pos(iter, next_pos);
+ bch_btree_iter_set_pos(insert->iter, next_pos);
return ret;
}
@@ -1074,41 +1079,48 @@ __extent_insert_advance_pos(struct extent_insert_hook *hook,
* fn:
*/
static enum extent_insert_hook_ret
-extent_insert_advance_pos(struct extent_insert_hook *hook,
- struct btree_iter *iter,
- struct bkey_i *insert,
+extent_insert_advance_pos(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
+ struct extent_insert_hook *hook,
struct bkey_s_c k,
struct journal_res *res,
struct bucket_stats_cache_set *stats)
{
- struct btree *b = iter->nodes[0];
+ struct btree *b = insert->iter->nodes[0];
struct bpos next_pos = k.k
- ? bpos_min(insert->k.p, k.k->p)
- : bpos_min(insert->k.p, b->key.k.p);
+ ? bpos_min(insert->k->k.p, k.k->p)
+ : bpos_min(insert->k->k.p, b->key.k.p);
/* hole? */
- if (k.k && bkey_cmp(iter->pos, bkey_start_pos(k.k)) < 0) {
- bool might_split = bkey_cmp(iter->pos,
- bkey_start_pos(&insert->k)) > 0;
-
- /*
- * If a hole causes us to split and insert a previously
- * committed portion, return BTREE_HOOK_NO_INSERT to recheck
- * if we have room in journal res/btree node:
- */
- if (__extent_insert_advance_pos(hook, iter, bkey_start_pos(k.k),
- insert, bkey_s_c_null, res,
- stats) == BTREE_HOOK_NO_INSERT &&
- might_split)
- return BTREE_HOOK_NO_INSERT;
+ if (k.k && bkey_cmp(insert->iter->pos, bkey_start_pos(k.k)) < 0) {
+ bool might_split = bkey_cmp(insert->iter->pos,
+ bkey_start_pos(&insert->k->k)) > 0;
+
+ switch (__extent_insert_advance_pos(trans, insert, hook,
+ bkey_start_pos(k.k),
+ bkey_s_c_null,
+ res, stats)) {
+ case BTREE_HOOK_DO_INSERT:
+ break;
+ case BTREE_HOOK_NO_INSERT:
+ /*
+ * @insert was split, need to recheck if we have room in
+ * journal res/btree node:
+ */
+ if (might_split)
+ return BTREE_HOOK_DO_INSERT;
+ break;
+ case BTREE_HOOK_RESTART_TRANS:
+ return BTREE_HOOK_RESTART_TRANS;
+ }
}
/* avoid redundant calls to hook fn: */
- if (!bkey_cmp(iter->pos, next_pos))
+ if (!bkey_cmp(insert->iter->pos, next_pos))
return BTREE_HOOK_DO_INSERT;
- return __extent_insert_advance_pos(hook, iter, next_pos,
- insert, k, res, stats);
+ return __extent_insert_advance_pos(trans, insert, hook, next_pos,
+ k, res, stats);
}
/**
@@ -1151,13 +1163,14 @@ extent_insert_advance_pos(struct extent_insert_hook *hook,
* key insertion needs to continue/be retried.
*/
enum btree_insert_ret
-bch_insert_fixup_extent(struct btree_iter *iter,
- struct bkey_i *insert,
+bch_insert_fixup_extent(struct btree_insert_trans *trans,
+ struct btree_trans_entry *insert,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
struct journal_res *res,
unsigned flags)
{
+ struct btree_iter *iter = insert->iter;
struct cache_set *c = iter->c;
struct btree *b = iter->nodes[0];
struct btree_node_iter *node_iter = &iter->node_iters[0];
@@ -1170,19 +1183,19 @@ bch_insert_fixup_extent(struct btree_iter *iter,
enum btree_insert_ret ret = BTREE_INSERT_OK;
EBUG_ON(iter->level);
- EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+ EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
/*
* As we process overlapping extents, we advance @iter->pos both to
- * signal to our caller (btree_insert_key()) how much of @insert has
- * been inserted, and also to keep @iter->pos consistent with @insert
- * and the node iterator that we're advancing:
+ * signal to our caller (btree_insert_key()) how much of @insert->k has
+ * been inserted, and also to keep @iter->pos consistent with
+ * @insert->k and the node iterator that we're advancing:
*/
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
+ EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
/*
- * If this is a cmpxchg operation, @insert doesn't necessarily exist in
- * the btree, and may have pointers not pinned by open buckets; thus
+ * If this is a cmpxchg operation, @insert->k doesn't necessarily exist
+ * in the btree, and may have pointers not pinned by open buckets; thus
* some of the pointers might be stale because we raced with foreground
* writes.
*
@@ -1197,39 +1210,46 @@ bch_insert_fixup_extent(struct btree_iter *iter,
* to proceed with the insertion.
*/
if (!(flags & BTREE_INSERT_NO_MARK_KEY))
- bch_add_sectors(iter, bkey_i_to_s_c(insert),
- bkey_start_offset(&insert->k),
- insert->k.size, &stats);
+ bch_add_sectors(iter, bkey_i_to_s_c(insert->k),
+ bkey_start_offset(&insert->k->k),
+ insert->k->k.size, &stats);
- while (bkey_cmp(iter->pos, insert->k.p) < 0 &&
- (ret = extent_insert_should_stop(iter, insert, res,
+ while (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+ (ret = extent_insert_should_stop(trans, insert, res,
start_time, nr_done)) == BTREE_INSERT_OK &&
(_k = bch_btree_node_iter_peek_overlapping(node_iter,
- &b->keys, &insert->k))) {
+ &b->keys, &insert->k->k))) {
struct bkey_s k = __bkey_disassemble(f, _k, &unpacked);
/*
* Only call advance pos & call hook for nonzero size extents:
- * If hook returned BTREE_HOOK_NO_INSERT, @insert no longer
+ * If hook returned BTREE_HOOK_NO_INSERT, @insert->k no longer
* overlaps with @k:
*/
- if (k.k->size &&
- (extent_insert_advance_pos(hook, iter, insert, k.s_c, res,
- &stats) == BTREE_HOOK_NO_INSERT))
- continue;
+ if (k.k->size)
+ switch (extent_insert_advance_pos(trans, insert, hook,
+ k.s_c, res, &stats)) {
+ case BTREE_HOOK_DO_INSERT:
+ break;
+ case BTREE_HOOK_NO_INSERT:
+ continue;
+ case BTREE_HOOK_RESTART_TRANS:
+ ret = BTREE_INSERT_NEED_TRAVERSE;
+ goto stop;
+ }
/* k is the key currently in the tree, 'insert' is the new key */
- switch (bch_extent_overlap(&insert->k, k.k)) {
+ switch (bch_extent_overlap(&insert->k->k, k.k)) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert and k share the start, invalidate in k */
- bch_cut_subtract_front(iter, insert->k.p, k, &stats);
+ bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
extent_save(_k, k.k, f);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert and k share the end, invalidate in k */
bch_cut_subtract_back(iter,
- bkey_start_pos(&insert->k),
+ bkey_start_pos(&insert->k->k),
k, &stats);
extent_save(_k, k.k, f);
@@ -1250,7 +1270,7 @@ bch_insert_fixup_extent(struct btree_iter *iter,
btree_keys_account_key_drop(&b->keys.nr, _k);
bch_drop_subtract(iter, k, &stats);
- k.k->p = bkey_start_pos(&insert->k);
+ k.k->p = bkey_start_pos(&insert->k->k);
if (!__extent_save(_k, k.k, f)) {
/*
* Couldn't repack: we aren't necessarily able
@@ -1261,9 +1281,14 @@ bch_insert_fixup_extent(struct btree_iter *iter,
k.k->p = orig_pos;
extent_save(_k, k.k, f);
- extent_insert_advance_pos(hook, iter, insert,
- k.s_c, res, &stats);
- extent_insert_committed(iter, insert, res);
+ if (extent_insert_advance_pos(trans, insert,
+ hook, k.s_c, res,
+ &stats) ==
+ BTREE_HOOK_RESTART_TRANS) {
+ ret = BTREE_INSERT_NEED_TRAVERSE;
+ goto stop;
+ }
+ extent_insert_committed(trans, insert, res);
/*
 * We split and inserted up to k.k->p - that
* has to coincide with iter->pos, so that we
@@ -1294,10 +1319,10 @@ bch_insert_fixup_extent(struct btree_iter *iter,
* what k points to)
*/
bkey_reassemble(&split.k, k.s_c);
- bch_cut_back(bkey_start_pos(&insert->k), &split.k.k);
+ bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
- __bch_cut_front(bkey_start_pos(&insert->k), k);
- bch_cut_subtract_front(iter, insert->k.p, k, &stats);
+ __bch_cut_front(bkey_start_pos(&insert->k->k), k);
+ bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
extent_save(_k, k.k, f);
bch_btree_bset_insert(iter, b, node_iter, &split.k);
@@ -1306,26 +1331,26 @@ bch_insert_fixup_extent(struct btree_iter *iter,
}
}
- if (bkey_cmp(iter->pos, insert->k.p) < 0 &&
- ret == BTREE_INSERT_OK)
- extent_insert_advance_pos(hook, iter, insert, bkey_s_c_null,
- res, &stats);
-
- extent_insert_committed(iter, insert, res);
+ if (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+ ret == BTREE_INSERT_OK &&
+ extent_insert_advance_pos(trans, insert, hook, bkey_s_c_null, res,
+ &stats) == BTREE_HOOK_RESTART_TRANS)
+ ret = BTREE_INSERT_NEED_TRAVERSE;
+stop:
+ extent_insert_committed(trans, insert, res);
/*
* Subtract any remaining sectors from @insert, if we bailed out early
* and didn't fully insert @insert:
*/
- if (insert->k.size)
- bch_subtract_sectors(iter, bkey_i_to_s_c(insert),
- iter->pos.offset,
- insert->k.p.offset - iter->pos.offset,
- &stats);
+ if (insert->k->k.size && !(flags & BTREE_INSERT_NO_MARK_KEY))
+ bch_subtract_sectors(iter, bkey_i_to_s_c(insert->k),
+ bkey_start_offset(&insert->k->k),
+ insert->k->k.size, &stats);
bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
- if (insert->k.size && !bkey_cmp(iter->pos, b->key.k.p))
+ if (insert->k->k.size && !bkey_cmp(iter->pos, b->key.k.p))
ret = BTREE_INSERT_NEED_TRAVERSE;
return ret;
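
Taken together, the extents.c changes plumb a hook-requested restart out of the extent fixup path. A sketch of the propagation, using only names from this patch:

	/*
	 * hook->fn() returns BTREE_HOOK_RESTART_TRANS
	 *   -> __extent_insert_advance_pos() passes it through
	 *     -> bch_insert_fixup_extent() returns BTREE_INSERT_NEED_TRAVERSE
	 *       -> bch_btree_insert_trans() maps that to ret = -EINTR
	 */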
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 1610ac35f7fe..23418ccb04f9 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -8,6 +8,8 @@
struct bch_replace_info;
union bch_extent_crc;
struct btree_iter;
+struct btree_insert_trans;
+struct btree_trans_entry;
struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *,
struct bset *,
@@ -17,7 +19,8 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *,
struct btree_node_iter *);
enum btree_insert_ret
-bch_insert_fixup_key(struct btree_iter *, struct bkey_i *,
+bch_insert_fixup_key(struct btree_insert_trans *,
+ struct btree_trans_entry *,
struct journal_res *);
extern const struct bkey_ops bch_bkey_btree_ops;
@@ -53,7 +56,8 @@ bch_extent_cmpxchg(struct extent_insert_hook *, struct btree_iter *,
struct bpos, struct bkey_s_c, const struct bkey_i *);
enum btree_insert_ret
-bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *,
+bch_insert_fixup_extent(struct btree_insert_trans *,
+ struct btree_trans_entry *,
struct disk_reservation *,
struct extent_insert_hook *,
struct journal_res *, unsigned);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index be2b842b9197..c54087472b90 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -211,17 +211,6 @@ static inline unsigned jset_u64s(unsigned u64s)
return u64s + sizeof(struct jset_entry) / sizeof(u64);
}
-static inline bool journal_res_insert_fits(struct cache_set *c,
- struct journal_res *res,
- struct bkey_i *k)
-{
- /* If we're in journal replay we're not getting journal reservations: */
- if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
- return true;
-
- return jset_u64s(k->k.u64s) <= res->u64s;
-}
-
void bch_journal_start(struct cache_set *);
void bch_journal_mark(struct cache_set *, struct list_head *);
const char *bch_journal_read(struct cache_set *, struct list_head *);