author     Kent Overstreet <kent.overstreet@gmail.com>    2016-04-15 15:24:24 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>    2017-01-18 21:39:08 -0900
commit     6f7fd88a05182d057e630ed6c13fdc11dfbf0c7b (patch)
tree       9733955304b2db0daad9d50eeff1b8abf5761bac
parent     89c4830407dfcf6292eb835c8c66ee6235e3b4a1 (diff)
bcache: make transactions work for extents
-rw-r--r--  drivers/md/bcache/btree_types.h  |   1
-rw-r--r--  drivers/md/bcache/btree_update.c | 153
-rw-r--r--  drivers/md/bcache/btree_update.h |  36
-rw-r--r--  drivers/md/bcache/dirent.c       |   9
-rw-r--r--  drivers/md/bcache/extents.c      | 223
-rw-r--r--  drivers/md/bcache/extents.h      |   8
-rw-r--r--  drivers/md/bcache/journal.h      |  11
7 files changed, 254 insertions(+), 187 deletions(-)
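
The heart of this patch is an interface change: bch_btree_insert_trans() now takes a pointer to a single struct btree_insert_trans describing the whole transaction (an entry count plus an entries array, and a did_work flag recording whether any part of the transaction has been journaled yet), rather than a bare array of entries and a count. As a minimal before/after sketch of the calling convention, modeled on the bch_dirent_rename() hunk below (setup of the iterators and keys is elided):

    /* Before: array of transaction entries plus an explicit count: */
    ret = bch_btree_insert_trans((struct btree_insert_trans[]) {
            { &src_iter, &new_src->k_i, },
            { &dst_iter, &new_dst->k_i, }}, 2,
            NULL, NULL, journal_seq, BTREE_INSERT_ATOMIC);

    /* After: one transaction object carrying nr + entries: */
    ret = bch_btree_insert_trans(&(struct btree_insert_trans) {
            .nr      = 2,
            .entries = (struct btree_trans_entry[]) {
                    { &src_iter, &new_src->k_i, },
                    { &dst_iter, &new_dst->k_i, }
            }},
            NULL, NULL, journal_seq, BTREE_INSERT_ATOMIC);

Internally the entries are sorted by iterator position (btree_trans_entry_cmp) so leaf-node write locks are always taken in a consistent order, and all keys in the transaction are inserted under a single journal reservation.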
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index b691ec49e14a..d6110c3f0188 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -157,6 +157,7 @@ struct bucket_stats_cache_set;
 enum extent_insert_hook_ret {
        BTREE_HOOK_DO_INSERT,
        BTREE_HOOK_NO_INSERT,
+       BTREE_HOOK_RESTART_TRANS,
 };
 
 struct extent_insert_hook {
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index 0ebd018d4b48..7c478fbf73d1 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -1495,91 +1495,98 @@ out_unlock:
  * btree_insert_key - insert a key one key into a leaf node
  */
 static enum btree_insert_ret
-btree_insert_key(struct btree_iter *iter, struct bkey_i *insert,
+btree_insert_key(struct btree_insert_trans *trans,
+                struct btree_trans_entry *insert,
                 struct disk_reservation *disk_res,
                 struct extent_insert_hook *hook,
                 struct journal_res *res,
                 unsigned flags)
 {
-       struct btree *b = iter->nodes[0];
+       struct btree *b = insert->iter->nodes[0];
        s64 oldsize = bch_count_data(&b->keys);
        enum btree_insert_ret ret;
 
-       bch_btree_node_iter_verify(&iter->node_iters[0], &b->keys);
+       bch_btree_node_iter_verify(&insert->iter->node_iters[0], &b->keys);
 
        ret = !b->keys.ops->is_extents
-               ? bch_insert_fixup_key(iter, insert, res)
-               : bch_insert_fixup_extent(iter, insert, disk_res,
+               ? bch_insert_fixup_key(trans, insert, res)
+               : bch_insert_fixup_extent(trans, insert, disk_res,
                                          hook, res, flags);
 
        bch_count_data_verify(&b->keys, oldsize);
 
-       trace_bcache_btree_insert_key(b, insert);
+       trace_bcache_btree_insert_key(b, insert->k);
 
        return ret;
 }
 
-static bool same_leaf_as_prev(struct btree_insert_trans *m,
-                             struct btree_insert_trans *i)
+static bool same_leaf_as_prev(struct btree_insert_trans *trans,
+                             struct btree_trans_entry *i)
 {
        /*
         * Because we sorted the transaction entries, if multiple iterators
         * point to the same leaf node they'll always be adjacent now:
         */
-       return i != m && i[0].iter->nodes[0] == i[-1].iter->nodes[0];
+       return i != trans->entries &&
+               i[0].iter->nodes[0] == i[-1].iter->nodes[0];
 }
 
-static void multi_lock_write(struct btree_insert_trans *m, unsigned nr)
+#define trans_for_each_entry(trans, i)                                 \
+       for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++)
+
+static void multi_lock_write(struct btree_insert_trans *trans)
 {
-       struct btree_insert_trans *i;
+       struct btree_trans_entry *i;
 
-       for (i = m; i < m + nr; i++)
-               if (!same_leaf_as_prev(m, i))
+       trans_for_each_entry(trans, i)
+               if (!same_leaf_as_prev(trans, i))
                        btree_node_lock_for_insert(i->iter->nodes[0], i->iter);
 }
 
-static void multi_unlock_write(struct btree_insert_trans *m, unsigned nr)
+static void multi_unlock_write(struct btree_insert_trans *trans)
 {
-       struct btree_insert_trans *i;
+       struct btree_trans_entry *i;
 
-       for (i = m; i < m + nr; i++)
-               if (!same_leaf_as_prev(m, i))
+       trans_for_each_entry(trans, i)
+               if (!same_leaf_as_prev(trans, i))
                        btree_node_unlock_write(i->iter->nodes[0], i->iter);
 }
 
-static int btree_trans_iter_cmp(const void *_l, const void *_r)
+static int btree_trans_entry_cmp(const void *_l, const void *_r)
 {
-       const struct btree_insert_trans *l = _l;
-       const struct btree_insert_trans *r = _r;
+       const struct btree_trans_entry *l = _l;
+       const struct btree_trans_entry *r = _r;
 
        return btree_iter_cmp(l->iter, r->iter);
 }
 
 /* Normal update interface: */
 
-int bch_btree_insert_trans(struct btree_insert_trans *m, unsigned nr,
+int bch_btree_insert_trans(struct btree_insert_trans *trans,
                           struct disk_reservation *disk_res,
                           struct extent_insert_hook *hook,
                           u64 *journal_seq, unsigned flags)
 {
-       struct cache_set *c = m[0].iter->c;
+       struct cache_set *c = trans->entries[0].iter->c;
        struct journal_res res = { 0, 0 };
-       struct btree_insert_trans *i;
+       struct btree_trans_entry *i;
        struct btree_iter *split;
        unsigned u64s;
        int ret;
 
-       for (i = m; i < m + nr; i++) {
+       trans_for_each_entry(trans, i) {
                EBUG_ON(i->iter->level);
                EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+               i->done = false;
        }
+       trans->did_work = false;
 
-       /* Sort transaction entries by iterator position, for lock ordering: */
-       sort(m, nr, sizeof(m[0]), btree_trans_iter_cmp, NULL);
+       sort(trans->entries, trans->nr, sizeof(trans->entries[0]),
+            btree_trans_entry_cmp, NULL);
 
        if (unlikely(!percpu_ref_tryget(&c->writes)))
                return -EROFS;
 
-       for (i = m; i < m + nr; i++) {
+       trans_for_each_entry(trans, i) {
                i->iter->locks_want = 0;
                if (unlikely(!bch_btree_iter_upgrade(i->iter))) {
                        ret = -EINTR;
@@ -1587,9 +1594,8 @@ int bch_btree_insert_trans(struct btree_insert_trans *m, unsigned nr,
                }
        }
 retry:
-       split = NULL;
        u64s = 0;
-       for (i = m; i < m + nr; i++)
+       trans_for_each_entry(trans, i)
                if (!i->done)
                        u64s += jset_u64s(i->k->k.u64s);
@@ -1599,62 +1605,75 @@ retry:
        if (ret)
                goto err;
 
-       multi_lock_write(m, nr);
+       multi_lock_write(trans);
 
        u64s = 0;
-       for (i = m; i < m + nr; i++) {
+       trans_for_each_entry(trans, i) {
                /* Multiple inserts might go to same leaf: */
-               if (!same_leaf_as_prev(m, i))
+               if (!same_leaf_as_prev(trans, i))
                        u64s = 0;
 
                if (!i->done) {
                        u64s += i->k->k.u64s;
                        if (!bch_btree_node_insert_fits(c, i->iter->nodes[0], u64s))
-                               goto split;
+                               goto unlock_split;
                }
        }
 
-       for (i = m; i < m + nr; i++)
-               if (!i->done)
-                       switch (btree_insert_key(i->iter, i->k, disk_res,
-                                                hook, &res, flags)) {
-                       case BTREE_INSERT_OK:
-                               i->done = true;
-                               break;
-                       case BTREE_INSERT_NEED_TRAVERSE:
-                               ret = -EINTR;
-                               goto err_unlock;
-                       case BTREE_INSERT_NEED_RESCHED:
-                               ret = -EAGAIN;
-                               goto err_unlock;
-                       case BTREE_INSERT_BTREE_NODE_FULL:
-                               goto split;
-                       case BTREE_INSERT_JOURNAL_RES_FULL:
-                               goto err_unlock;
-                       }
+       ret = 0;
+       split = NULL;
 
-       multi_unlock_write(m, nr);
+       trans_for_each_entry(trans, i) {
+               if (i->done)
+                       continue;
+
+               switch (btree_insert_key(trans, i, disk_res,
+                                        hook, &res, flags)) {
+               case BTREE_INSERT_OK:
+                       i->done = true;
+                       break;
+               case BTREE_INSERT_JOURNAL_RES_FULL:
+               case BTREE_INSERT_NEED_TRAVERSE:
+                       ret = -EINTR;
+                       break;
+               case BTREE_INSERT_NEED_RESCHED:
+                       ret = -EAGAIN;
+                       break;
+               case BTREE_INSERT_BTREE_NODE_FULL:
+                       split = i->iter;
+                       break;
+               }
+
+               if (!trans->did_work && (ret || split))
+                       break;
+       }
+
+       multi_unlock_write(trans);
        bch_journal_res_put(&c->journal, &res, journal_seq);
 
-       for (i = m; i < m + nr; i++)
-               if (!same_leaf_as_prev(m, i))
+       if (split)
+               goto split;
+       if (ret)
+               goto err;
+
+       trans_for_each_entry(trans, i)
+               if (!same_leaf_as_prev(trans, i))
                        bch_btree_node_write_lazy(i->iter->nodes[0], i->iter);
 out:
        percpu_ref_put(&c->writes);
        return ret;
-split:
+unlock_split:
        split = i->iter;
-       multi_unlock_write(m, nr);
+       multi_unlock_write(trans);
        /*
         * have to drop journal res before splitting, because splitting means
         * allocating new btree nodes, and holding a journal reservation
         * potentially blocks the allocator:
         */
        bch_journal_res_put(&c->journal, &res, journal_seq);
-
+split:
        ret = bch_btree_split_leaf(split, flags);
        if (ret)
                goto err;
@@ -1665,12 +1684,9 @@ split:
         * and is overwriting won't have changed)
         */
        goto retry;
-err_unlock:
-       multi_unlock_write(m, nr);
-       bch_journal_res_put(&c->journal, &res, journal_seq);
 err:
        if (ret == -EAGAIN) {
-               for (i = m; i < m + nr; i++)
+               trans_for_each_entry(trans, i)
                        bch_btree_iter_unlock(i->iter);
                ret = -EINTR;
        }
@@ -1685,7 +1701,7 @@ err:
         * reservations:
         */
        if (ret == -EINTR && !(flags & BTREE_INSERT_ATOMIC)) {
-               for (i = m; i < m + nr; i++) {
+               trans_for_each_entry(trans, i) {
                        ret = bch_btree_iter_traverse(i->iter);
                        if (ret)
                                goto out;
@@ -1740,14 +1756,17 @@ int bch_btree_insert_at(struct btree_iter *iter,
                        u64 *journal_seq, unsigned flags)
 {
        struct btree_insert_trans m = {
-               .iter   = iter,
-               .k      = insert_key,
-               .done   = false,
+               .nr     = 1,
+               .entries = &(struct btree_trans_entry) {
+                       .iter   = iter,
+                       .k      = insert_key,
+                       .done   = false,
+               },
        };
-       int ret = bch_btree_insert_trans(&m, 1, disk_res,
+       int ret = bch_btree_insert_trans(&m, disk_res,
                                         hook, journal_seq, flags);
 
-       BUG_ON(!ret != m.done);
+       BUG_ON(!ret != m.entries[0].done);
        return ret;
 }
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index dfdfcefb29f9..e91ec0fc5880 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -4,6 +4,7 @@
 #include "btree_cache.h"
 #include "btree_iter.h"
 #include "buckets.h"
+#include "journal.h"
 
 struct cache_set;
 struct bkey_format_state;
@@ -242,13 +243,38 @@ int bch_btree_insert_list_at(struct btree_iter *, struct keylist *,
                             struct extent_insert_hook *, u64 *, unsigned);
 
 struct btree_insert_trans {
-       struct btree_iter       *iter;
-       struct bkey_i           *k;
-       /* true if entire key was inserted - can only be false for extents */
-       bool                    done;
+       unsigned                nr;
+       bool                    did_work;
+       struct btree_trans_entry {
+               struct btree_iter       *iter;
+               struct bkey_i           *k;
+               /*
+                * true if entire key was inserted - can only be false for
+                * extents
+                */
+               bool                    done;
+       } *entries;
 };
 
-int bch_btree_insert_trans(struct btree_insert_trans[], unsigned,
+static inline bool journal_res_insert_fits(struct btree_insert_trans *trans,
+                                          struct btree_trans_entry *insert,
+                                          struct journal_res *res)
+{
+       struct cache_set *c = insert->iter->c;
+       unsigned u64s = 0;
+       struct btree_trans_entry *i;
+
+       /* If we're in journal replay we're not getting journal reservations: */
+       if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+               return true;
+
+       for (i = insert; i < trans->entries + trans->nr; i++)
+               u64s += jset_u64s(i->k->k.u64s);
+
+       return u64s <= res->u64s;
+}
+
+int bch_btree_insert_trans(struct btree_insert_trans *,
                           struct disk_reservation *,
                           struct extent_insert_hook *, u64 *, unsigned);
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index 5815ad5befc5..228e25ea6ff9 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -324,9 +324,12 @@ int bch_dirent_rename(struct cache_set *c,
        new_dst->v.d_inum = old_src_d.v->d_inum;
        new_dst->v.d_type = old_src_d.v->d_type;
 
-       ret = bch_btree_insert_trans((struct btree_insert_trans[]) {
-               { &src_iter, &new_src->k_i, },
-               { &dst_iter, &new_dst->k_i, }}, 2,
+       ret = bch_btree_insert_trans(&(struct btree_insert_trans) {
+               .nr = 2,
+               .entries = (struct btree_trans_entry[]) {
+                       { &src_iter, &new_src->k_i, },
+                       { &dst_iter, &new_dst->k_i, }
+               }},
                NULL, NULL, journal_seq, BTREE_INSERT_ATOMIC);
 
        bch_btree_iter_unlock(&src_iter);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 2c0a38612682..1c04c629cea4 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -106,24 +106,24 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
 
 /* This returns true if insert should be inserted, false otherwise */
 enum btree_insert_ret
-bch_insert_fixup_key(struct btree_iter *iter,
-                    struct bkey_i *insert,
+bch_insert_fixup_key(struct btree_insert_trans *trans,
+                    struct btree_trans_entry *insert,
                     struct journal_res *res)
 {
-       struct btree *b = iter->nodes[0];
-       struct btree_node_iter *node_iter = &iter->node_iters[0];
+       struct btree *b = insert->iter->nodes[0];
+       struct btree_node_iter *node_iter = &insert->iter->node_iters[0];
        const struct bkey_format *f = &b->keys.format;
        struct bkey_packed *k;
        int cmp;
 
-       BUG_ON(iter->level);
+       BUG_ON(insert->iter->level);
        EBUG_ON((k = bch_btree_node_iter_prev_all(node_iter, &b->keys)) &&
                (bkey_deleted(k)
-                ? bkey_cmp_packed(f, k, &insert->k) > 0
-                : bkey_cmp_packed(f, k, &insert->k) >= 0));
+                ? bkey_cmp_packed(f, k, &insert->k->k) > 0
+                : bkey_cmp_packed(f, k, &insert->k->k) >= 0));
 
        while ((k = bch_btree_node_iter_peek_all(node_iter, &b->keys)) &&
-              (cmp = bkey_cmp_packed(f, k, &insert->k)) <= 0) {
+              (cmp = bkey_cmp_packed(f, k, &insert->k->k)) <= 0) {
                if (!cmp && !bkey_deleted(k)) {
                        k->type = KEY_TYPE_DELETED;
                        btree_keys_account_key_drop(&b->keys.nr, k);
@@ -132,7 +132,8 @@ bch_insert_fixup_key(struct btree_iter *iter,
                bch_btree_node_iter_next_all(node_iter, &b->keys);
        }
 
-       bch_btree_insert_and_journal(iter, insert, res);
+       bch_btree_insert_and_journal(insert->iter, insert->k, res);
+       trans->did_work = true;
 
        return BTREE_INSERT_OK;
 }
@@ -979,15 +980,13 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
 
 #define MAX_LOCK_HOLD_TIME     (5 * NSEC_PER_MSEC)
 
-static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
-                                                      struct bkey_i *insert,
+static enum btree_insert_ret extent_insert_should_stop(struct btree_insert_trans *trans,
+                                                      struct btree_trans_entry *insert,
                                                       struct journal_res *res,
                                                       u64 start_time,
                                                       unsigned nr_done)
 {
-       struct cache_set *c = iter->c;
-       struct btree *b = iter->nodes[0];
-
+       struct btree *b = insert->iter->nodes[0];
        /*
        * Check if we have sufficient space in both the btree node and the
        * journal reservation:
@@ -1001,9 +1000,9 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
        * doing a lot of work under the btree node write lock - bail out if
        * we've been running for too long and readers are waiting on the lock:
        */
-       if (!bch_btree_node_insert_fits(c, b, insert->k.u64s))
+       if (!bch_btree_node_insert_fits(insert->iter->c, b, insert->k->k.u64s))
                return BTREE_INSERT_BTREE_NODE_FULL;
-       else if (!journal_res_insert_fits(c, res, insert))
+       else if (!journal_res_insert_fits(trans, insert, res))
                return BTREE_INSERT_JOURNAL_RES_FULL; /* XXX worth tracing */
        else if (nr_done > 10 &&
                 time_after64(local_clock(), start_time +
@@ -1014,26 +1013,30 @@ static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
        return BTREE_INSERT_OK;
 }
 
-static void extent_insert_committed(struct btree_iter *iter,
-                                   struct bkey_i *insert,
+static void extent_insert_committed(struct btree_insert_trans *trans,
+                                   struct btree_trans_entry *insert,
                                    struct journal_res *res)
 {
-       EBUG_ON(bkey_cmp(insert->k.p, iter->pos) < 0);
-       EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)) < 0);
+       struct btree_iter *iter = insert->iter;
+       struct bkey_i *k = insert->k;
 
-       if (bkey_cmp(iter->pos, bkey_start_pos(&insert->k)) > 0) {
-               EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+       EBUG_ON(bkey_cmp(k->k.p, iter->pos) < 0);
+       EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&k->k)) < 0);
+
+       if (bkey_cmp(iter->pos, bkey_start_pos(&k->k)) > 0) {
+               EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
                bch_btree_insert_and_journal(iter,
-                               bch_key_split(iter->pos, insert), res);
+                               bch_key_split(iter->pos, k), res);
+               trans->did_work = true;
        }
 }
 
 static enum extent_insert_hook_ret
-__extent_insert_advance_pos(struct extent_insert_hook *hook,
-                           struct btree_iter *iter,
+__extent_insert_advance_pos(struct btree_insert_trans *trans,
+                           struct btree_trans_entry *insert,
+                           struct extent_insert_hook *hook,
                            struct bpos next_pos,
-                           struct bkey_i *insert,
                            struct bkey_s_c k,
                            struct journal_res *res,
                            struct bucket_stats_cache_set *stats)
@@ -1041,31 +1044,33 @@ __extent_insert_advance_pos(struct extent_insert_hook *hook,
        enum extent_insert_hook_ret ret;
 
        if (k.k && k.k->size &&
-           insert->k.version &&
-           k.k->version > insert->k.version)
+           insert->k->k.version &&
+           k.k->version > insert->k->k.version)
                ret = BTREE_HOOK_NO_INSERT;
        else if (hook)
-               ret = hook->fn(hook, iter, next_pos, k, insert);
+               ret = hook->fn(hook, insert->iter, next_pos, k, insert->k);
        else
                ret = BTREE_HOOK_DO_INSERT;
 
-       EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+       EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
 
        switch (ret) {
        case BTREE_HOOK_DO_INSERT:
                break;
        case BTREE_HOOK_NO_INSERT:
-               extent_insert_committed(iter, insert, res);
-               bch_cut_subtract_front(iter, next_pos,
-                                      bkey_i_to_s(insert), stats);
+               extent_insert_committed(trans, insert, res);
+               bch_cut_subtract_front(insert->iter, next_pos,
+                                      bkey_i_to_s(insert->k), stats);
                break;
+       case BTREE_HOOK_RESTART_TRANS:
+               return ret;
        }
 
        /*
        * Don't update iter->pos until after calling the hook,
        * because the hook fn may use it:
        */
-       bch_btree_iter_set_pos(iter, next_pos);
+       bch_btree_iter_set_pos(insert->iter, next_pos);
 
        return ret;
 }
@@ -1074,41 +1079,48 @@ __extent_insert_advance_pos(struct extent_insert_hook *hook,
  * fn:
  */
 static enum extent_insert_hook_ret
-extent_insert_advance_pos(struct extent_insert_hook *hook,
-                         struct btree_iter *iter,
-                         struct bkey_i *insert,
+extent_insert_advance_pos(struct btree_insert_trans *trans,
+                         struct btree_trans_entry *insert,
+                         struct extent_insert_hook *hook,
                          struct bkey_s_c k,
                          struct journal_res *res,
                          struct bucket_stats_cache_set *stats)
 {
-       struct btree *b = iter->nodes[0];
+       struct btree *b = insert->iter->nodes[0];
        struct bpos next_pos = k.k
-               ? bpos_min(insert->k.p, k.k->p)
-               : bpos_min(insert->k.p, b->key.k.p);
+               ? bpos_min(insert->k->k.p, k.k->p)
+               : bpos_min(insert->k->k.p, b->key.k.p);
 
        /* hole? */
-       if (k.k && bkey_cmp(iter->pos, bkey_start_pos(k.k)) < 0) {
-               bool might_split = bkey_cmp(iter->pos,
-                                           bkey_start_pos(&insert->k)) > 0;
-
-               /*
-                * If a hole causes us to split and insert a previously
-                * comitted portion, return BTREE_HOOK_NO_INSERT to recheck
-                * if we have room in journal res/btree node:
-                */
-               if (__extent_insert_advance_pos(hook, iter, bkey_start_pos(k.k),
-                                               insert, bkey_s_c_null, res,
-                                               stats) == BTREE_HOOK_NO_INSERT &&
-                   might_split)
-                       return BTREE_HOOK_NO_INSERT;
+       if (k.k && bkey_cmp(insert->iter->pos, bkey_start_pos(k.k)) < 0) {
+               bool might_split = bkey_cmp(insert->iter->pos,
+                                           bkey_start_pos(&insert->k->k)) > 0;
+
+               switch (__extent_insert_advance_pos(trans, insert, hook,
+                                                   bkey_start_pos(k.k),
+                                                   bkey_s_c_null,
+                                                   res, stats)) {
+               case BTREE_HOOK_DO_INSERT:
+                       break;
+               case BTREE_HOOK_NO_INSERT:
+                       /*
+                        * @insert was split, need to recheck if we have room in
+                        * journal res/btree node:
+                        */
+                       if (might_split)
+                               return BTREE_HOOK_DO_INSERT;
+                       break;
+               case BTREE_HOOK_RESTART_TRANS:
+                       return BTREE_HOOK_RESTART_TRANS;
+               }
        }
 
        /* avoid redundant calls to hook fn: */
-       if (!bkey_cmp(iter->pos, next_pos))
+       if (!bkey_cmp(insert->iter->pos, next_pos))
                return BTREE_HOOK_DO_INSERT;
 
-       return __extent_insert_advance_pos(hook, iter, next_pos,
-                                          insert, k, res, stats);
+       return __extent_insert_advance_pos(trans, insert, hook, next_pos,
+                                          k, res, stats);
 }
 
 /**
@@ -1151,13 +1163,14 @@ extent_insert_advance_pos(struct extent_insert_hook *hook,
  * key insertion needs to continue/be retried.
  */
 enum btree_insert_ret
-bch_insert_fixup_extent(struct btree_iter *iter,
-                       struct bkey_i *insert,
+bch_insert_fixup_extent(struct btree_insert_trans *trans,
+                       struct btree_trans_entry *insert,
                        struct disk_reservation *disk_res,
                        struct extent_insert_hook *hook,
                        struct journal_res *res,
                        unsigned flags)
 {
+       struct btree_iter *iter = insert->iter;
        struct cache_set *c = iter->c;
        struct btree *b = iter->nodes[0];
        struct btree_node_iter *node_iter = &iter->node_iters[0];
@@ -1170,19 +1183,19 @@ bch_insert_fixup_extent(struct btree_iter *iter,
        enum btree_insert_ret ret = BTREE_INSERT_OK;
 
        EBUG_ON(iter->level);
-       EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+       EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
 
        /*
        * As we process overlapping extents, we advance @iter->pos both to
-       * signal to our caller (btree_insert_key()) how much of @insert has
-       * been inserted, and also to keep @iter->pos consistent with @insert
-       * and the node iterator that we're advancing:
+       * signal to our caller (btree_insert_key()) how much of @insert->k has
+       * been inserted, and also to keep @iter->pos consistent with
+       * @insert->k and the node iterator that we're advancing:
        */
-       EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
+       EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
 
        /*
-       * If this is a cmpxchg operation, @insert doesn't necessarily exist in
-       * the btree, and may have pointers not pinned by open buckets; thus
+       * If this is a cmpxchg operation, @insert->k doesn't necessarily exist
+       * in the btree, and may have pointers not pinned by open buckets; thus
        * some of the pointers might be stale because we raced with foreground
        * writes.
        *
@@ -1197,39 +1210,46 @@ bch_insert_fixup_extent(struct btree_iter *iter,
        * to proceed with the insertion.
        */
        if (!(flags & BTREE_INSERT_NO_MARK_KEY))
-               bch_add_sectors(iter, bkey_i_to_s_c(insert),
-                               bkey_start_offset(&insert->k),
-                               insert->k.size, &stats);
+               bch_add_sectors(iter, bkey_i_to_s_c(insert->k),
+                               bkey_start_offset(&insert->k->k),
+                               insert->k->k.size, &stats);
 
-       while (bkey_cmp(iter->pos, insert->k.p) < 0 &&
-              (ret = extent_insert_should_stop(iter, insert, res,
+       while (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+              (ret = extent_insert_should_stop(trans, insert, res,
                                                start_time, nr_done)) == BTREE_INSERT_OK &&
               (_k = bch_btree_node_iter_peek_overlapping(node_iter,
-                                               &b->keys, &insert->k))) {
+                                               &b->keys, &insert->k->k))) {
                struct bkey_s k = __bkey_disassemble(f, _k, &unpacked);
 
                /*
                 * Only call advance pos & call hook for nonzero size extents:
-                * If hook returned BTREE_HOOK_NO_INSERT, @insert no longer
+                * If hook returned BTREE_HOOK_NO_INSERT, @insert->k no longer
                 * overlaps with @k:
                 */
-               if (k.k->size &&
-                   (extent_insert_advance_pos(hook, iter, insert, k.s_c, res,
-                                              &stats) == BTREE_HOOK_NO_INSERT))
-                       continue;
+               if (k.k->size)
+                       switch (extent_insert_advance_pos(trans, insert, hook,
+                                                         k.s_c, res, &stats)) {
+                       case BTREE_HOOK_DO_INSERT:
+                               break;
+                       case BTREE_HOOK_NO_INSERT:
+                               continue;
+                       case BTREE_HOOK_RESTART_TRANS:
+                               ret = BTREE_INSERT_NEED_TRAVERSE;
+                               goto stop;
+                       }
 
                /* k is the key currently in the tree, 'insert' is the new key */
-               switch (bch_extent_overlap(&insert->k, k.k)) {
+               switch (bch_extent_overlap(&insert->k->k, k.k)) {
                case BCH_EXTENT_OVERLAP_FRONT:
                        /* insert and k share the start, invalidate in k */
-                       bch_cut_subtract_front(iter, insert->k.p, k, &stats);
+                       bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
                        extent_save(_k, k.k, f);
                        break;
 
                case BCH_EXTENT_OVERLAP_BACK:
                        /* insert and k share the end, invalidate in k */
                        bch_cut_subtract_back(iter,
-                                             bkey_start_pos(&insert->k),
+                                             bkey_start_pos(&insert->k->k),
                                              k, &stats);
                        extent_save(_k, k.k, f);
 
@@ -1250,7 +1270,7 @@ bch_insert_fixup_extent(struct btree_iter *iter,
                        btree_keys_account_key_drop(&b->keys.nr, _k);
 
                        bch_drop_subtract(iter, k, &stats);
-                       k.k->p = bkey_start_pos(&insert->k);
+                       k.k->p = bkey_start_pos(&insert->k->k);
                        if (!__extent_save(_k, k.k, f)) {
                                /*
                                 * Couldn't repack: we aren't necessarily able
@@ -1261,9 +1281,14 @@ bch_insert_fixup_extent(struct btree_iter *iter,
                                k.k->p = orig_pos;
                                extent_save(_k, k.k, f);
 
-                               extent_insert_advance_pos(hook, iter, insert,
-                                                         k.s_c, res, &stats);
-                               extent_insert_committed(iter, insert, res);
+                               if (extent_insert_advance_pos(trans, insert,
+                                                             hook, k.s_c, res,
+                                                             &stats) ==
+                                   BTREE_HOOK_RESTART_TRANS) {
+                                       ret = BTREE_INSERT_NEED_TRAVERSE;
+                                       goto stop;
+                               }
+                               extent_insert_committed(trans, insert, res);
                                /*
                                 * We split and inserted upto at k.k->p - that
                                 * has to coincide with iter->pos, so that we
@@ -1294,10 +1319,10 @@ bch_insert_fixup_extent(struct btree_iter *iter,
                         * what k points to)
                         */
                        bkey_reassemble(&split.k, k.s_c);
-                       bch_cut_back(bkey_start_pos(&insert->k), &split.k.k);
+                       bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
 
-                       __bch_cut_front(bkey_start_pos(&insert->k), k);
-                       bch_cut_subtract_front(iter, insert->k.p, k, &stats);
+                       __bch_cut_front(bkey_start_pos(&insert->k->k), k);
+                       bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
                        extent_save(_k, k.k, f);
 
                        bch_btree_bset_insert(iter, b, node_iter, &split.k);
@@ -1306,26 +1331,26 @@
                }
        }
 
-       if (bkey_cmp(iter->pos, insert->k.p) < 0 &&
-           ret == BTREE_INSERT_OK)
-               extent_insert_advance_pos(hook, iter, insert, bkey_s_c_null,
-                                         res, &stats);
-
-       extent_insert_committed(iter, insert, res);
+       if (bkey_cmp(iter->pos, insert->k->k.p) < 0 &&
+           ret == BTREE_INSERT_OK &&
+           extent_insert_advance_pos(trans, insert, hook, bkey_s_c_null, res,
+                                     &stats) == BTREE_HOOK_RESTART_TRANS)
+               ret = BTREE_INSERT_NEED_TRAVERSE;
+stop:
+       extent_insert_committed(trans, insert, res);
 
        /*
        * Subtract any remaining sectors from @insert, if we bailed out early
        * and didn't fully insert @insert:
        */
-       if (insert->k.size)
-               bch_subtract_sectors(iter, bkey_i_to_s_c(insert),
-                                    iter->pos.offset,
-                                    insert->k.p.offset - iter->pos.offset,
-                                    &stats);
+       if (insert->k->k.size && !(flags & BTREE_INSERT_NO_MARK_KEY))
+               bch_subtract_sectors(iter, bkey_i_to_s_c(insert->k),
+                                    bkey_start_offset(&insert->k->k),
+                                    insert->k->k.size, &stats);
 
        bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
 
-       if (insert->k.size && !bkey_cmp(iter->pos, b->key.k.p))
+       if (insert->k->k.size && !bkey_cmp(iter->pos, b->key.k.p))
                ret = BTREE_INSERT_NEED_TRAVERSE;
 
        return ret;
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 1610ac35f7fe..23418ccb04f9 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -8,6 +8,8 @@ struct bch_replace_info;
 union bch_extent_crc;
 struct btree_iter;
+struct btree_insert_trans;
+struct btree_trans_entry;
 
 struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *,
                                                  struct bset *,
@@ -17,7 +19,8 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *,
                                                     struct btree_node_iter *);
 
 enum btree_insert_ret
-bch_insert_fixup_key(struct btree_iter *, struct bkey_i *,
+bch_insert_fixup_key(struct btree_insert_trans *,
+                    struct btree_trans_entry *,
                     struct journal_res *);
 
 extern const struct bkey_ops bch_bkey_btree_ops;
@@ -53,7 +56,8 @@ bch_extent_cmpxchg(struct extent_insert_hook *, struct btree_iter *,
                   struct bpos, struct bkey_s_c, const struct bkey_i *);
 
 enum btree_insert_ret
-bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *,
+bch_insert_fixup_extent(struct btree_insert_trans *,
+                       struct btree_trans_entry *,
                        struct disk_reservation *,
                        struct extent_insert_hook *,
                        struct journal_res *, unsigned);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index be2b842b9197..c54087472b90 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -211,17 +211,6 @@ static inline unsigned jset_u64s(unsigned u64s)
        return u64s + sizeof(struct jset_entry) / sizeof(u64);
 }
 
-static inline bool journal_res_insert_fits(struct cache_set *c,
-                                          struct journal_res *res,
-                                          struct bkey_i *k)
-{
-       /* If we're in journal replay we're not getting journal reservations: */
-       if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
-               return true;
-
-       return jset_u64s(k->k.u64s) <= res->u64s;
-}
-
 void bch_journal_start(struct cache_set *);
 void bch_journal_mark(struct cache_set *, struct list_head *);
 const char *bch_journal_read(struct cache_set *, struct list_head *);
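
The other user-visible addition is BTREE_HOOK_RESTART_TRANS in enum extent_insert_hook_ret. Below is a hypothetical hook, for illustration only and not part of the patch; the signature follows how hook->fn() is invoked from __extent_insert_advance_pos(), and the restart condition is a placeholder:

    /* Hypothetical hook sketch (not from the patch): */
    static enum extent_insert_hook_ret
    example_restart_hook(struct extent_insert_hook *hook,
                         struct btree_iter *iter,
                         struct bpos next_pos,
                         struct bkey_s_c k,
                         const struct bkey_i *insert)
    {
            /*
             * Placeholder: a real hook would return
             * BTREE_HOOK_RESTART_TRANS when state it depends on has
             * changed and the transaction must be redone from the top:
             */
            if (0 /* hypothetical restart condition */)
                    return BTREE_HOOK_RESTART_TRANS;

            return BTREE_HOOK_DO_INSERT;
    }

Returning BTREE_HOOK_RESTART_TRANS makes bch_insert_fixup_extent() bail out with BTREE_INSERT_NEED_TRAVERSE, which bch_btree_insert_trans() maps to -EINTR: without BTREE_INSERT_ATOMIC the iterators are re-traversed and the transaction retried; with it, the error is returned so the caller can restart the whole transaction itself.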