-rw-r--r--   drivers/md/bcache/btree_types.h  |  11
-rw-r--r--   drivers/md/bcache/btree_update.c | 524
-rw-r--r--   drivers/md/bcache/btree_update.h |   4
-rw-r--r--   drivers/md/bcache/dirent.c       |   3
-rw-r--r--   drivers/md/bcache/extents.c      |  48
-rw-r--r--   drivers/md/bcache/extents.h      |  14
-rw-r--r--   drivers/md/bcache/journal.c      |  16
7 files changed, 271 insertions, 349 deletions
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index 05b70dc53e37..6ab5a8d021ed 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -163,4 +163,15 @@ struct btree_insert_hook {
struct bucket_stats_cache_set *);
};
+enum btree_insert_ret {
+ BTREE_INSERT_OK,
+ /* extent spanned multiple leaf nodes: have to traverse to next node: */
+ BTREE_INSERT_NEED_TRAVERSE,
+ /* write lock held for too long */
+ BTREE_INSERT_NEED_RESCHED,
+ /* leaf node needs to be split */
+ BTREE_INSERT_BTREE_NODE_FULL,
+ BTREE_INSERT_JOURNAL_RES_FULL,
+};
+
#endif /* _BCACHE_BTREE_TYPES_H */
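
For readers following the rest of the patch: the standalone sketch below is not part of the patch; the caller_action names and status_to_action() helper are invented for illustration. It summarizes how bch_btree_insert_at_multi(), further down in btree_update.c, reacts to each of these new status codes.

#include <stdio.h>

enum btree_insert_ret {
	BTREE_INSERT_OK,
	BTREE_INSERT_NEED_TRAVERSE,	/* extent spanned leaf nodes */
	BTREE_INSERT_NEED_RESCHED,	/* write lock held too long */
	BTREE_INSERT_BTREE_NODE_FULL,	/* leaf node needs to be split */
	BTREE_INSERT_JOURNAL_RES_FULL,	/* journal reservation exhausted */
};

/* Invented names describing what the caller does next: */
enum caller_action {
	ACTION_DONE,		/* key fully inserted */
	ACTION_SPLIT_RETRY,	/* split the leaf node, then retry */
	ACTION_RETRY,		/* put the journal reservation, take a fresh one, retry */
	ACTION_TRAVERSE_RETRY,	/* drop locks/re-traverse, then retry; surfaces as
				 * -EINTR when BTREE_INSERT_ATOMIC forbids retrying */
};

static enum caller_action status_to_action(enum btree_insert_ret ret)
{
	switch (ret) {
	case BTREE_INSERT_OK:			return ACTION_DONE;
	case BTREE_INSERT_BTREE_NODE_FULL:	return ACTION_SPLIT_RETRY;
	case BTREE_INSERT_JOURNAL_RES_FULL:	return ACTION_RETRY;
	case BTREE_INSERT_NEED_TRAVERSE:
	case BTREE_INSERT_NEED_RESCHED:		return ACTION_TRAVERSE_RETRY;
	}
	return ACTION_RETRY;
}

int main(void)
{
	printf("%d\n", status_to_action(BTREE_INSERT_BTREE_NODE_FULL));
	return 0;
}
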
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index dab70f795904..354e11412cbb 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -750,67 +750,6 @@ void bch_btree_insert_and_journal(struct btree_iter *iter,
bch_btree_bset_insert(iter, b, node_iter, insert);
}
-/**
- * btree_insert_key - insert a key into a btree node, handling overlapping extents.
- *
- * The insert is journalled.
- *
- * @iter: btree iterator
- * @insert_keys: list of keys to insert
- * @replace: old key for for exchange (+ stats)
- * @res: journal reservation
- * @flags: BTREE_INSERT_NO_MARK_KEY
- *
- * Inserts the first key from @insert_keys
- *
- * Returns true if an insert was actually done and @b was modified - false on a
- * failed replace operation
- */
-static void btree_insert_key(struct btree_iter *iter, struct btree *b,
- struct btree_node_iter *node_iter,
- struct keylist *insert_keys,
- struct disk_reservation *disk_res,
- struct btree_insert_hook *hook,
- struct journal_res *res,
- unsigned flags)
-{
- struct bkey_i *insert = bch_keylist_front(insert_keys), *orig = insert;
- BKEY_PADDED(key) temp;
- s64 oldsize = bch_count_data(&b->keys);
-
- bch_btree_node_iter_verify(node_iter, &b->keys);
- BUG_ON(b->level);
- BUG_ON(iter->nodes[0] != b || &iter->node_iters[0] != node_iter);
-
- if (!b->keys.ops->is_extents) {
- bch_insert_fixup_key(iter, insert, hook, res);
- bch_keylist_dequeue(insert_keys);
- } else {
- bkey_copy(&temp.key, insert);
- insert = &temp.key;
-
- if (bkey_cmp(insert->k.p, b->key.k.p) > 0)
- bch_cut_back(b->key.k.p, &insert->k);
-
- bch_insert_fixup_extent(iter, insert, disk_res,
- hook, res, flags);
-
- bch_cut_front(iter->pos, orig);
- if (orig->k.size == 0)
- bch_keylist_dequeue(insert_keys);
- }
-
- bch_count_data_verify(&b->keys, oldsize);
-
- trace_bcache_btree_insert_key(b, insert);
-}
-
-enum btree_insert_status {
- BTREE_INSERT_OK,
- BTREE_INSERT_NEED_SPLIT,
- BTREE_INSERT_ERROR,
-};
-
static void verify_keys_sorted(struct keylist *l)
{
#ifdef CONFIG_BCACHE_DEBUG
@@ -1144,14 +1083,7 @@ err:
#endif
}
-static void btree_insert_keys_checks(struct btree_iter *iter, struct btree *b)
-{
- BUG_ON(iter->nodes[b->level] != b);
- BUG_ON(!btree_node_intent_locked(iter, b->level));
- BUG_ON(!b->written);
-}
-
-static enum btree_insert_status
+static enum btree_insert_ret
bch_btree_insert_keys_interior(struct btree *b,
struct btree_iter *iter,
struct keylist *insert_keys,
@@ -1166,8 +1098,6 @@ bch_btree_insert_keys_interior(struct btree *b,
BUG_ON(!btree_node_intent_locked(iter, btree_node_root(b)->level));
BUG_ON(!b->level);
BUG_ON(!as || as->b);
-
- btree_insert_keys_checks(iter, b);
verify_keys_sorted(insert_keys);
btree_node_lock_for_insert(b, iter);
@@ -1175,7 +1105,7 @@ bch_btree_insert_keys_interior(struct btree *b,
if (bch_keylist_nkeys(insert_keys) >
bch_btree_keys_u64s_remaining(iter->c, b)) {
btree_node_unlock_write(b, iter);
- return BTREE_INSERT_NEED_SPLIT;
+ return BTREE_INSERT_BTREE_NODE_FULL;
}
while (!bch_keylist_empty(insert_keys)) {
@@ -1211,100 +1141,9 @@ bch_btree_insert_keys_interior(struct btree *b,
;
btree_node_interior_verify(b);
-
return BTREE_INSERT_OK;
}
-/**
- * bch_btree_insert_keys - insert keys from @insert_keys into btree node @b,
- * until the node is full.
- *
- * If keys couldn't be inserted because @b was full, the caller must split @b
- * and bch_btree_insert_keys() will be called again from btree_split().
- *
- * Caller must either be holding an intent lock on this node only, or intent
- * locks on all nodes all the way up to the root. Caller must not be holding
- * read locks on any nodes.
- */
-static enum btree_insert_status
-bch_btree_insert_keys_leaf(struct btree *b,
- struct btree_iter *iter,
- struct keylist *insert_keys,
- struct disk_reservation *disk_res,
- struct btree_insert_hook *hook,
- u64 *journal_seq,
- unsigned flags)
-{
- bool done = false, need_split = false;
- struct journal_res res = { 0, 0 };
- struct bkey_i *k = bch_keylist_front(insert_keys);
-
- BUG_ON(b->level);
-
- btree_insert_keys_checks(iter, b);
- verify_keys_sorted(insert_keys);
-
- while (!done && !bch_keylist_empty(insert_keys)) {
- /*
- * We need room to insert at least two keys in the journal
- * reservation -- the insert key itself, as well as a subset
- * of it, in the bkey_cmpxchg() or handle_existing_key_newer()
- * cases
- */
- unsigned n_min = bch_keylist_front(insert_keys)->k.u64s;
- unsigned n_max = bch_keylist_nkeys(insert_keys);
-
- unsigned actual_min = jset_u64s(n_min) * 2;
- unsigned actual_max = max_t(unsigned, actual_min,
- jset_u64s(n_max));
-
- if (test_bit(JOURNAL_REPLAY_DONE, &iter->c->journal.flags) &&
- bch_journal_res_get(&iter->c->journal, &res,
- actual_min, actual_max))
- return BTREE_INSERT_ERROR;
-
- btree_node_lock_for_insert(b, iter);
-
- while (!bch_keylist_empty(insert_keys)) {
- k = bch_keylist_front(insert_keys);
-
- EBUG_ON(bkey_cmp(bkey_start_pos(&k->k), iter->pos));
-
- /* finished for this node */
- if (b->keys.ops->is_extents
- ? bkey_cmp(bkey_start_pos(&k->k), b->key.k.p) >= 0
- : bkey_cmp(k->k.p, b->key.k.p) > 0) {
- done = true;
- break;
- }
-
- if (!bch_btree_node_insert_fits(iter->c, b, k->k.u64s)) {
- done = true;
- need_split = true;
- break;
- }
-
- if (!journal_res_insert_fits(iter->c, &res, k,
- iter->is_extents))
- break;
-
- btree_insert_key(iter, b, &iter->node_iters[b->level],
- insert_keys, disk_res,
- hook, &res, flags);
- }
-
- btree_node_unlock_write(b, iter);
-
- if (res.ref)
- bch_journal_res_put(&iter->c->journal, &res,
- journal_seq);
- }
-
- bch_btree_node_write_lazy(b, iter);
-
- return need_split ? BTREE_INSERT_NEED_SPLIT : BTREE_INSERT_OK;
-}
-
/*
* Move keys from n1 (original replacement node, now lower node) to n2 (higher
* node)
@@ -1592,7 +1431,7 @@ void bch_btree_insert_node(struct btree *b,
as, reserve)) {
case BTREE_INSERT_OK:
break;
- case BTREE_INSERT_NEED_SPLIT:
+ case BTREE_INSERT_BTREE_NODE_FULL:
btree_split(b, iter, insert_keys, reserve, as);
break;
default:
@@ -1600,8 +1439,6 @@ void bch_btree_insert_node(struct btree *b,
}
}
-/* Normal update interface: */
-
static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags)
{
struct cache_set *c = iter->c;
@@ -1643,152 +1480,100 @@ out_unlock:
}
/**
- * bch_btree_insert_at - insert bkeys starting at a given btree node
- * @iter: btree iterator
- * @insert_keys: list of keys to insert
- * @hook: insert callback
- * @persistent: if not null, @persistent will wait on journal write
- * @flags: BTREE_INSERT_ATOMIC | BTREE_INSERT_NO_MARK_KEY
- *
- * This is top level for common btree insertion/index update code. The control
- * flow goes roughly like:
- *
- * bch_btree_insert_at -- split keys that span interior nodes
- * bch_btree_insert_node -- split btree nodes when full
- * btree_split
- * bch_btree_insert_keys -- get and put journal reservations
- * btree_insert_key -- call fixup and remove key from keylist
- * bch_insert_fixup_extent -- handle overlapping extents
- * bch_btree_insert_and_journal -- add the key to the journal
- * bch_bset_insert -- actually insert into the bset
- *
- * This function will split keys that span multiple nodes, calling
- * bch_btree_insert_node() for each one. It will not return until all keys
- * have been inserted, or an insert has failed.
- *
- * @persistent will only wait on the journal write if the full keylist was
- * inserted.
- *
- * Return values:
- * -EINTR: locking changed, this function should be called again. Only returned
- * if passed BTREE_INSERT_ATOMIC.
- * -EROFS: cache set read only
- * -EIO: journal or btree node IO error
+ * btree_insert_key - insert a single key into a leaf node
*/
-int bch_btree_insert_at(struct btree_iter *iter,
- struct keylist *insert_keys,
- struct disk_reservation *disk_res,
- struct btree_insert_hook *hook,
- u64 *journal_seq, unsigned flags)
+static enum btree_insert_ret
+btree_insert_key(struct btree_iter *iter, struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_i *insert,
+ struct disk_reservation *disk_res,
+ struct btree_insert_hook *hook,
+ struct journal_res *res,
+ unsigned flags)
{
- int ret = -EINTR;
-
- BUG_ON(iter->level);
+ s64 oldsize = bch_count_data(&b->keys);
+ enum btree_insert_ret ret;
- if (unlikely(!percpu_ref_tryget(&iter->c->writes)))
- return -EROFS;
+ bch_btree_node_iter_verify(node_iter, &b->keys);
+ BUG_ON(b->level);
+ BUG_ON(iter->nodes[0] != b || &iter->node_iters[0] != node_iter);
- iter->locks_want = 0;
- if (unlikely(!bch_btree_iter_upgrade(iter)))
- goto traverse;
+ if (!b->keys.ops->is_extents) {
+ ret = bch_insert_fixup_key(iter, insert, hook, res);
+ } else {
+ BKEY_PADDED(key) temp;
- while (1) {
- EBUG_ON(bkey_cmp(bkey_start_pos(&bch_keylist_front(insert_keys)->k),
- iter->pos));
-
- switch (bch_btree_insert_keys_leaf(iter->nodes[0], iter,
- insert_keys, disk_res,
- hook, journal_seq, flags)) {
- case BTREE_INSERT_OK:
- ret = 0;
- break;
- case BTREE_INSERT_NEED_SPLIT:
- ret = bch_btree_split_leaf(iter, flags);
- break;
- case BTREE_INSERT_ERROR:
- /* Journal error, so we couldn't get a journal reservation: */
- ret = -EIO;
- break;
- default:
- BUG();
- }
+ if (!bkey_cmp(iter->pos, b->key.k.p))
+ return BTREE_INSERT_NEED_TRAVERSE;
- /*
- * We don't test against success because we might have
- * successfully inserted the keys on the keylist, but have more
- * to insert in the next leaf node:
- */
- if (likely(bch_keylist_empty(insert_keys))) {
- BUG_ON(ret);
- break;
- }
+ bkey_copy(&temp.key, insert);
+ if (bkey_cmp(insert->k.p, b->key.k.p) > 0)
+ bch_cut_back(b->key.k.p, &temp.key.k);
- /*
- * -EAGAIN means we have to drop locks and wait on
- * mca_cannibalize_lock - btree_iter_unlock() does this
- */
- if (ret == -EAGAIN) {
- bch_btree_iter_unlock(iter);
- ret = -EINTR;
- }
+ ret = bch_insert_fixup_extent(iter, &temp.key, disk_res,
+ hook, res, flags);
- if (ret && ret != -EINTR)
- break;
-traverse:
- /*
- * Can't retry, make sure we return an error:
- */
- if (flags & BTREE_INSERT_ATOMIC) {
- ret = ret ?: -EINTR;
- break;
- }
+ bch_cut_front(iter->pos, insert);
+ if (insert->k.size && !bkey_cmp(iter->pos, b->key.k.p))
+ ret = BTREE_INSERT_NEED_TRAVERSE;
- ret = bch_btree_iter_traverse(iter);
- if (ret)
- break;
+ EBUG_ON(bkey_cmp(iter->pos, b->key.k.p) > 0);
+ EBUG_ON((ret == BTREE_INSERT_OK) != (insert->k.size == 0));
}
- percpu_ref_put(&iter->c->writes);
+ bch_count_data_verify(&b->keys, oldsize);
+
+ trace_bcache_btree_insert_key(b, insert);
return ret;
}
-static void multi_lock_write(struct btree_insert_multi *first,
- struct btree_insert_multi *m)
+static bool same_leaf_as_prev(struct btree_insert_multi *m,
+ struct btree_insert_multi *i)
{
/*
* Because we sorted the transaction entries, if multiple iterators
* point to the same leaf node they'll always be adjacent now:
*/
- if (m != first &&
- (m[0].iter->nodes[0] == m[-1].iter->nodes[0]))
- return; /* already locked */
+ return i != m && i[0].iter->nodes[0] == i[-1].iter->nodes[0];
+}
+
+static void multi_lock_write(struct btree_insert_multi *m, unsigned nr)
+{
+ struct btree_insert_multi *i;
- btree_node_lock_for_insert(m->iter->nodes[0], m->iter);
+ for (i = m; i < m + nr; i++)
+ if (!same_leaf_as_prev(m, i))
+ btree_node_lock_for_insert(i->iter->nodes[0], i->iter);
}
-static void multi_unlock_write(struct btree_insert_multi *first,
- struct btree_insert_multi *m)
+static void multi_unlock_write(struct btree_insert_multi *m, unsigned nr)
{
- if (m != first &&
- (m[0].iter->nodes[0] == m[-1].iter->nodes[0]))
- return; /* already locked */
+ struct btree_insert_multi *i;
- btree_node_unlock_write(m->iter->nodes[0], m->iter);
+ for (i = m; i < m + nr; i++)
+ if (!same_leaf_as_prev(m, i))
+ btree_node_unlock_write(i->iter->nodes[0], i->iter);
}
+/* Normal update interface: */
+
int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr,
+ struct disk_reservation *disk_res,
+ struct btree_insert_hook *hook,
u64 *journal_seq, unsigned flags)
{
struct cache_set *c = m[0].iter->c;
struct journal_res res = { 0, 0 };
struct btree_insert_multi *i;
struct btree_iter *split;
- unsigned u64s = 0;
+ unsigned u64s;
bool swapped;
int ret;
- for (i = m; i < m + nr; i++)
+ for (i = m; i < m + nr; i++) {
+ EBUG_ON(i->iter->level);
EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+ }
/* Sort transaction entries by iterator position, for lock ordering: */
do {
@@ -1804,9 +1589,6 @@ int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr,
if (unlikely(!percpu_ref_tryget(&c->writes)))
return -EROFS;
- for (i = m; i < m + nr; i++)
- u64s += jset_u64s(i->k->k.u64s);
-
for (i = m; i < m + nr; i++) {
i->iter->locks_want = 0;
if (unlikely(!bch_btree_iter_upgrade(i->iter))) {
@@ -1815,75 +1597,193 @@ int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr,
}
}
retry:
- ret = bch_journal_res_get(&c->journal, &res, u64s, u64s);
+ split = NULL;
+ u64s = 0;
+ for (i = m; i < m + nr; i++)
+ if (!i->done)
+ u64s += journal_res_u64s_required(i->k,
+ i->iter->is_extents);
+
+ ret = test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)
+ ? bch_journal_res_get(&c->journal, &res, u64s, u64s)
+ : 0;
if (ret)
goto err;
- for (i = m; i < m + nr; i++) {
- multi_lock_write(m, i);
+ multi_lock_write(m, nr);
- /*
- * Check against total, not just the key for this iterator,
- * because multiple inserts might be going to the same node:
- */
- if (!bch_btree_node_insert_fits(c,
- i->iter->nodes[0], u64s))
- goto split;
+ u64s = 0;
+ for (i = m; i < m + nr; i++) {
+ /* Multiple inserts might go to same leaf: */
+ if (!same_leaf_as_prev(m, i))
+ u64s = 0;
+
+ if (!i->done) {
+ u64s += i->k->k.u64s;
+ if (!bch_btree_node_insert_fits(c,
+ i->iter->nodes[0], u64s))
+ goto split;
+ }
}
for (i = m; i < m + nr; i++)
- btree_insert_key(i->iter, i->iter->nodes[0],
- &i->iter->node_iters[0],
- &keylist_single(i->k),
- NULL, NULL,
- &res, flags);
+ if (!i->done)
+ switch (btree_insert_key(i->iter,
+ i->iter->nodes[0],
+ &i->iter->node_iters[0],
+ i->k, disk_res, hook,
+ &res, flags)) {
+ case BTREE_INSERT_OK:
+ i->done = true;
+ break;
+ case BTREE_INSERT_NEED_TRAVERSE:
+ ret = -EINTR;
+ goto err_unlock;
+ case BTREE_INSERT_NEED_RESCHED:
+ ret = -EAGAIN;
+ goto err_unlock;
+ case BTREE_INSERT_BTREE_NODE_FULL:
+ goto split;
+ case BTREE_INSERT_JOURNAL_RES_FULL:
+ goto err_unlock;
+ }
- do {
- multi_unlock_write(m, --i);
- } while (i != m);
+ multi_unlock_write(m, nr);
bch_journal_res_put(&c->journal, &res, journal_seq);
- for (i = m; i < m + nr; i++) {
- if (i != m &&
- (i[0].iter->nodes[0] == i[-1].iter->nodes[0]))
- continue;
-
- bch_btree_node_write_lazy(i->iter->nodes[0], i->iter);
- }
+ for (i = m; i < m + nr; i++)
+ if (!same_leaf_as_prev(m, i))
+ bch_btree_node_write_lazy(i->iter->nodes[0], i->iter);
out:
percpu_ref_put(&c->writes);
return ret;
split:
split = i->iter;
- do {
- multi_unlock_write(m, i);
- } while (i-- != m);
-
+ multi_unlock_write(m, nr);
/*
- * XXX: Do we need to drop our journal res for the split?
- *
- * yes, because otherwise we're potentially blocking other things that
- * need the journal, which includes the allocator - and we're going to
- * be allocating new nodes in the split
+ * have to drop journal res before splitting, because splitting means
+ * allocating new btree nodes, and holding a journal reservation
+ * potentially blocks the allocator:
*/
bch_journal_res_put(&c->journal, &res, journal_seq);
ret = bch_btree_split_leaf(split, flags);
if (ret)
goto err;
+
+ /*
+ * if the split didn't have to drop locks the insert will still be
+ * atomic (in the BTREE_INSERT_ATOMIC sense, what the caller peeked()
+ * and is overwriting won't have changed)
+ */
goto retry;
+err_unlock:
+ multi_unlock_write(m, nr);
+ bch_journal_res_put(&c->journal, &res, journal_seq);
err:
if (ret == -EAGAIN) {
for (i = m; i < m + nr; i++)
bch_btree_iter_unlock(i->iter);
ret = -EINTR;
}
+
+ /*
+ * Main rule is, BTREE_INSERT_ATOMIC means we can't call
+ * bch_btree_iter_traverse(), because if we have to we either dropped
+ * locks or we need a different btree node (different than the one the
+ * caller was looking at).
+ *
+ * BTREE_INSERT_ATOMIC doesn't mean anything w.r.t. journal
+ * reservations:
+ */
+ if (ret == -EINTR && !(flags & BTREE_INSERT_ATOMIC)) {
+ for (i = m; i < m + nr; i++) {
+ ret = bch_btree_iter_traverse(i->iter);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+ }
+
+ if (!ret)
+ goto retry;
+
goto out;
}
/**
+ * bch_btree_insert_at - insert bkeys starting at a given btree node
+ * @iter: btree iterator
+ * @insert_keys: list of keys to insert
+ * @disk_res: disk reservation for the keys being inserted
+ * @hook: insert callback
+ * @journal_seq: if not null, set to the journal sequence number to wait on
+ * @flags: BTREE_INSERT_ATOMIC | BTREE_INSERT_NO_MARK_KEY
+ *
+ * This is top level for common btree insertion/index update code. The control
+ * flow goes roughly like:
+ *
+ * bch_btree_insert_at -- split keys that span interior nodes
+ * bch_btree_insert_node -- split btree nodes when full
+ * btree_split
+ * bch_btree_insert_keys -- get and put journal reservations
+ * btree_insert_key -- call fixup and remove key from keylist
+ * bch_insert_fixup_extent -- handle overlapping extents
+ * bch_btree_insert_and_journal -- add the key to the journal
+ * bch_bset_insert -- actually insert into the bset
+ *
+ * This function will split keys that span multiple nodes, calling
+ * bch_btree_insert_node() for each one. It will not return until all keys
+ * have been inserted, or an insert has failed.
+ *
+ * Waiting on @journal_seq only guarantees persistence of the whole update if
+ * the full keylist was inserted.
+ *
+ * Return values:
+ * -EINTR: locking changed, this function should be called again. Only returned
+ * if passed BTREE_INSERT_ATOMIC.
+ * -EROFS: cache set read only
+ * -EIO: journal or btree node IO error
+ */
+int bch_btree_insert_at(struct btree_iter *iter,
+ struct keylist *insert_keys,
+ struct disk_reservation *disk_res,
+ struct btree_insert_hook *hook,
+ u64 *journal_seq, unsigned flags)
+{
+ verify_keys_sorted(insert_keys);
+
+ while (1) {
+ struct btree_insert_multi m = {
+ .iter = iter,
+ .k = bch_keylist_front(insert_keys),
+ .done = false,
+ };
+
+ int ret = bch_btree_insert_at_multi(&m, 1, disk_res, hook,
+ journal_seq, flags);
+ BUG_ON(!ret != m.done);
+
+ if (ret)
+ return ret;
+
+ bch_keylist_dequeue(insert_keys);
+ if (bch_keylist_empty(insert_keys))
+ return 0;
+
+ /*
+ * need to traverse between each insert - might be inserting an
+ * extent that spanned leaf nodes:
+ */
+ ret = bch_btree_iter_traverse(iter);
+ if (ret)
+ return ret;
+ }
+}
+
+/**
* bch_btree_insert_check_key - insert dummy key into btree
*
* We insert a random key on a cache miss, then compare exchange on it
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index e2c59986111b..19e6e7505bbe 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -242,9 +242,13 @@ int bch_btree_insert_at(struct btree_iter *, struct keylist *,
struct btree_insert_multi {
struct btree_iter *iter;
struct bkey_i *k;
+ /* true if entire key was inserted - can only be false for extents */
+ bool done;
};
int bch_btree_insert_at_multi(struct btree_insert_multi[], unsigned,
+ struct disk_reservation *,
+ struct btree_insert_hook *,
u64 *, unsigned);
int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index 8868e41c8e89..b5af7bff508b 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -327,7 +327,8 @@ int bch_dirent_rename(struct cache_set *c,
ret = bch_btree_insert_at_multi((struct btree_insert_multi[]) {
{ &src_iter, &new_src->k_i, },
{ &dst_iter, &new_dst->k_i, }}, 2,
- journal_seq, 0);
+ NULL, NULL, journal_seq,
+ BTREE_INSERT_ATOMIC);
bch_btree_iter_unlock(&src_iter);
bch_btree_iter_unlock(&dst_iter);
} while (ret == -EINTR);
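
The rename above is the first caller of the widened bch_btree_insert_at_multi() signature. A condensed, standalone sketch of the same retry-on--EINTR pattern follows; the struct definitions and fake_insert_at_multi() stub are stand-ins for the kernel types, not the real API, and in the real caller both iterators are re-positioned inside the loop before each retry.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel types -- illustration only: */
struct btree_iter { int id; };
struct bkey_i { int dummy; };
struct btree_insert_multi {
	struct btree_iter	*iter;
	struct bkey_i		*k;
	bool			done;
};

#define BTREE_INSERT_ATOMIC	(1U << 0)

/*
 * Stub standing in for bch_btree_insert_at_multi(): pretends the first
 * attempt raced with a node split (-EINTR), then succeeds.
 */
static int fake_insert_at_multi(struct btree_insert_multi *m, unsigned nr,
				unsigned flags)
{
	static int attempt;
	unsigned i;

	(void) flags;
	if (attempt++ == 0)
		return -EINTR;

	for (i = 0; i < nr; i++)
		m[i].done = true;
	return 0;
}

int main(void)
{
	struct btree_iter src_iter = { 1 }, dst_iter = { 2 };
	struct bkey_i new_src = { 0 }, new_dst = { 0 };
	int ret;

	do {
		/* In the real caller, the lookups that fill in the keys are
		 * redone here before building the transaction: */
		struct btree_insert_multi m[] = {
			{ &src_iter, &new_src, false },
			{ &dst_iter, &new_dst, false },
		};

		/*
		 * BTREE_INSERT_ATOMIC: if locks were dropped part way through,
		 * nothing was committed and -EINTR tells us to redo the lookup.
		 */
		ret = fake_insert_at_multi(m, 2, BTREE_INSERT_ATOMIC);
	} while (ret == -EINTR);

	printf("rename-style update finished: %d\n", ret);
	return ret;
}
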
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 620780ced03a..5f76264dccb9 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -105,10 +105,11 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b,
/* This returns true if insert should be inserted, false otherwise */
-void bch_insert_fixup_key(struct btree_iter *iter,
- struct bkey_i *insert,
- struct btree_insert_hook *hook,
- struct journal_res *res)
+enum btree_insert_ret
+bch_insert_fixup_key(struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct btree_insert_hook *hook,
+ struct journal_res *res)
{
struct btree *b = iter->nodes[0];
struct btree_node_iter *node_iter = &iter->node_iters[0];
@@ -134,6 +135,7 @@ void bch_insert_fixup_key(struct btree_iter *iter,
}
bch_btree_insert_and_journal(iter, insert, res);
+ return BTREE_INSERT_OK;
}
/* Common among btree and extent ptrs */
@@ -1059,11 +1061,11 @@ static void handle_existing_key_newer(struct btree_iter *iter,
#define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC)
-static bool extent_insert_should_stop(struct btree_iter *iter,
- struct bkey_i *insert,
- struct journal_res *res,
- u64 start_time,
- unsigned nr_done)
+static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct journal_res *res,
+ u64 start_time,
+ unsigned nr_done)
{
struct cache_set *c = iter->c;
struct btree *b = iter->nodes[0];
@@ -1082,16 +1084,16 @@ static bool extent_insert_should_stop(struct btree_iter *iter,
* we've been running for too long and readers are waiting on the lock:
*/
if (!bch_btree_node_insert_fits(c, b, insert->k.u64s))
- return true;
+ return BTREE_INSERT_BTREE_NODE_FULL;
else if (!journal_res_insert_fits(c, res, insert, true))
- return true;
+ return BTREE_INSERT_JOURNAL_RES_FULL;
else if (nr_done > 10 &&
time_after64(local_clock(), start_time +
MAX_LOCK_HOLD_TIME) &&
!list_empty_careful(&b->lock.wait_list[SIX_LOCK_read]))
- return true;
+ return BTREE_INSERT_NEED_RESCHED;
else
- return false;
+ return BTREE_INSERT_OK;
}
/*
@@ -1157,12 +1159,13 @@ static void extent_insert_do_pos_hook(struct btree_insert_hook *hook,
* If the end of iter->pos is not the same as the end of insert, then
* key insertion needs to continue/be retried.
*/
-void bch_insert_fixup_extent(struct btree_iter *iter,
- struct bkey_i *insert,
- struct disk_reservation *disk_res,
- struct btree_insert_hook *hook,
- struct journal_res *res,
- unsigned flags)
+enum btree_insert_ret
+bch_insert_fixup_extent(struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct disk_reservation *disk_res,
+ struct btree_insert_hook *hook,
+ struct journal_res *res,
+ unsigned flags)
{
struct cache_set *c = iter->c;
struct btree *b = iter->nodes[0];
@@ -1174,6 +1177,7 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
struct bucket_stats_cache_set stats = { 0 };
unsigned nr_done = 0;
u64 start_time = local_clock();
+ enum btree_insert_ret ret = BTREE_INSERT_OK;
BUG_ON(iter->level);
BUG_ON(bkey_deleted(&insert->k));
@@ -1220,8 +1224,9 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
&insert->k))) {
struct bkey_s k = __bkey_disassemble(f, _k, &unpacked);
- if (extent_insert_should_stop(iter, insert, res,
- start_time, nr_done)) {
+ ret = extent_insert_should_stop(iter, insert, res,
+ start_time, nr_done);
+ if (ret != BTREE_INSERT_OK) {
/*
* Bailing out early - trim the portion of @insert we
* haven't checked against existing extents (the portion
@@ -1370,6 +1375,7 @@ void bch_insert_fixup_extent(struct btree_iter *iter,
bch_btree_insert_and_journal(iter, insert, res);
apply_stats:
bch_cache_set_stats_apply(c, &stats, disk_res);
+ return ret;
}
static const char *bch_extent_invalid(const struct cache_set *c,
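
One consequence of the early-return statuses above is that an extent insert can now finish part way through: everything up to iter->pos has been inserted, and bch_cut_front(iter->pos, insert) in btree_insert_key() trims that part off so the remainder can be retried after re-traversing. Below is a standalone sketch of that bookkeeping with made-up types (plain sector offsets, nothing like the real bkey machinery).

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified stand-in: an extent [start, end) in sector space,
 * and the iterator position reached before one of the early returns above
 * (node full, journal reservation full, lock held too long). */
struct extent { unsigned long long start, end; };

/* Mirrors what bch_cut_front(iter->pos, insert) accomplishes in the patch:
 * drop the part of the extent that has already been inserted. */
static bool extent_has_remainder(struct extent *e, unsigned long long iter_pos)
{
	if (iter_pos > e->start)
		e->start = iter_pos;		/* already inserted */

	return e->start < e->end;		/* nonzero size left to retry */
}

int main(void)
{
	struct extent insert = { .start = 0, .end = 128 };

	/* Suppose the fixup stopped at sector 96 (e.g. a leaf node boundary): */
	if (extent_has_remainder(&insert, 96))
		printf("retry remainder [%llu, %llu) after re-traversing\n",
		       insert.start, insert.end);
	return 0;
}
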
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index e861492c731e..5f353a3ab040 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -16,8 +16,9 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *,
struct bset *,
struct btree_node_iter *);
-void bch_insert_fixup_key(struct btree_iter *, struct bkey_i *,
- struct btree_insert_hook *, struct journal_res *);
+enum btree_insert_ret
+bch_insert_fixup_key(struct btree_iter *, struct bkey_i *,
+ struct btree_insert_hook *, struct journal_res *);
extern const struct bkey_ops bch_bkey_btree_ops;
extern const struct bkey_ops bch_bkey_extent_ops;
@@ -54,10 +55,11 @@ void bch_extent_cmpxchg(struct btree_insert_hook *,
struct journal_res *,
struct bucket_stats_cache_set *);
-void bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *,
- struct disk_reservation *,
- struct btree_insert_hook *,
- struct journal_res *, unsigned);
+enum btree_insert_ret
+bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *,
+ struct disk_reservation *,
+ struct btree_insert_hook *,
+ struct journal_res *, unsigned);
void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent);
bool bch_extent_normalize(struct cache_set *, struct bkey_s);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 3fdc27bdcbe5..61ceb59cc15b 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -1725,10 +1725,10 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res,
union journal_res_state s;
bool do_write = false;
- BUG_ON(!res->ref);
- lock_release(&j->res_map, 0, _RET_IP_);
+ if (!res->ref)
+ return;
- res->ref = false;
+ lock_release(&j->res_map, 0, _RET_IP_);
while (res->u64s) {
unsigned actual = jset_u64s(0);
@@ -1739,16 +1739,12 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res,
res->u64s -= actual;
}
- if (!test_bit(JOURNAL_DIRTY, &j->flags)) {
- set_bit(JOURNAL_DIRTY, &j->flags);
+ if (!test_bit(JOURNAL_DIRTY, &j->flags) &&
+ !test_and_set_bit(JOURNAL_DIRTY, &j->flags))
queue_delayed_work(system_freezable_wq,
&j->write_work,
msecs_to_jiffies(j->delay_ms));
- /* between set_bit() and *journal_seq = j->seq */
- smp_wmb();
- }
-
if (journal_seq)
*journal_seq = j->seq;
@@ -1770,6 +1766,8 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res,
wake_up(&j->wait);
}
+
+ memset(res, 0, sizeof(*res));
}
static inline bool journal_bucket_has_room(struct journal *j)
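
The net effect of the journal.c changes is that bch_journal_res_put() becomes safe to call on a reservation that was never taken or was already put: it returns early when res->ref is clear and zeroes the reservation before returning, which is what lets the err_unlock path in bch_btree_insert_at_multi() put the reservation unconditionally. Below is a standalone sketch of that idempotent-put pattern with simplified, made-up types; it is an illustration of the idea, not the kernel code.

#include <string.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_journal_res { bool ref; unsigned u64s; };

static void fake_res_put(struct fake_journal_res *res)
{
	if (!res->ref)
		return;			/* nothing reserved: harmless no-op */

	/* ... return res->u64s worth of space to the journal here ... */

	memset(res, 0, sizeof(*res));	/* safe to put again later */
}

int main(void)
{
	struct fake_journal_res res = { .ref = true, .u64s = 8 };

	fake_res_put(&res);	/* real release */
	fake_res_put(&res);	/* second put is now a no-op, not a BUG_ON() */
	printf("ref=%d u64s=%u\n", res.ref, res.u64s);
	return 0;
}
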