 drivers/md/bcache/btree_types.h  |  11
 drivers/md/bcache/btree_update.c | 524
 drivers/md/bcache/btree_update.h |   4
 drivers/md/bcache/dirent.c       |   3
 drivers/md/bcache/extents.c      |  48
 drivers/md/bcache/extents.h      |  14
 drivers/md/bcache/journal.c      |  16
 7 files changed, 271 insertions(+), 349 deletions(-)
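Note on the new return codes: this patch drops the old BTREE_INSERT_NEED_SPLIT/BTREE_INSERT_ERROR status in favour of the richer `enum btree_insert_ret`, so a leaf insert can report exactly why it stopped and the caller can pick the right recovery action. The standalone sketch below (userspace, illustrative only; `describe_insert_ret()` and `main()` are hypothetical, the enum is copied from the patch) summarizes how `bch_btree_insert_at_multi()` reacts to each code:

```c
#include <stdio.h>

enum btree_insert_ret {
	BTREE_INSERT_OK,
	BTREE_INSERT_NEED_TRAVERSE,	/* extent spanned multiple leaf nodes */
	BTREE_INSERT_NEED_RESCHED,	/* write lock held for too long */
	BTREE_INSERT_BTREE_NODE_FULL,	/* leaf node needs to be split */
	BTREE_INSERT_JOURNAL_RES_FULL,	/* journal reservation exhausted */
};

/* Hypothetical helper: the action the new multi-insert path takes per code. */
static const char *describe_insert_ret(enum btree_insert_ret ret)
{
	switch (ret) {
	case BTREE_INSERT_OK:
		return "mark the transaction entry done";
	case BTREE_INSERT_NEED_TRAVERSE:
		return "unlock, return -EINTR, traverse to the next leaf";
	case BTREE_INSERT_NEED_RESCHED:
		return "unlock, return -EAGAIN so all locks get dropped";
	case BTREE_INSERT_BTREE_NODE_FULL:
		return "drop the journal reservation, split the leaf, retry";
	case BTREE_INSERT_JOURNAL_RES_FULL:
		return "unlock, put the journal reservation, retry with a new one";
	}
	return "unknown";
}

int main(void)
{
	for (int i = BTREE_INSERT_OK; i <= BTREE_INSERT_JOURNAL_RES_FULL; i++)
		printf("%d: %s\n", i, describe_insert_ret(i));
	return 0;
}
```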
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h index 05b70dc53e37..6ab5a8d021ed 100644 --- a/drivers/md/bcache/btree_types.h +++ b/drivers/md/bcache/btree_types.h @@ -163,4 +163,15 @@ struct btree_insert_hook { struct bucket_stats_cache_set *); }; +enum btree_insert_ret { + BTREE_INSERT_OK, + /* extent spanned multiple leaf nodes: have to traverse to next node: */ + BTREE_INSERT_NEED_TRAVERSE, + /* write lock held for too long */ + BTREE_INSERT_NEED_RESCHED, + /* leaf node needs to be split */ + BTREE_INSERT_BTREE_NODE_FULL, + BTREE_INSERT_JOURNAL_RES_FULL, +}; + #endif /* _BCACHE_BTREE_TYPES_H */ diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index dab70f795904..354e11412cbb 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -750,67 +750,6 @@ void bch_btree_insert_and_journal(struct btree_iter *iter, bch_btree_bset_insert(iter, b, node_iter, insert); } -/** - * btree_insert_key - insert a key into a btree node, handling overlapping extents. - * - * The insert is journalled. - * - * @iter: btree iterator - * @insert_keys: list of keys to insert - * @replace: old key for for exchange (+ stats) - * @res: journal reservation - * @flags: BTREE_INSERT_NO_MARK_KEY - * - * Inserts the first key from @insert_keys - * - * Returns true if an insert was actually done and @b was modified - false on a - * failed replace operation - */ -static void btree_insert_key(struct btree_iter *iter, struct btree *b, - struct btree_node_iter *node_iter, - struct keylist *insert_keys, - struct disk_reservation *disk_res, - struct btree_insert_hook *hook, - struct journal_res *res, - unsigned flags) -{ - struct bkey_i *insert = bch_keylist_front(insert_keys), *orig = insert; - BKEY_PADDED(key) temp; - s64 oldsize = bch_count_data(&b->keys); - - bch_btree_node_iter_verify(node_iter, &b->keys); - BUG_ON(b->level); - BUG_ON(iter->nodes[0] != b || &iter->node_iters[0] != node_iter); - - if (!b->keys.ops->is_extents) { - bch_insert_fixup_key(iter, insert, hook, res); - bch_keylist_dequeue(insert_keys); - } else { - bkey_copy(&temp.key, insert); - insert = &temp.key; - - if (bkey_cmp(insert->k.p, b->key.k.p) > 0) - bch_cut_back(b->key.k.p, &insert->k); - - bch_insert_fixup_extent(iter, insert, disk_res, - hook, res, flags); - - bch_cut_front(iter->pos, orig); - if (orig->k.size == 0) - bch_keylist_dequeue(insert_keys); - } - - bch_count_data_verify(&b->keys, oldsize); - - trace_bcache_btree_insert_key(b, insert); -} - -enum btree_insert_status { - BTREE_INSERT_OK, - BTREE_INSERT_NEED_SPLIT, - BTREE_INSERT_ERROR, -}; - static void verify_keys_sorted(struct keylist *l) { #ifdef CONFIG_BCACHE_DEBUG @@ -1144,14 +1083,7 @@ err: #endif } -static void btree_insert_keys_checks(struct btree_iter *iter, struct btree *b) -{ - BUG_ON(iter->nodes[b->level] != b); - BUG_ON(!btree_node_intent_locked(iter, b->level)); - BUG_ON(!b->written); -} - -static enum btree_insert_status +static enum btree_insert_ret bch_btree_insert_keys_interior(struct btree *b, struct btree_iter *iter, struct keylist *insert_keys, @@ -1166,8 +1098,6 @@ bch_btree_insert_keys_interior(struct btree *b, BUG_ON(!btree_node_intent_locked(iter, btree_node_root(b)->level)); BUG_ON(!b->level); BUG_ON(!as || as->b); - - btree_insert_keys_checks(iter, b); verify_keys_sorted(insert_keys); btree_node_lock_for_insert(b, iter); @@ -1175,7 +1105,7 @@ bch_btree_insert_keys_interior(struct btree *b, if (bch_keylist_nkeys(insert_keys) > bch_btree_keys_u64s_remaining(iter->c, b)) 
{ btree_node_unlock_write(b, iter); - return BTREE_INSERT_NEED_SPLIT; + return BTREE_INSERT_BTREE_NODE_FULL; } while (!bch_keylist_empty(insert_keys)) { @@ -1211,100 +1141,9 @@ bch_btree_insert_keys_interior(struct btree *b, ; btree_node_interior_verify(b); - return BTREE_INSERT_OK; } -/** - * bch_btree_insert_keys - insert keys from @insert_keys into btree node @b, - * until the node is full. - * - * If keys couldn't be inserted because @b was full, the caller must split @b - * and bch_btree_insert_keys() will be called again from btree_split(). - * - * Caller must either be holding an intent lock on this node only, or intent - * locks on all nodes all the way up to the root. Caller must not be holding - * read locks on any nodes. - */ -static enum btree_insert_status -bch_btree_insert_keys_leaf(struct btree *b, - struct btree_iter *iter, - struct keylist *insert_keys, - struct disk_reservation *disk_res, - struct btree_insert_hook *hook, - u64 *journal_seq, - unsigned flags) -{ - bool done = false, need_split = false; - struct journal_res res = { 0, 0 }; - struct bkey_i *k = bch_keylist_front(insert_keys); - - BUG_ON(b->level); - - btree_insert_keys_checks(iter, b); - verify_keys_sorted(insert_keys); - - while (!done && !bch_keylist_empty(insert_keys)) { - /* - * We need room to insert at least two keys in the journal - * reservation -- the insert key itself, as well as a subset - * of it, in the bkey_cmpxchg() or handle_existing_key_newer() - * cases - */ - unsigned n_min = bch_keylist_front(insert_keys)->k.u64s; - unsigned n_max = bch_keylist_nkeys(insert_keys); - - unsigned actual_min = jset_u64s(n_min) * 2; - unsigned actual_max = max_t(unsigned, actual_min, - jset_u64s(n_max)); - - if (test_bit(JOURNAL_REPLAY_DONE, &iter->c->journal.flags) && - bch_journal_res_get(&iter->c->journal, &res, - actual_min, actual_max)) - return BTREE_INSERT_ERROR; - - btree_node_lock_for_insert(b, iter); - - while (!bch_keylist_empty(insert_keys)) { - k = bch_keylist_front(insert_keys); - - EBUG_ON(bkey_cmp(bkey_start_pos(&k->k), iter->pos)); - - /* finished for this node */ - if (b->keys.ops->is_extents - ? bkey_cmp(bkey_start_pos(&k->k), b->key.k.p) >= 0 - : bkey_cmp(k->k.p, b->key.k.p) > 0) { - done = true; - break; - } - - if (!bch_btree_node_insert_fits(iter->c, b, k->k.u64s)) { - done = true; - need_split = true; - break; - } - - if (!journal_res_insert_fits(iter->c, &res, k, - iter->is_extents)) - break; - - btree_insert_key(iter, b, &iter->node_iters[b->level], - insert_keys, disk_res, - hook, &res, flags); - } - - btree_node_unlock_write(b, iter); - - if (res.ref) - bch_journal_res_put(&iter->c->journal, &res, - journal_seq); - } - - bch_btree_node_write_lazy(b, iter); - - return need_split ? 
BTREE_INSERT_NEED_SPLIT : BTREE_INSERT_OK; -} - /* * Move keys from n1 (original replacement node, now lower node) to n2 (higher * node) @@ -1592,7 +1431,7 @@ void bch_btree_insert_node(struct btree *b, as, reserve)) { case BTREE_INSERT_OK: break; - case BTREE_INSERT_NEED_SPLIT: + case BTREE_INSERT_BTREE_NODE_FULL: btree_split(b, iter, insert_keys, reserve, as); break; default: @@ -1600,8 +1439,6 @@ void bch_btree_insert_node(struct btree *b, } } -/* Normal update interface: */ - static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags) { struct cache_set *c = iter->c; @@ -1643,152 +1480,100 @@ out_unlock: } /** - * bch_btree_insert_at - insert bkeys starting at a given btree node - * @iter: btree iterator - * @insert_keys: list of keys to insert - * @hook: insert callback - * @persistent: if not null, @persistent will wait on journal write - * @flags: BTREE_INSERT_ATOMIC | BTREE_INSERT_NO_MARK_KEY - * - * This is top level for common btree insertion/index update code. The control - * flow goes roughly like: - * - * bch_btree_insert_at -- split keys that span interior nodes - * bch_btree_insert_node -- split btree nodes when full - * btree_split - * bch_btree_insert_keys -- get and put journal reservations - * btree_insert_key -- call fixup and remove key from keylist - * bch_insert_fixup_extent -- handle overlapping extents - * bch_btree_insert_and_journal -- add the key to the journal - * bch_bset_insert -- actually insert into the bset - * - * This function will split keys that span multiple nodes, calling - * bch_btree_insert_node() for each one. It will not return until all keys - * have been inserted, or an insert has failed. - * - * @persistent will only wait on the journal write if the full keylist was - * inserted. - * - * Return values: - * -EINTR: locking changed, this function should be called again. Only returned - * if passed BTREE_INSERT_ATOMIC. 
- * -EROFS: cache set read only - * -EIO: journal or btree node IO error + * btree_insert_key - insert a key one key into a leaf node */ -int bch_btree_insert_at(struct btree_iter *iter, - struct keylist *insert_keys, - struct disk_reservation *disk_res, - struct btree_insert_hook *hook, - u64 *journal_seq, unsigned flags) +static enum btree_insert_ret +btree_insert_key(struct btree_iter *iter, struct btree *b, + struct btree_node_iter *node_iter, + struct bkey_i *insert, + struct disk_reservation *disk_res, + struct btree_insert_hook *hook, + struct journal_res *res, + unsigned flags) { - int ret = -EINTR; - - BUG_ON(iter->level); + s64 oldsize = bch_count_data(&b->keys); + enum btree_insert_ret ret; - if (unlikely(!percpu_ref_tryget(&iter->c->writes))) - return -EROFS; + bch_btree_node_iter_verify(node_iter, &b->keys); + BUG_ON(b->level); + BUG_ON(iter->nodes[0] != b || &iter->node_iters[0] != node_iter); - iter->locks_want = 0; - if (unlikely(!bch_btree_iter_upgrade(iter))) - goto traverse; + if (!b->keys.ops->is_extents) { + ret = bch_insert_fixup_key(iter, insert, hook, res); + } else { + BKEY_PADDED(key) temp; - while (1) { - EBUG_ON(bkey_cmp(bkey_start_pos(&bch_keylist_front(insert_keys)->k), - iter->pos)); - - switch (bch_btree_insert_keys_leaf(iter->nodes[0], iter, - insert_keys, disk_res, - hook, journal_seq, flags)) { - case BTREE_INSERT_OK: - ret = 0; - break; - case BTREE_INSERT_NEED_SPLIT: - ret = bch_btree_split_leaf(iter, flags); - break; - case BTREE_INSERT_ERROR: - /* Journal error, so we couldn't get a journal reservation: */ - ret = -EIO; - break; - default: - BUG(); - } + if (!bkey_cmp(iter->pos, b->key.k.p)) + return BTREE_INSERT_NEED_TRAVERSE; - /* - * We don't test against success because we might have - * successfully inserted the keys on the keylist, but have more - * to insert in the next leaf node: - */ - if (likely(bch_keylist_empty(insert_keys))) { - BUG_ON(ret); - break; - } + bkey_copy(&temp.key, insert); + if (bkey_cmp(insert->k.p, b->key.k.p) > 0) + bch_cut_back(b->key.k.p, &temp.key.k); - /* - * -EAGAIN means we have to drop locks and wait on - * mca_cannibalize_lock - btree_iter_unlock() does this - */ - if (ret == -EAGAIN) { - bch_btree_iter_unlock(iter); - ret = -EINTR; - } + ret = bch_insert_fixup_extent(iter, &temp.key, disk_res, + hook, res, flags); - if (ret && ret != -EINTR) - break; -traverse: - /* - * Can't retry, make sure we return an error: - */ - if (flags & BTREE_INSERT_ATOMIC) { - ret = ret ?: -EINTR; - break; - } + bch_cut_front(iter->pos, insert); + if (insert->k.size && !bkey_cmp(iter->pos, b->key.k.p)) + ret = BTREE_INSERT_NEED_TRAVERSE; - ret = bch_btree_iter_traverse(iter); - if (ret) - break; + EBUG_ON(bkey_cmp(iter->pos, b->key.k.p) > 0); + EBUG_ON((ret == BTREE_INSERT_OK) != (insert->k.size == 0)); } - percpu_ref_put(&iter->c->writes); + bch_count_data_verify(&b->keys, oldsize); + + trace_bcache_btree_insert_key(b, insert); return ret; } -static void multi_lock_write(struct btree_insert_multi *first, - struct btree_insert_multi *m) +static bool same_leaf_as_prev(struct btree_insert_multi *m, + struct btree_insert_multi *i) { /* * Because we sorted the transaction entries, if multiple iterators * point to the same leaf node they'll always be adjacent now: */ - if (m != first && - (m[0].iter->nodes[0] == m[-1].iter->nodes[0])) - return; /* already locked */ + return i != m && i[0].iter->nodes[0] == i[-1].iter->nodes[0]; +} + +static void multi_lock_write(struct btree_insert_multi *m, unsigned nr) +{ + struct btree_insert_multi *i; - 
btree_node_lock_for_insert(m->iter->nodes[0], m->iter); + for (i = m; i < m + nr; i++) + if (!same_leaf_as_prev(m, i)) + btree_node_lock_for_insert(i->iter->nodes[0], i->iter); } -static void multi_unlock_write(struct btree_insert_multi *first, - struct btree_insert_multi *m) +static void multi_unlock_write(struct btree_insert_multi *m, unsigned nr) { - if (m != first && - (m[0].iter->nodes[0] == m[-1].iter->nodes[0])) - return; /* already locked */ + struct btree_insert_multi *i; - btree_node_unlock_write(m->iter->nodes[0], m->iter); + for (i = m; i < m + nr; i++) + if (!same_leaf_as_prev(m, i)) + btree_node_unlock_write(i->iter->nodes[0], i->iter); } +/* Normal update interface: */ + int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr, + struct disk_reservation *disk_res, + struct btree_insert_hook *hook, u64 *journal_seq, unsigned flags) { struct cache_set *c = m[0].iter->c; struct journal_res res = { 0, 0 }; struct btree_insert_multi *i; struct btree_iter *split; - unsigned u64s = 0; + unsigned u64s; bool swapped; int ret; - for (i = m; i < m + nr; i++) + for (i = m; i < m + nr; i++) { + EBUG_ON(i->iter->level); EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); + } /* Sort transaction entries by iterator position, for lock ordering: */ do { @@ -1804,9 +1589,6 @@ int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr, if (unlikely(!percpu_ref_tryget(&c->writes))) return -EROFS; - for (i = m; i < m + nr; i++) - u64s += jset_u64s(i->k->k.u64s); - for (i = m; i < m + nr; i++) { i->iter->locks_want = 0; if (unlikely(!bch_btree_iter_upgrade(i->iter))) { @@ -1815,75 +1597,193 @@ int bch_btree_insert_at_multi(struct btree_insert_multi *m, unsigned nr, } } retry: - ret = bch_journal_res_get(&c->journal, &res, u64s, u64s); + split = NULL; + u64s = 0; + for (i = m; i < m + nr; i++) + if (!i->done) + u64s += journal_res_u64s_required(i->k, + i->iter->is_extents); + + ret = test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) + ? 
bch_journal_res_get(&c->journal, &res, u64s, u64s) + : 0; if (ret) goto err; - for (i = m; i < m + nr; i++) { - multi_lock_write(m, i); + multi_lock_write(m, nr); - /* - * Check against total, not just the key for this iterator, - * because multiple inserts might be going to the same node: - */ - if (!bch_btree_node_insert_fits(c, - i->iter->nodes[0], u64s)) - goto split; + u64s = 0; + for (i = m; i < m + nr; i++) { + /* Multiple inserts might go to same leaf: */ + if (!same_leaf_as_prev(m, i)) + u64s = 0; + + if (!i->done) { + u64s += i->k->k.u64s; + if (!bch_btree_node_insert_fits(c, + i->iter->nodes[0], u64s)) + goto split; + } } for (i = m; i < m + nr; i++) - btree_insert_key(i->iter, i->iter->nodes[0], - &i->iter->node_iters[0], - &keylist_single(i->k), - NULL, NULL, - &res, flags); + if (!i->done) + switch (btree_insert_key(i->iter, + i->iter->nodes[0], + &i->iter->node_iters[0], + i->k, disk_res, hook, + &res, flags)) { + case BTREE_INSERT_OK: + i->done = true; + break; + case BTREE_INSERT_NEED_TRAVERSE: + ret = -EINTR; + goto err_unlock; + case BTREE_INSERT_NEED_RESCHED: + ret = -EAGAIN; + goto err_unlock; + case BTREE_INSERT_BTREE_NODE_FULL: + goto split; + case BTREE_INSERT_JOURNAL_RES_FULL: + goto err_unlock; + } - do { - multi_unlock_write(m, --i); - } while (i != m); + multi_unlock_write(m, nr); bch_journal_res_put(&c->journal, &res, journal_seq); - for (i = m; i < m + nr; i++) { - if (i != m && - (i[0].iter->nodes[0] == i[-1].iter->nodes[0])) - continue; - - bch_btree_node_write_lazy(i->iter->nodes[0], i->iter); - } + for (i = m; i < m + nr; i++) + if (!same_leaf_as_prev(m, i)) + bch_btree_node_write_lazy(i->iter->nodes[0], i->iter); out: percpu_ref_put(&c->writes); return ret; split: split = i->iter; - do { - multi_unlock_write(m, i); - } while (i-- != m); - + multi_unlock_write(m, nr); /* - * XXX: Do we need to drop our journal res for the split? - * - * yes, because otherwise we're potentially blocking other things that - * need the journal, which includes the allocator - and we're going to - * be allocating new nodes in the split + * have to drop journal res before splitting, because splitting means + * allocating new btree nodes, and holding a journal reservation + * potentially blocks the allocator: */ bch_journal_res_put(&c->journal, &res, journal_seq); ret = bch_btree_split_leaf(split, flags); if (ret) goto err; + + /* + * if the split didn't have to drop locks the insert will still be + * atomic (in the BTREE_INSERT_ATOMIC sense, what the caller peeked() + * and is overwriting won't have changed) + */ goto retry; +err_unlock: + multi_unlock_write(m, nr); + bch_journal_res_put(&c->journal, &res, journal_seq); err: if (ret == -EAGAIN) { for (i = m; i < m + nr; i++) bch_btree_iter_unlock(i->iter); ret = -EINTR; } + + /* + * Main rule is, BTREE_INSERT_ATOMIC means we can't call + * bch_btree_iter_traverse(), because if we have to we either dropped + * locks or we need a different btree node (different than the one the + * caller was looking at). + * + * BTREE_INSERT_ATOMIC doesn't mean anything w.r.t. 
journal + * reservations: + */ + if (ret == -EINTR && !(flags & BTREE_INSERT_ATOMIC)) { + for (i = m; i < m + nr; i++) { + ret = bch_btree_iter_traverse(i->iter); + if (ret) + goto out; + } + + ret = 0; + } + + if (!ret) + goto retry; + goto out; } /** + * bch_btree_insert_at - insert bkeys starting at a given btree node + * @iter: btree iterator + * @insert_keys: list of keys to insert + * @hook: insert callback + * @persistent: if not null, @persistent will wait on journal write + * @flags: BTREE_INSERT_ATOMIC | BTREE_INSERT_NO_MARK_KEY + * + * This is top level for common btree insertion/index update code. The control + * flow goes roughly like: + * + * bch_btree_insert_at -- split keys that span interior nodes + * bch_btree_insert_node -- split btree nodes when full + * btree_split + * bch_btree_insert_keys -- get and put journal reservations + * btree_insert_key -- call fixup and remove key from keylist + * bch_insert_fixup_extent -- handle overlapping extents + * bch_btree_insert_and_journal -- add the key to the journal + * bch_bset_insert -- actually insert into the bset + * + * This function will split keys that span multiple nodes, calling + * bch_btree_insert_node() for each one. It will not return until all keys + * have been inserted, or an insert has failed. + * + * @persistent will only wait on the journal write if the full keylist was + * inserted. + * + * Return values: + * -EINTR: locking changed, this function should be called again. Only returned + * if passed BTREE_INSERT_ATOMIC. + * -EROFS: cache set read only + * -EIO: journal or btree node IO error + */ +int bch_btree_insert_at(struct btree_iter *iter, + struct keylist *insert_keys, + struct disk_reservation *disk_res, + struct btree_insert_hook *hook, + u64 *journal_seq, unsigned flags) +{ + verify_keys_sorted(insert_keys); + + while (1) { + struct btree_insert_multi m = { + .iter = iter, + .k = bch_keylist_front(insert_keys), + .done = false, + }; + + int ret = bch_btree_insert_at_multi(&m, 1, disk_res, hook, + journal_seq, flags); + BUG_ON(!ret != m.done); + + if (ret) + return ret; + + bch_keylist_dequeue(insert_keys); + if (bch_keylist_empty(insert_keys)) + return 0; + + /* + * need to traverse between each insert - might be inserting an + * extent that spanned leaf nodes: + */ + ret = bch_btree_iter_traverse(iter); + if (ret) + return ret; + } +} + +/** * bch_btree_insert_check_key - insert dummy key into btree * * We insert a random key on a cache miss, then compare exchange on it diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h index e2c59986111b..19e6e7505bbe 100644 --- a/drivers/md/bcache/btree_update.h +++ b/drivers/md/bcache/btree_update.h @@ -242,9 +242,13 @@ int bch_btree_insert_at(struct btree_iter *, struct keylist *, struct btree_insert_multi { struct btree_iter *iter; struct bkey_i *k; + /* true if entire key was inserted - can only be false for extents */ + bool done; }; int bch_btree_insert_at_multi(struct btree_insert_multi[], unsigned, + struct disk_reservation *, + struct btree_insert_hook *, u64 *, unsigned); int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *); diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c index 8868e41c8e89..b5af7bff508b 100644 --- a/drivers/md/bcache/dirent.c +++ b/drivers/md/bcache/dirent.c @@ -327,7 +327,8 @@ int bch_dirent_rename(struct cache_set *c, ret = bch_btree_insert_at_multi((struct btree_insert_multi[]) { { &src_iter, &new_src->k_i, }, { &dst_iter, &new_dst->k_i, }}, 2, - journal_seq, 0); + 
NULL, NULL, journal_seq, + BTREE_INSERT_ATOMIC); bch_btree_iter_unlock(&src_iter); bch_btree_iter_unlock(&dst_iter); } while (ret == -EINTR); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 620780ced03a..5f76264dccb9 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -105,10 +105,11 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct btree_keys *b, /* This returns true if insert should be inserted, false otherwise */ -void bch_insert_fixup_key(struct btree_iter *iter, - struct bkey_i *insert, - struct btree_insert_hook *hook, - struct journal_res *res) +enum btree_insert_ret +bch_insert_fixup_key(struct btree_iter *iter, + struct bkey_i *insert, + struct btree_insert_hook *hook, + struct journal_res *res) { struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; @@ -134,6 +135,7 @@ void bch_insert_fixup_key(struct btree_iter *iter, } bch_btree_insert_and_journal(iter, insert, res); + return BTREE_INSERT_OK; } /* Common among btree and extent ptrs */ @@ -1059,11 +1061,11 @@ static void handle_existing_key_newer(struct btree_iter *iter, #define MAX_LOCK_HOLD_TIME (5 * NSEC_PER_MSEC) -static bool extent_insert_should_stop(struct btree_iter *iter, - struct bkey_i *insert, - struct journal_res *res, - u64 start_time, - unsigned nr_done) +static enum btree_insert_ret extent_insert_should_stop(struct btree_iter *iter, + struct bkey_i *insert, + struct journal_res *res, + u64 start_time, + unsigned nr_done) { struct cache_set *c = iter->c; struct btree *b = iter->nodes[0]; @@ -1082,16 +1084,16 @@ static bool extent_insert_should_stop(struct btree_iter *iter, * we've been running for too long and readers are waiting on the lock: */ if (!bch_btree_node_insert_fits(c, b, insert->k.u64s)) - return true; + return BTREE_INSERT_BTREE_NODE_FULL; else if (!journal_res_insert_fits(c, res, insert, true)) - return true; + return BTREE_INSERT_JOURNAL_RES_FULL; else if (nr_done > 10 && time_after64(local_clock(), start_time + MAX_LOCK_HOLD_TIME) && !list_empty_careful(&b->lock.wait_list[SIX_LOCK_read])) - return true; + return BTREE_INSERT_NEED_RESCHED; else - return false; + return BTREE_INSERT_OK; } /* @@ -1157,12 +1159,13 @@ static void extent_insert_do_pos_hook(struct btree_insert_hook *hook, * If the end of iter->pos is not the same as the end of insert, then * key insertion needs to continue/be retried. 
*/ -void bch_insert_fixup_extent(struct btree_iter *iter, - struct bkey_i *insert, - struct disk_reservation *disk_res, - struct btree_insert_hook *hook, - struct journal_res *res, - unsigned flags) +enum btree_insert_ret +bch_insert_fixup_extent(struct btree_iter *iter, + struct bkey_i *insert, + struct disk_reservation *disk_res, + struct btree_insert_hook *hook, + struct journal_res *res, + unsigned flags) { struct cache_set *c = iter->c; struct btree *b = iter->nodes[0]; @@ -1174,6 +1177,7 @@ void bch_insert_fixup_extent(struct btree_iter *iter, struct bucket_stats_cache_set stats = { 0 }; unsigned nr_done = 0; u64 start_time = local_clock(); + enum btree_insert_ret ret = BTREE_INSERT_OK; BUG_ON(iter->level); BUG_ON(bkey_deleted(&insert->k)); @@ -1220,8 +1224,9 @@ void bch_insert_fixup_extent(struct btree_iter *iter, &insert->k))) { struct bkey_s k = __bkey_disassemble(f, _k, &unpacked); - if (extent_insert_should_stop(iter, insert, res, - start_time, nr_done)) { + ret = extent_insert_should_stop(iter, insert, res, + start_time, nr_done); + if (ret != BTREE_INSERT_OK) { /* * Bailing out early - trim the portion of @insert we * haven't checked against existing extents (the portion @@ -1370,6 +1375,7 @@ void bch_insert_fixup_extent(struct btree_iter *iter, bch_btree_insert_and_journal(iter, insert, res); apply_stats: bch_cache_set_stats_apply(c, &stats, disk_res); + return ret; } static const char *bch_extent_invalid(const struct cache_set *c, diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index e861492c731e..5f353a3ab040 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -16,8 +16,9 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *, struct bset *, struct btree_node_iter *); -void bch_insert_fixup_key(struct btree_iter *, struct bkey_i *, - struct btree_insert_hook *, struct journal_res *); +enum btree_insert_ret +bch_insert_fixup_key(struct btree_iter *, struct bkey_i *, + struct btree_insert_hook *, struct journal_res *); extern const struct bkey_ops bch_bkey_btree_ops; extern const struct bkey_ops bch_bkey_extent_ops; @@ -54,10 +55,11 @@ void bch_extent_cmpxchg(struct btree_insert_hook *, struct journal_res *, struct bucket_stats_cache_set *); -void bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *, - struct disk_reservation *, - struct btree_insert_hook *, - struct journal_res *, unsigned); +enum btree_insert_ret +bch_insert_fixup_extent(struct btree_iter *, struct bkey_i *, + struct disk_reservation *, + struct btree_insert_hook *, + struct journal_res *, unsigned); void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent); bool bch_extent_normalize(struct cache_set *, struct bkey_s); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 3fdc27bdcbe5..61ceb59cc15b 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -1725,10 +1725,10 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res, union journal_res_state s; bool do_write = false; - BUG_ON(!res->ref); - lock_release(&j->res_map, 0, _RET_IP_); + if (!res->ref) + return; - res->ref = false; + lock_release(&j->res_map, 0, _RET_IP_); while (res->u64s) { unsigned actual = jset_u64s(0); @@ -1739,16 +1739,12 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res, res->u64s -= actual; } - if (!test_bit(JOURNAL_DIRTY, &j->flags)) { - set_bit(JOURNAL_DIRTY, &j->flags); + if (!test_bit(JOURNAL_DIRTY, &j->flags) && + !test_and_set_bit(JOURNAL_DIRTY, &j->flags)) 
queue_delayed_work(system_freezable_wq, &j->write_work, msecs_to_jiffies(j->delay_ms)); - /* between set_bit() and *journal_seq = j->seq */ - smp_wmb(); - } - if (journal_seq) *journal_seq = j->seq; @@ -1770,6 +1766,8 @@ void bch_journal_res_put(struct journal *j, struct journal_res *res, wake_up(&j->wait); } + + memset(res, 0, sizeof(*res)); } static inline bool journal_bucket_has_room(struct journal *j) |
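
Caller-side note: with the signature change, `bch_btree_insert_at_multi()` now takes a disk reservation and insert hook (or NULL for both) ahead of the journal sequence pointer and flags, and the dirent rename path passes BTREE_INSERT_ATOMIC so the insert fails with -EINTR rather than re-traversing behind the caller's back. A minimal sketch of a two-iterator atomic update modeled on the dirent hunk above; `example_update_pair()` is a hypothetical wrapper compiled against the in-tree headers and assumes the iterators are already positioned, as in `bch_dirent_rename()`:

```c
/* Hypothetical caller; mirrors the updated bch_dirent_rename() call rather
 * than adding new behavior. */
static int example_update_pair(struct btree_iter *iter_a, struct bkey_i *k_a,
			       struct btree_iter *iter_b, struct bkey_i *k_b,
			       u64 *journal_seq)
{
	int ret;

	ret = bch_btree_insert_at_multi((struct btree_insert_multi[]) {
			{ .iter = iter_a, .k = k_a, },
			{ .iter = iter_b, .k = k_b, },
		}, 2,
		NULL, NULL,			/* no disk reservation, no insert hook */
		journal_seq,
		BTREE_INSERT_ATOMIC);		/* -EINTR instead of internal re-traverse */

	bch_btree_iter_unlock(iter_a);
	bch_btree_iter_unlock(iter_b);

	/* On -EINTR the caller repositions its iterators and retries, as dirent does. */
	return ret;
}
```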
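Lock-ordering note: because the transaction entries are sorted by iterator position before any locks are taken, entries that target the same leaf node end up adjacent, so `same_leaf_as_prev()` only has to compare each entry with its predecessor and `multi_lock_write()` locks every distinct leaf exactly once. A userspace illustration of that adjacency trick; `struct fake_entry` and the values are stand-ins, not the kernel types:

```c
#include <stdbool.h>
#include <stdio.h>

struct fake_entry {
	int pos;	/* stand-in for the iterator position */
	int leaf;	/* stand-in for iter->nodes[0] */
};

static bool same_leaf_as_prev(struct fake_entry *m, struct fake_entry *i)
{
	/* sorted input makes a "differs from the previous entry" check sufficient */
	return i != m && i[0].leaf == i[-1].leaf;
}

int main(void)
{
	/* already sorted by pos, as bch_btree_insert_at_multi() guarantees */
	struct fake_entry m[] = {
		{ .pos = 1, .leaf = 10 },
		{ .pos = 2, .leaf = 10 },	/* same leaf as previous: skipped */
		{ .pos = 7, .leaf = 20 },
	};
	unsigned nr = sizeof(m) / sizeof(m[0]);

	for (struct fake_entry *i = m; i < m + nr; i++)
		if (!same_leaf_as_prev(m, i))
			printf("lock leaf %d once\n", i->leaf);
	return 0;
}
```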