 fs/bcachefs/btree_iter.c | 272 ++++++++++++++++++++++-----------------
 1 file changed, 157 insertions(+), 115 deletions(-)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 330abc577c9a..3ad7e082f9b3 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -46,7 +46,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
struct btree_iter *linked;
unsigned readers = 0;
- for_each_linked_btree_iter(iter, linked)
+ for_each_btree_iter(iter, linked)
if (linked->l[b->level].b == b &&
btree_node_read_locked(linked, b->level))
readers++;
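
This conversion from for_each_linked_btree_iter() to for_each_btree_iter() recurs throughout the patch: the former visits every iterator linked to @iter except @iter itself, while the latter includes @iter in the walk, so separate handling of the "self" iterator folds into the loop body. A minimal sketch of the assumed macros (the real definitions live in the btree iterator headers; the circular ->next list is inferred from context):

	/* Sketch only: linked iterators are assumed to form a circular list. */
	#define for_each_linked_btree_iter(_iter, _linked)		\
		for ((_linked) = (_iter)->next;				\
		     (_linked) != (_iter);				\
		     (_linked) = (_linked)->next)

	#define for_each_btree_iter(_iter, _linked)			\
		for ((_linked) = (_iter); (_linked);			\
		     (_linked) = (_linked)->next != (_iter)		\
				? (_linked)->next : NULL)
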
@@ -68,7 +68,7 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
{
struct btree_iter *linked;
struct btree *b = iter->l[level].b;
- int want = btree_lock_want(iter, level);
+ int want = __btree_lock_want(iter, level);
int have = btree_node_locked_type(iter, level);
if (want == have)
@@ -101,38 +101,6 @@ success:
return true;
}
-static bool __bch2_btree_iter_relock(struct btree_iter *iter)
-{
- unsigned l = iter->level;
-
- do {
- if (!btree_iter_node(iter, l))
- break;
-
- if (!bch2_btree_node_relock(iter, l)) {
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
- return false;
- }
-
- l++;
- } while (l < iter->locks_want);
-
- if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
- iter->uptodate = BTREE_ITER_NEED_PEEK;
- return true;
-}
-
-bool bch2_btree_iter_relock(struct btree_iter *iter)
-{
- struct btree_iter *linked;
- bool ret = true;
-
- for_each_btree_iter(iter, linked)
- ret &= __bch2_btree_iter_relock(linked);
-
- return ret;
-}
-
/* Slowpath: */
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
unsigned level,
@@ -217,57 +185,135 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
/* Btree iterator locking: */
-static void btree_iter_drop_extra_locks(struct btree_iter *iter)
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
unsigned l;
- while (iter->nodes_locked &&
- (l = __fls(iter->nodes_locked)) > iter->locks_want) {
- if (l > iter->level) {
- btree_node_unlock(iter, l);
- } else {
- if (btree_node_intent_locked(iter, l)) {
- six_lock_downgrade(&iter->l[l].b->lock);
- iter->nodes_intent_locked ^= 1 << l;
+ if (iter->uptodate == BTREE_ITER_END) {
+ BUG_ON(iter->nodes_locked);
+ return;
+ }
+
+ for (l = 0; btree_iter_node(iter, l); l++) {
+ if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
+ !btree_node_locked(iter, l))
+ continue;
+
+ BUG_ON(btree_lock_want(iter, l) !=
+ btree_node_locked_type(iter, l));
+ }
+}
+#endif
+
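
bch2_btree_iter_verify_locks() checks the invariant the rest of the patch maintains: for every level the iterator still points at, a held lock must be of exactly the wanted type, levels may only be unlocked once the iterator is marked NEED_RELOCK or worse, and an iterator at BTREE_ITER_END may hold nothing. Presumably the header supplies a no-op stub when the debug option is off, along these lines:

	/* Assumed companion stub in btree_iter.h when debugging is disabled: */
	#ifndef CONFIG_BCACHEFS_DEBUG
	static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
	#endif
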
+static bool __bch2_btree_iter_relock(struct btree_iter *iter)
+{
+ if (iter->uptodate < BTREE_ITER_NEED_RELOCK) {
+ bch2_btree_iter_verify_locks(iter);
+ return true;
+ }
+
+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) {
+ unsigned l = iter->level;
+
+ do {
+ if (!btree_iter_node(iter, l))
+ break;
+
+ if (!bch2_btree_node_relock(iter, l)) {
+ btree_iter_set_dirty(iter,
+ BTREE_ITER_NEED_TRAVERSE);
+ return false;
}
- break;
- }
+
+ l++;
+ } while (l < iter->locks_want);
+
+ iter->uptodate = BTREE_ITER_NEED_PEEK;
+ bch2_btree_iter_verify_locks(iter);
+ return true;
}
+
+ return false;
}
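
The rewritten __bch2_btree_iter_relock() folds the old fast-path checks from bch2_btree_iter_traverse() into one place: states below NEED_RELOCK already hold their locks, NEED_RELOCK walks up from iter->level re-acquiring locks, and NEED_TRAVERSE falls through to return false. For reference, the ordering those comparisons rely on comes from the uptodate state enum, which at this point in the tree looks roughly like:

	enum btree_iter_uptodate {
		BTREE_ITER_UPTODATE		= 0,
		BTREE_ITER_NEED_PEEK		= 1, /* locks held; re-search within node */
		BTREE_ITER_NEED_RELOCK		= 2, /* locks dropped; node pointers still valid */
		BTREE_ITER_NEED_TRAVERSE	= 3, /* must walk down from the root again */
		BTREE_ITER_END			= 4, /* done; all locks dropped */
	};
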
-bool __bch2_btree_iter_set_locks_want(struct btree_iter *iter,
- unsigned new_locks_want)
+bool bch2_btree_iter_relock(struct btree_iter *iter)
{
struct btree_iter *linked;
+ bool ret = true;
- /* Drop locks we don't want anymore: */
- if (new_locks_want < iter->locks_want)
- for_each_linked_btree_iter(iter, linked)
- if (linked->locks_want > new_locks_want) {
- linked->locks_want = max_t(unsigned, 1,
- new_locks_want);
- btree_iter_drop_extra_locks(linked);
- }
+ for_each_btree_iter(iter, linked)
+ ret &= __bch2_btree_iter_relock(linked);
- iter->locks_want = new_locks_want;
- btree_iter_drop_extra_locks(iter);
+ return ret;
+}
+
+bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
+ unsigned new_locks_want)
+{
+ struct btree_iter *linked;
+
+ if (iter->locks_want < new_locks_want) {
+ iter->locks_want = new_locks_want;
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+ }
if (__bch2_btree_iter_relock(iter))
return true;
/*
- * Just an optimization: ancestor nodes must be locked before child
- * nodes, so set locks_want on iterators that might lock ancestors
- * before us to avoid getting -EINTR later:
+ * Ancestor nodes must be locked before child nodes, so set locks_want
+ * on iterators that might lock ancestors before us to avoid getting
+ * -EINTR later:
*/
for_each_linked_btree_iter(iter, linked)
if (linked->btree_id == iter->btree_id &&
- btree_iter_cmp(linked, iter) <= 0)
- linked->locks_want = max_t(unsigned, linked->locks_want,
- new_locks_want);
+ btree_iter_cmp(linked, iter) <= 0 &&
+ linked->locks_want < new_locks_want) {
+ linked->locks_want = new_locks_want;
+ btree_iter_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
+ }
+
return false;
}
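
__bch2_btree_iter_upgrade() is the growing half of the old __bch2_btree_iter_set_locks_want(): raising locks_want also dirties the iterator to NEED_RELOCK so the relock pass retries under the stronger requirement, and propagating locks_want to iterators that sort before us is now done for correctness of that retry, not merely as an optimization. A hedged usage sketch (the exact caller convention is an assumption; the -EINTR restart matches the comment above):

	/*
	 * Hypothetical caller: take intent locks on the leaf and its
	 * parent before an insert that may split the leaf; on failure
	 * the transaction is restarted.
	 */
	if (!__bch2_btree_iter_upgrade(iter, 2))
		return -EINTR;
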
+void __bch2_btree_iter_downgrade(struct btree_iter *iter,
+ unsigned downgrade_to)
+{
+ struct btree_iter *linked;
+ unsigned l;
+
+ /*
+ * We downgrade linked iterators as well because btree_iter_upgrade
+ * might have had to modify locks_want on linked iterators due to lock
+ * ordering:
+ */
+ for_each_btree_iter(iter, linked) {
+ unsigned new_locks_want = downgrade_to ?:
+ (linked->flags & BTREE_ITER_INTENT ? 1 : 0);
+
+ if (linked->locks_want <= new_locks_want)
+ continue;
+
+ linked->locks_want = new_locks_want;
+
+ while (linked->nodes_locked &&
+ (l = __fls(linked->nodes_locked)) >= linked->locks_want) {
+ if (l > linked->level) {
+ btree_node_unlock(linked, l);
+ } else {
+ if (btree_node_intent_locked(linked, l)) {
+ six_lock_downgrade(&linked->l[l].b->lock);
+ linked->nodes_intent_locked ^= 1 << l;
+ }
+ break;
+ }
+ }
+
+ bch2_btree_iter_verify_locks(linked);
+ }
+}
+
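
The downgrade loop operates on nodes_locked, a bitmask indexed by btree level, so __fls() names the highest locked level and locks are shed top-down until locks_want is satisfied; an intent lock at or below the iterator's own level is weakened to a read lock with six_lock_downgrade() rather than dropped. An illustration of the bitmask convention (example values only):

	unsigned nodes_locked = 0x7;		/* say, levels 0 (leaf), 1 and 2 held */
	unsigned top = __fls(nodes_locked);	/* == 2: unlocking starts at level 2 */
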
int bch2_btree_iter_unlock(struct btree_iter *iter)
{
struct btree_iter *linked;
@@ -609,11 +655,12 @@ static inline void btree_iter_node_set(struct btree_iter *iter,
* A btree node is being replaced - update the iterator to point to the new
* node:
*/
-bool bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
{
+ enum btree_node_locked_type t;
struct btree_iter *linked;
- for_each_linked_btree_iter(iter, linked)
+ for_each_btree_iter(iter, linked)
if (btree_iter_pos_in_node(linked, b)) {
/*
* bch2_btree_iter_node_drop() has already been called -
@@ -622,51 +669,28 @@ bool bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
*/
BUG_ON(btree_node_locked(linked, b->level));
- /*
- * If @linked wants this node read locked, we don't want
- * to actually take the read lock now because it's not
- * legal to hold read locks on other nodes while we take
- * write locks, so the journal can make forward
- * progress...
- *
- * Instead, btree_iter_node_set() sets things up so
- * bch2_btree_node_relock() will succeed:
- */
-
- if (btree_want_intent(linked, b->level)) {
- six_lock_increment(&b->lock, SIX_LOCK_intent);
- mark_btree_node_intent_locked(linked, b->level);
+ t = btree_lock_want(linked, b->level);
+ if (t != BTREE_NODE_UNLOCKED) {
+ six_lock_increment(&b->lock, t);
+ mark_btree_node_locked(linked, b->level, t);
}
btree_iter_node_set(linked, b);
}
- if (!btree_iter_pos_in_node(iter, b)) {
- six_unlock_intent(&b->lock);
- return false;
- }
-
- mark_btree_node_intent_locked(iter, b->level);
- btree_iter_node_set(iter, b);
- return true;
-}
-
-void bch2_btree_iter_node_drop_linked(struct btree_iter *iter, struct btree *b)
-{
- struct btree_iter *linked;
-
- for_each_linked_btree_iter(iter, linked)
- bch2_btree_iter_node_drop(linked, b);
+ six_unlock_intent(&b->lock);
}
void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
{
+ struct btree_iter *linked;
unsigned level = b->level;
- if (iter->l[level].b == b) {
- btree_node_unlock(iter, level);
- iter->l[level].b = BTREE_ITER_NOT_END;
- }
+ for_each_btree_iter(iter, linked)
+ if (linked->l[level].b == b) {
+ btree_node_unlock(linked, level);
+ linked->l[level].b = BTREE_ITER_NOT_END;
+ }
}
/*
@@ -707,7 +731,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
return 0;
}
- lock_type = btree_lock_want(iter, iter->level);
+ lock_type = __btree_lock_want(iter, iter->level);
if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
iter, lock_type)))
return -EINTR;
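
Several call sites here and in btree_iter_down() switch from btree_lock_want() to __btree_lock_want(): the split appears to give __btree_lock_want() the old behavior of returning a six_lock_type for a level that will definitely be locked, while btree_lock_want() can additionally report "unlocked", which is what bch2_btree_iter_verify_locks() compares against and what the node_replace path passes straight to six_lock_increment(). A sketch of the assumed type definitions:

	enum six_lock_type {
		SIX_LOCK_read,
		SIX_LOCK_intent,
		SIX_LOCK_write,
	};

	enum btree_node_locked_type {
		BTREE_NODE_UNLOCKED		= -1,
		BTREE_NODE_READ_LOCKED		= SIX_LOCK_read,
		BTREE_NODE_INTENT_LOCKED	= SIX_LOCK_intent,
	};
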
@@ -765,7 +789,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
struct btree_iter_level *l = &iter->l[iter->level];
struct btree *b;
unsigned level = iter->level - 1;
- enum six_lock_type lock_type = btree_lock_want(iter, level);
+ enum six_lock_type lock_type = __btree_lock_want(iter, level);
BKEY_PADDED(k) tmp;
BUG_ON(!btree_node_locked(iter, iter->level));
@@ -793,6 +817,12 @@ static void btree_iter_up(struct btree_iter *iter)
btree_node_unlock(iter, iter->level++);
}
+static void btree_iter_set_end(struct btree_iter *iter)
+{
+ iter->uptodate = BTREE_ITER_END;
+ __bch2_btree_iter_unlock(iter);
+}
+
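
btree_iter_set_end() centralizes what the converted call sites below used to do by hand, and additionally drops the iterator's locks, which is what lets bch2_btree_iter_verify_locks() assert BUG_ON(iter->nodes_locked) for BTREE_ITER_END. The unlock helper it calls is assumed to be the usual bitmask walk:

	/* Assumed shape of the helper defined earlier in this file: */
	static inline void __bch2_btree_iter_unlock(struct btree_iter *iter)
	{
		while (iter->nodes_locked)
			btree_node_unlock(iter, __ffs(iter->nodes_locked));
	}
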
int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
static int btree_iter_traverse_error(struct btree_iter *iter, int ret)
@@ -956,6 +986,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_NEED_PEEK;
+ bch2_btree_iter_verify_locks(iter);
return 0;
}
@@ -963,11 +994,7 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;
- if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
- return 0;
-
- if (iter->uptodate == BTREE_ITER_NEED_RELOCK &&
- __bch2_btree_iter_relock(iter))
+ if (__bch2_btree_iter_relock(iter))
return 0;
ret = __bch2_btree_iter_traverse(iter);
@@ -987,6 +1014,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
int ret;
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+ bch2_btree_iter_verify_locks(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE)
return iter->l[iter->level].b;
@@ -1000,7 +1028,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
b = iter->l[iter->level].b;
if (!b) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return NULL;
}
@@ -1018,11 +1046,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
int ret;
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+ bch2_btree_iter_verify_locks(iter);
btree_iter_up(iter);
if (!btree_iter_node(iter, iter->level)) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return NULL;
}
@@ -1042,6 +1071,15 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
* the next child node
*/
+ /*
+ * We don't really want to be unlocking here except we can't
+ * directly tell btree_iter_traverse() "traverse to this level"
+ * except by setting iter->level, so we have to unlock so we
+ * don't screw up our lock invariants:
+ */
+ if (btree_node_read_locked(iter, iter->level))
+ btree_node_unlock(iter, iter->level);
+
/* ick: */
iter->pos = iter->btree_id == BTREE_ID_INODES
? btree_type_successor(iter->btree_id, iter->pos)
@@ -1104,8 +1142,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
- EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
- !btree_node_locked(iter, 0));
+ bch2_btree_iter_verify_locks(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE) {
struct bkey_packed *k =
@@ -1135,7 +1172,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
/* got to the end of the leaf, iterator needs to be traversed: */
iter->pos = l->b->key.k.p;
if (!bkey_cmp(iter->pos, POS_MAX)) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return bkey_s_c_null;
}
@@ -1162,7 +1199,7 @@ struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
iter->pos = l->b->key.k.p;
if (!bkey_cmp(iter->pos, POS_MAX)) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return bkey_s_c_null;
}
@@ -1181,6 +1218,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
+ bch2_btree_iter_verify_locks(iter);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
k = bch2_btree_iter_peek(iter);
@@ -1243,7 +1281,7 @@ recheck:
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
if (n.p.offset == KEY_OFFSET_MAX) {
if (n.p.inode == KEY_INODE_MAX) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return bkey_s_c_null;
}
@@ -1277,8 +1315,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
- EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
- !btree_node_locked(iter, 0));
+ bch2_btree_iter_verify_locks(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE) {
struct bkey_s_c ret = { .k = &iter->k };
@@ -1304,6 +1341,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
{
+ EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
+ (iter->btree_id == BTREE_ID_EXTENTS));
+ EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
+ bch2_btree_iter_verify_locks(iter);
+
iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {