Diffstat (limited to 'fs/bcachefs/btree_iter.c')
-rw-r--r--	fs/bcachefs/btree_iter.c | 272
1 file changed, 157 insertions(+), 115 deletions(-)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 330abc577c9a..3ad7e082f9b3 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -46,7 +46,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
 	struct btree_iter *linked;
 	unsigned readers = 0;
 
-	for_each_linked_btree_iter(iter, linked)
+	for_each_btree_iter(iter, linked)
 		if (linked->l[b->level].b == b &&
 		    btree_node_read_locked(linked, b->level))
 			readers++;
@@ -68,7 +68,7 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
 {
 	struct btree_iter *linked;
 	struct btree *b = iter->l[level].b;
-	int want = btree_lock_want(iter, level);
+	int want = __btree_lock_want(iter, level);
 	int have = btree_node_locked_type(iter, level);
 
 	if (want == have)
@@ -101,38 +101,6 @@ success:
 	return true;
 }
 
-static bool __bch2_btree_iter_relock(struct btree_iter *iter)
-{
-	unsigned l = iter->level;
-
-	do {
-		if (!btree_iter_node(iter, l))
-			break;
-
-		if (!bch2_btree_node_relock(iter, l)) {
-			btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-			return false;
-		}
-
-		l++;
-	} while (l < iter->locks_want);
-
-	if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
-		iter->uptodate = BTREE_ITER_NEED_PEEK;
-	return true;
-}
-
-bool bch2_btree_iter_relock(struct btree_iter *iter)
-{
-	struct btree_iter *linked;
-	bool ret = true;
-
-	for_each_btree_iter(iter, linked)
-		ret &= __bch2_btree_iter_relock(linked);
-
-	return ret;
-}
-
 /* Slowpath: */
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 			    unsigned level,
@@ -217,57 +185,135 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 
 /* Btree iterator locking: */
 
-static void btree_iter_drop_extra_locks(struct btree_iter *iter)
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_iter_verify_locks(struct btree_iter *iter)
 {
 	unsigned l;
 
-	while (iter->nodes_locked &&
-	       (l = __fls(iter->nodes_locked)) > iter->locks_want) {
-		if (l > iter->level) {
-			btree_node_unlock(iter, l);
-		} else {
-			if (btree_node_intent_locked(iter, l)) {
-				six_lock_downgrade(&iter->l[l].b->lock);
-				iter->nodes_intent_locked ^= 1 << l;
+	if (iter->uptodate == BTREE_ITER_END) {
+		BUG_ON(iter->nodes_locked);
+		return;
+	}
+
+	for (l = 0; btree_iter_node(iter, l); l++) {
+		if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
+		    !btree_node_locked(iter, l))
+			continue;
+
+		BUG_ON(btree_lock_want(iter, l) !=
+		       btree_node_locked_type(iter, l));
+	}
+}
+#endif
+
+static bool __bch2_btree_iter_relock(struct btree_iter *iter)
+{
+	if (iter->uptodate < BTREE_ITER_NEED_RELOCK) {
+		bch2_btree_iter_verify_locks(iter);
+		return true;
+	}
+
+	if (iter->uptodate == BTREE_ITER_NEED_RELOCK) {
+		unsigned l = iter->level;
+
+		do {
+			if (!btree_iter_node(iter, l))
+				break;
+
+			if (!bch2_btree_node_relock(iter, l)) {
+				btree_iter_set_dirty(iter,
+						BTREE_ITER_NEED_TRAVERSE);
+				return false;
 			}
-			break;
-		}
+
+			l++;
+		} while (l < iter->locks_want);
+
+		iter->uptodate = BTREE_ITER_NEED_PEEK;
+		bch2_btree_iter_verify_locks(iter);
+		return true;
 	}
+
+	return false;
 }
 
-bool __bch2_btree_iter_set_locks_want(struct btree_iter *iter,
-				      unsigned new_locks_want)
+bool bch2_btree_iter_relock(struct btree_iter *iter)
 {
 	struct btree_iter *linked;
+	bool ret = true;
 
-	/* Drop locks we don't want anymore: */
-	if (new_locks_want < iter->locks_want)
-		for_each_linked_btree_iter(iter, linked)
-			if (linked->locks_want > new_locks_want) {
-				linked->locks_want = max_t(unsigned, 1,
-							   new_locks_want);
-				btree_iter_drop_extra_locks(linked);
-			}
+	for_each_btree_iter(iter, linked)
+		ret &= __bch2_btree_iter_relock(linked);
 
-	iter->locks_want = new_locks_want;
-	btree_iter_drop_extra_locks(iter);
+	return ret;
+}
+
+bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
+			       unsigned new_locks_want)
+{
+	struct btree_iter *linked;
+
+	if (iter->locks_want < new_locks_want) {
+		iter->locks_want = new_locks_want;
+		btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+	}
 
 	if (__bch2_btree_iter_relock(iter))
 		return true;
 
 	/*
-	 * Just an optimization: ancestor nodes must be locked before child
-	 * nodes, so set locks_want on iterators that might lock ancestors
-	 * before us to avoid getting -EINTR later:
+	 * Ancestor nodes must be locked before child nodes, so set locks_want
+	 * on iterators that might lock ancestors before us to avoid getting
+	 * -EINTR later:
 	 */
 	for_each_linked_btree_iter(iter, linked)
 		if (linked->btree_id == iter->btree_id &&
-		    btree_iter_cmp(linked, iter) <= 0)
-			linked->locks_want = max_t(unsigned, linked->locks_want,
-						   new_locks_want);
+		    btree_iter_cmp(linked, iter) <= 0 &&
+		    linked->locks_want < new_locks_want) {
+			linked->locks_want = new_locks_want;
+			btree_iter_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
+		}
+
 	return false;
 }
 
+void __bch2_btree_iter_downgrade(struct btree_iter *iter,
+				 unsigned downgrade_to)
+{
+	struct btree_iter *linked;
+	unsigned l;
+
+	/*
+	 * We downgrade linked iterators as well because btree_iter_upgrade
+	 * might have had to modify locks_want on linked iterators due to lock
+	 * ordering:
+	 */
+	for_each_btree_iter(iter, linked) {
+		unsigned new_locks_want = downgrade_to ?:
+			(linked->flags & BTREE_ITER_INTENT ? 1 : 0);
+
+		if (linked->locks_want <= new_locks_want)
+			continue;
+
+		linked->locks_want = new_locks_want;
+
+		while (linked->nodes_locked &&
+		       (l = __fls(linked->nodes_locked)) >= linked->locks_want) {
+			if (l > linked->level) {
+				btree_node_unlock(linked, l);
+			} else {
+				if (btree_node_intent_locked(linked, l)) {
+					six_lock_downgrade(&linked->l[l].b->lock);
+					linked->nodes_intent_locked ^= 1 << l;
+				}
+				break;
+			}
+		}
+
+		bch2_btree_iter_verify_locks(linked);
+	}
+}
+
 int bch2_btree_iter_unlock(struct btree_iter *iter)
 {
 	struct btree_iter *linked;
@@ -609,11 +655,12 @@ static inline void btree_iter_node_set(struct btree_iter *iter,
  * A btree node is being replaced - update the iterator to point to the new
  * node:
  */
-bool bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
 {
+	enum btree_node_locked_type t;
 	struct btree_iter *linked;
 
-	for_each_linked_btree_iter(iter, linked)
+	for_each_btree_iter(iter, linked)
 		if (btree_iter_pos_in_node(linked, b)) {
 			/*
			 * bch2_btree_iter_node_drop() has already been called -
@@ -622,51 +669,28 @@ bool bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
 			 */
 			BUG_ON(btree_node_locked(linked, b->level));
 
-			/*
-			 * If @linked wants this node read locked, we don't want
-			 * to actually take the read lock now because it's not
-			 * legal to hold read locks on other nodes while we take
-			 * write locks, so the journal can make forward
-			 * progress...
-			 *
-			 * Instead, btree_iter_node_set() sets things up so
-			 * bch2_btree_node_relock() will succeed:
-			 */
-
-			if (btree_want_intent(linked, b->level)) {
-				six_lock_increment(&b->lock, SIX_LOCK_intent);
-				mark_btree_node_intent_locked(linked, b->level);
+			t = btree_lock_want(linked, b->level);
+			if (t != BTREE_NODE_UNLOCKED) {
+				six_lock_increment(&b->lock, t);
+				mark_btree_node_locked(linked, b->level, t);
 			}
 
 			btree_iter_node_set(linked, b);
 		}
 
-	if (!btree_iter_pos_in_node(iter, b)) {
-		six_unlock_intent(&b->lock);
-		return false;
-	}
-
-	mark_btree_node_intent_locked(iter, b->level);
-	btree_iter_node_set(iter, b);
-	return true;
-}
-
-void bch2_btree_iter_node_drop_linked(struct btree_iter *iter, struct btree *b)
-{
-	struct btree_iter *linked;
-
-	for_each_linked_btree_iter(iter, linked)
-		bch2_btree_iter_node_drop(linked, b);
+	six_unlock_intent(&b->lock);
 }
 
 void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
 {
+	struct btree_iter *linked;
 	unsigned level = b->level;
 
-	if (iter->l[level].b == b) {
-		btree_node_unlock(iter, level);
-		iter->l[level].b = BTREE_ITER_NOT_END;
-	}
+	for_each_btree_iter(iter, linked)
+		if (linked->l[level].b == b) {
+			btree_node_unlock(linked, level);
+			linked->l[level].b = BTREE_ITER_NOT_END;
+		}
 }
 
 /*
@@ -707,7 +731,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 			return 0;
 		}
 
-		lock_type = btree_lock_want(iter, iter->level);
+		lock_type = __btree_lock_want(iter, iter->level);
 		if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
 					      iter, lock_type)))
 			return -EINTR;
@@ -765,7 +789,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
 	struct btree_iter_level *l = &iter->l[iter->level];
 	struct btree *b;
 	unsigned level = iter->level - 1;
-	enum six_lock_type lock_type = btree_lock_want(iter, level);
+	enum six_lock_type lock_type = __btree_lock_want(iter, level);
 	BKEY_PADDED(k) tmp;
 
 	BUG_ON(!btree_node_locked(iter, iter->level));
@@ -793,6 +817,12 @@ static void btree_iter_up(struct btree_iter *iter)
 	btree_node_unlock(iter, iter->level++);
 }
 
+static void btree_iter_set_end(struct btree_iter *iter)
+{
+	iter->uptodate = BTREE_ITER_END;
+	__bch2_btree_iter_unlock(iter);
+}
+
 int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
 
 static int btree_iter_traverse_error(struct btree_iter *iter, int ret)
@@ -956,6 +986,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 	}
 
 	iter->uptodate = BTREE_ITER_NEED_PEEK;
+	bch2_btree_iter_verify_locks(iter);
 	return 0;
 }
 
@@ -963,11 +994,7 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
 {
 	int ret;
 
-	if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
-		return 0;
-
-	if (iter->uptodate == BTREE_ITER_NEED_RELOCK &&
-	    __bch2_btree_iter_relock(iter))
+	if (__bch2_btree_iter_relock(iter))
 		return 0;
 
 	ret = __bch2_btree_iter_traverse(iter);
@@ -987,6 +1014,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
 	int ret;
 
 	EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+	bch2_btree_iter_verify_locks(iter);
 
 	if (iter->uptodate == BTREE_ITER_UPTODATE)
 		return iter->l[iter->level].b;
@@ -1000,7 +1028,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
 
 	b = iter->l[iter->level].b;
 	if (!b) {
-		iter->uptodate = BTREE_ITER_END;
+		btree_iter_set_end(iter);
 		return NULL;
 	}
 
@@ -1018,11 +1046,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
 	int ret;
 
 	EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+	bch2_btree_iter_verify_locks(iter);
 
 	btree_iter_up(iter);
 
 	if (!btree_iter_node(iter, iter->level)) {
-		iter->uptodate = BTREE_ITER_END;
+		btree_iter_set_end(iter);
 		return NULL;
 	}
 
@@ -1042,6 +1071,15 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
 		 * the next child node
 		 */
 
+		/*
+		 * We don't really want to be unlocking here except we can't
+		 * directly tell btree_iter_traverse() "traverse to this level"
+		 * except by setting iter->level, so we have to unlock so we
+		 * don't screw up our lock invariants:
+		 */
+		if (btree_node_read_locked(iter, iter->level))
+			btree_node_unlock(iter, iter->level);
+
 		/* ick: */
 		iter->pos = iter->btree_id == BTREE_ID_INODES
 			? btree_type_successor(iter->btree_id, iter->pos)
@@ -1104,8 +1142,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 	EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
 		(iter->btree_id == BTREE_ID_EXTENTS));
 	EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
-	EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
-		!btree_node_locked(iter, 0));
+	bch2_btree_iter_verify_locks(iter);
 
 	if (iter->uptodate == BTREE_ITER_UPTODATE) {
 		struct bkey_packed *k =
@@ -1135,7 +1172,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 		/* got to the end of the leaf, iterator needs to be traversed: */
 		iter->pos = l->b->key.k.p;
 		if (!bkey_cmp(iter->pos, POS_MAX)) {
-			iter->uptodate = BTREE_ITER_END;
+			btree_iter_set_end(iter);
 			return bkey_s_c_null;
 		}
 
@@ -1162,7 +1199,7 @@ struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
 
 	iter->pos = l->b->key.k.p;
 	if (!bkey_cmp(iter->pos, POS_MAX)) {
-		iter->uptodate = BTREE_ITER_END;
+		btree_iter_set_end(iter);
 		return bkey_s_c_null;
 	}
 
@@ -1181,6 +1218,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
 	EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
 		(iter->btree_id == BTREE_ID_EXTENTS));
 	EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
+	bch2_btree_iter_verify_locks(iter);
 
 	if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
 		k = bch2_btree_iter_peek(iter);
@@ -1243,7 +1281,7 @@ recheck:
 	if (iter->flags & BTREE_ITER_IS_EXTENTS) {
 		if (n.p.offset == KEY_OFFSET_MAX) {
 			if (n.p.inode == KEY_INODE_MAX) {
-				iter->uptodate = BTREE_ITER_END;
+				btree_iter_set_end(iter);
 				return bkey_s_c_null;
 			}
 
@@ -1277,8 +1315,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 	EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
 		(iter->btree_id == BTREE_ID_EXTENTS));
 	EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
-	EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
-		!btree_node_locked(iter, 0));
+	bch2_btree_iter_verify_locks(iter);
 
 	if (iter->uptodate == BTREE_ITER_UPTODATE) {
 		struct bkey_s_c ret = { .k = &iter->k };
@@ -1304,6 +1341,11 @@
 
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
 {
+	EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
+		(iter->btree_id == BTREE_ID_EXTENTS));
+	EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
+	bch2_btree_iter_verify_locks(iter);
+
 	iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
 
 	if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
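For illustration only (not part of the patch): a minimal, self-contained userspace model of the lock-dropping loop added in __bch2_btree_iter_downgrade() above. The names iter_model and downgrade are stand-ins, __fls is approximated with a GCC/Clang builtin, new_locks_want is passed directly rather than derived from downgrade_to and BTREE_ITER_INTENT, and the btree_node_unlock()/six_lock_downgrade() calls are replaced with prints; only the nodes_locked/nodes_intent_locked bitmask bookkeeping mirrors the kernel code.

/*
 * Illustrative model -- NOT kernel code.
 */
#include <stdio.h>

#define __fls(x) (31 - __builtin_clz(x))	/* index of most significant set bit */

struct iter_model {				/* stand-in for struct btree_iter */
	unsigned level;				/* depth this iterator points at */
	unsigned locks_want;			/* intent locks wanted from the leaf up */
	unsigned nodes_locked;			/* bit l set: node at level l is locked */
	unsigned nodes_intent_locked;		/* bit l set: that lock is an intent lock */
};

static void downgrade(struct iter_model *it, unsigned new_locks_want)
{
	unsigned l;

	it->locks_want = new_locks_want;

	/* Same loop shape as the patch: walk down from the highest locked level */
	while (it->nodes_locked &&
	       (l = __fls(it->nodes_locked)) >= it->locks_want) {
		if (l > it->level) {
			/* btree_node_unlock() in the real code: */
			it->nodes_locked &= ~(1U << l);
			it->nodes_intent_locked &= ~(1U << l);
			printf("unlocked level %u\n", l);
		} else {
			if (it->nodes_intent_locked & (1U << l)) {
				/* six_lock_downgrade() in the real code: */
				it->nodes_intent_locked ^= 1U << l;
				printf("downgraded level %u to read\n", l);
			}
			break;
		}
	}
}

int main(void)
{
	/* Iterator at a leaf holding intent locks on levels 0-2: */
	struct iter_model it = {
		.level			= 0,
		.locks_want		= 3,
		.nodes_locked		= 0x7,
		.nodes_intent_locked	= 0x7,
	};

	downgrade(&it, 1);	/* keep one intent lock, on level 0 */
	printf("nodes_locked %x, intent %x\n",
	       it.nodes_locked, it.nodes_intent_locked);
	return 0;
}

Run as written, the model unlocks levels 2 and 1 and keeps a single intent lock on level 0 - the state that, if this reading of the patch is right, bch2_btree_iter_verify_locks() expects of a BTREE_ITER_INTENT iterator after downgrade.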