diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2020-06-12 22:29:48 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2020-06-13 14:23:03 -0400 |
commit | 2604fe59d9b448d0f0e3a3cc5eab5d30a41dd55f (patch) | |
tree | f3b8fab06d48c9dc29f3665cca0b01ff13336a17 | |
parent | 858daffbc820e4284940f0aed07dc9e258eeccca (diff) |
bcachefs: Don't deadlock when btree node reuse changes lock ordering
Btree node lock ordering is based on the logical key. However, 'struct
btree' may be reused for a different btree node under memory pressure.
This patch uses the new six lock callback to check if a btree node is no
longer the node we wanted to lock before blocking.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/btree_cache.c | 19 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.c | 38 | ||||
-rw-r--r-- | fs/bcachefs/btree_locking.h | 21 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 2 |
6 files changed, 66 insertions, 20 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 6cbb263576d3..dc169a845da7 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -677,6 +677,14 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, return b; } +static int lock_node_check_fn(struct six_lock *lock, void *p) +{ + struct btree *b = container_of(lock, struct btree, lock); + const struct bkey_i *k = p; + + return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1; +} + /** * bch_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. @@ -749,8 +757,12 @@ lock_node: if (btree_node_read_locked(iter, level + 1)) btree_node_unlock(iter, level + 1); - if (!btree_node_lock(b, k->k.p, level, iter, lock_type)) + if (!btree_node_lock(b, k->k.p, level, iter, lock_type, + lock_node_check_fn, (void *) k)) { + if (b->hash_val != btree_ptr_hash_val(k)) + goto retry; return ERR_PTR(-EINTR); + } if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->level != level || @@ -802,6 +814,7 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; + int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); @@ -822,7 +835,9 @@ retry: return b; } else { lock_node: - six_lock_read(&b->lock); + ret = six_lock_read(&b->lock, lock_node_check_fn, (void *) k); + if (ret) + goto retry; if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->btree_id != btree_id || diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 65b01e865015..c62fa3583b73 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -336,7 +336,7 @@ static int bch2_gc_btree_init(struct bch_fs *c, if (btree_node_fake(b)) return 0; - six_lock_read(&b->lock); + six_lock_read(&b->lock, NULL, NULL); if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, "btree root with incorrect min_key: %llu:%llu", b->data->min_key.inode, diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index bd005e3e9e8d..5e73791b21e0 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -194,10 +194,13 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, /* Slowpath: */ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, unsigned level, struct btree_iter *iter, - enum six_lock_type type) + enum six_lock_type type, + six_lock_should_sleep_fn should_sleep_fn, + void *p) { struct btree_trans *trans = iter->trans; struct btree_iter *linked; + u64 start_time = local_clock(); unsigned l; bool ret = true; @@ -276,7 +279,14 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, return false; } - __btree_node_lock_type(iter->trans->c, b, type); + if (six_trylock_type(&b->lock, type)) + return true; + + if (six_lock_type(&b->lock, type, should_sleep_fn, p)) + return false; + + bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)], + start_time); return true; } @@ -287,6 +297,11 @@ static void bch2_btree_iter_verify_locks(struct btree_iter *iter) { unsigned l; + if (!(iter->trans->iters_linked & (1ULL << iter->idx))) { + BUG_ON(iter->nodes_locked); + return; + } + for (l = 0; btree_iter_node(iter, l); l++) { if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && !btree_node_locked(iter, l)) @@ -301,7 +316,7 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans) { struct btree_iter *iter; - trans_for_each_iter(trans, iter) + trans_for_each_iter_all(trans, iter) bch2_btree_iter_verify_locks(iter); } #else @@ -893,18 +908,26 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) __btree_iter_init(linked, b->level); } +static int lock_root_check_fn(struct six_lock *lock, void *p) +{ + struct btree *b = container_of(lock, struct btree, lock); + struct btree **rootp = p; + + return b == *rootp ? 0 : -1; +} + static inline int btree_iter_lock_root(struct btree_iter *iter, unsigned depth_want) { struct bch_fs *c = iter->trans->c; - struct btree *b; + struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; enum six_lock_type lock_type; unsigned i; EBUG_ON(iter->nodes_locked); while (1) { - b = READ_ONCE(c->btree_roots[iter->btree_id].b); + b = READ_ONCE(*rootp); iter->level = READ_ONCE(b->level); if (unlikely(iter->level < depth_want)) { @@ -922,10 +945,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter, lock_type = __btree_lock_want(iter, iter->level); if (unlikely(!btree_node_lock(b, POS_MAX, iter->level, - iter, lock_type))) + iter, lock_type, + lock_root_check_fn, rootp))) return -EINTR; - if (likely(b == c->btree_roots[iter->btree_id].b && + if (likely(b == READ_ONCE(*rootp) && b->level == iter->level && !race_fault())) { for (i = 0; i < iter->level; i++) diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 936bfaf06e20..da2a0ebbc24f 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -143,7 +143,7 @@ static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b, { u64 start_time = local_clock(); - six_lock_type(&b->lock, type); + six_lock_type(&b->lock, type, NULL, NULL); bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); } @@ -175,17 +175,21 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, } bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned, - struct btree_iter *, enum six_lock_type); - -static inline bool btree_node_lock(struct btree *b, struct bpos pos, - unsigned level, - struct btree_iter *iter, - enum six_lock_type type) + struct btree_iter *, enum six_lock_type, + six_lock_should_sleep_fn, void *); + +static inline bool btree_node_lock(struct btree *b, + struct bpos pos, unsigned level, + struct btree_iter *iter, + enum six_lock_type type, + six_lock_should_sleep_fn should_sleep_fn, void *p) { struct btree_trans *trans = iter->trans; bool ret; EBUG_ON(level >= BTREE_MAX_DEPTH); + EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + #ifdef CONFIG_BCACHEFS_DEBUG trans->locking = b; trans->locking_iter_idx = iter->idx; @@ -195,7 +199,8 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos, #endif ret = likely(six_trylock_type(&b->lock, type)) || btree_node_lock_increment(trans, b, level, type) || - __bch2_btree_node_lock(b, pos, level, iter, type); + __bch2_btree_node_lock(b, pos, level, iter, type, + should_sleep_fn, p); #ifdef CONFIG_BCACHEFS_DEBUG trans->locking = NULL; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 932e45548510..4d38943a4f0c 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -135,6 +135,8 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b) bch2_btree_node_hash_remove(&c->btree_cache, b); + six_lock_wakeup_all(&b->lock); + mutex_lock(&c->btree_cache.lock); list_move(&b->list, &c->btree_cache.freeable); mutex_unlock(&c->btree_cache.lock); @@ -163,7 +165,7 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, trans_for_each_iter(iter->trans, linked) BUG_ON(linked->l[b->level].b == b); - six_lock_write(&b->lock); + six_lock_write(&b->lock, NULL, NULL); __btree_node_free(c, b); six_unlock_write(&b->lock); six_unlock_intent(&b->lock); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a94f25cca679..c478d19e5691 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -253,7 +253,7 @@ int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_k if (btree_node_fake(b)) return 0; - six_lock_read(&b->lock); + six_lock_read(&b->lock, NULL, NULL); ret = (node_fn ? node_fn(c, b) : 0) ?: bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id, node_fn, key_fn) ?: |