summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2020-06-12 22:29:48 -0400
committerKent Overstreet <kent.overstreet@gmail.com>2020-06-13 14:23:03 -0400
commit2604fe59d9b448d0f0e3a3cc5eab5d30a41dd55f (patch)
treef3b8fab06d48c9dc29f3665cca0b01ff13336a17
parent858daffbc820e4284940f0aed07dc9e258eeccca (diff)
bcachefs: Don't deadlock when btree node reuse changes lock ordering
Btree node lock ordering is based on the logical key. However, 'struct btree' may be reused for a different btree node under memory pressure. This patch uses the new six lock callback to check if a btree node is no longer the node we wanted to lock before blocking. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--fs/bcachefs/btree_cache.c19
-rw-r--r--fs/bcachefs/btree_gc.c2
-rw-r--r--fs/bcachefs/btree_iter.c38
-rw-r--r--fs/bcachefs/btree_locking.h21
-rw-r--r--fs/bcachefs/btree_update_interior.c4
-rw-r--r--fs/bcachefs/recovery.c2
6 files changed, 66 insertions, 20 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 6cbb263576d3..dc169a845da7 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -677,6 +677,14 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return b;
}
+static int lock_node_check_fn(struct six_lock *lock, void *p)
+{
+ struct btree *b = container_of(lock, struct btree, lock);
+ const struct bkey_i *k = p;
+
+ return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
+}
+
/**
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
@@ -749,8 +757,12 @@ lock_node:
if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);
- if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
+ if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
+ lock_node_check_fn, (void *) k)) {
+ if (b->hash_val != btree_ptr_hash_val(k))
+ goto retry;
return ERR_PTR(-EINTR);
+ }
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->level != level ||
@@ -802,6 +814,7 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
+ int ret;
EBUG_ON(level >= BTREE_MAX_DEPTH);
@@ -822,7 +835,9 @@ retry:
return b;
} else {
lock_node:
- six_lock_read(&b->lock);
+ ret = six_lock_read(&b->lock, lock_node_check_fn, (void *) k);
+ if (ret)
+ goto retry;
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->btree_id != btree_id ||
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 65b01e865015..c62fa3583b73 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -336,7 +336,7 @@ static int bch2_gc_btree_init(struct bch_fs *c,
if (btree_node_fake(b))
return 0;
- six_lock_read(&b->lock);
+ six_lock_read(&b->lock, NULL, NULL);
if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c,
"btree root with incorrect min_key: %llu:%llu",
b->data->min_key.inode,
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index bd005e3e9e8d..5e73791b21e0 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -194,10 +194,13 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
/* Slowpath: */
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
unsigned level, struct btree_iter *iter,
- enum six_lock_type type)
+ enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn,
+ void *p)
{
struct btree_trans *trans = iter->trans;
struct btree_iter *linked;
+ u64 start_time = local_clock();
unsigned l;
bool ret = true;
@@ -276,7 +279,14 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
return false;
}
- __btree_node_lock_type(iter->trans->c, b, type);
+ if (six_trylock_type(&b->lock, type))
+ return true;
+
+ if (six_lock_type(&b->lock, type, should_sleep_fn, p))
+ return false;
+
+ bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
+ start_time);
return true;
}
@@ -287,6 +297,11 @@ static void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
unsigned l;
+ if (!(iter->trans->iters_linked & (1ULL << iter->idx))) {
+ BUG_ON(iter->nodes_locked);
+ return;
+ }
+
for (l = 0; btree_iter_node(iter, l); l++) {
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
!btree_node_locked(iter, l))
@@ -301,7 +316,7 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
{
struct btree_iter *iter;
- trans_for_each_iter(trans, iter)
+ trans_for_each_iter_all(trans, iter)
bch2_btree_iter_verify_locks(iter);
}
#else
@@ -893,18 +908,26 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b)
__btree_iter_init(linked, b->level);
}
+static int lock_root_check_fn(struct six_lock *lock, void *p)
+{
+ struct btree *b = container_of(lock, struct btree, lock);
+ struct btree **rootp = p;
+
+ return b == *rootp ? 0 : -1;
+}
+
static inline int btree_iter_lock_root(struct btree_iter *iter,
unsigned depth_want)
{
struct bch_fs *c = iter->trans->c;
- struct btree *b;
+ struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
enum six_lock_type lock_type;
unsigned i;
EBUG_ON(iter->nodes_locked);
while (1) {
- b = READ_ONCE(c->btree_roots[iter->btree_id].b);
+ b = READ_ONCE(*rootp);
iter->level = READ_ONCE(b->level);
if (unlikely(iter->level < depth_want)) {
@@ -922,10 +945,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
lock_type = __btree_lock_want(iter, iter->level);
if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
- iter, lock_type)))
+ iter, lock_type,
+ lock_root_check_fn, rootp)))
return -EINTR;
- if (likely(b == c->btree_roots[iter->btree_id].b &&
+ if (likely(b == READ_ONCE(*rootp) &&
b->level == iter->level &&
!race_fault())) {
for (i = 0; i < iter->level; i++)
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 936bfaf06e20..da2a0ebbc24f 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -143,7 +143,7 @@ static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b,
{
u64 start_time = local_clock();
- six_lock_type(&b->lock, type);
+ six_lock_type(&b->lock, type, NULL, NULL);
bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
}
@@ -175,17 +175,21 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
}
bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
- struct btree_iter *, enum six_lock_type);
-
-static inline bool btree_node_lock(struct btree *b, struct bpos pos,
- unsigned level,
- struct btree_iter *iter,
- enum six_lock_type type)
+ struct btree_iter *, enum six_lock_type,
+ six_lock_should_sleep_fn, void *);
+
+static inline bool btree_node_lock(struct btree *b,
+ struct bpos pos, unsigned level,
+ struct btree_iter *iter,
+ enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn, void *p)
{
struct btree_trans *trans = iter->trans;
bool ret;
EBUG_ON(level >= BTREE_MAX_DEPTH);
+ EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
+
#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = b;
trans->locking_iter_idx = iter->idx;
@@ -195,7 +199,8 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
#endif
ret = likely(six_trylock_type(&b->lock, type)) ||
btree_node_lock_increment(trans, b, level, type) ||
- __bch2_btree_node_lock(b, pos, level, iter, type);
+ __bch2_btree_node_lock(b, pos, level, iter, type,
+ should_sleep_fn, p);
#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = NULL;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 932e45548510..4d38943a4f0c 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -135,6 +135,8 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)
bch2_btree_node_hash_remove(&c->btree_cache, b);
+ six_lock_wakeup_all(&b->lock);
+
mutex_lock(&c->btree_cache.lock);
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);
@@ -163,7 +165,7 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
trans_for_each_iter(iter->trans, linked)
BUG_ON(linked->l[b->level].b == b);
- six_lock_write(&b->lock);
+ six_lock_write(&b->lock, NULL, NULL);
__btree_node_free(c, b);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index a94f25cca679..c478d19e5691 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -253,7 +253,7 @@ int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_k
if (btree_node_fake(b))
return 0;
- six_lock_read(&b->lock);
+ six_lock_read(&b->lock, NULL, NULL);
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
node_fn, key_fn) ?: