summary refs log tree commit diff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2020-06-12 14:58:07 -0400
committerKent Overstreet <kent.overstreet@gmail.com>2020-06-12 19:14:27 -0400
commit77546f38027c8ef06cfff3864c7464f7e3ed3cf0 (patch)
treeacc4f96961b1805ec3991144f7cec767786ae197
parentfee7bfda25d14f678eab996afe12b4f162980a35 (diff)
bcachefs: Fix a deadlock
__bch2_btree_node_lock() was incorrectly using iter->pos as a proxy for
btree node lock ordering, this caused an off by one error that was
triggered by bch2_btree_node_get_sibling() getting the previous node.

This refactors the code to compare against btree node keys directly.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- fs/bcachefs/btree_iter.c        | 70
-rw-r--r-- fs/bcachefs/btree_locking.h     | 24
-rw-r--r-- fs/bcachefs/btree_types.h       |  4
-rw-r--r-- fs/bcachefs/btree_update_leaf.c |  2
4 files changed, 68 insertions, 32 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 814b4f154c2c..bd005e3e9e8d 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -101,7 +101,7 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
if (six_relock_type(&b->lock, want, iter->l[level].lock_seq) ||
(btree_node_lock_seq_matches(iter, b, level) &&
- btree_node_lock_increment(iter, b, level, want))) {
+ btree_node_lock_increment(iter->trans, b, level, want))) {
mark_btree_node_locked(iter, level, want);
return true;
} else {
@@ -130,7 +130,7 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
goto success;
if (btree_node_lock_seq_matches(iter, b, level) &&
- btree_node_lock_increment(iter, b, level, BTREE_NODE_INTENT_LOCKED)) {
+ btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
btree_node_unlock(iter, level);
goto success;
}
@@ -193,23 +193,19 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
/* Slowpath: */
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
- unsigned level,
- struct btree_iter *iter,
- enum six_lock_type type)
+ unsigned level, struct btree_iter *iter,
+ enum six_lock_type type)
{
+ struct btree_trans *trans = iter->trans;
struct btree_iter *linked;
+ unsigned l;
bool ret = true;
/* Check if it's safe to block: */
- trans_for_each_iter(iter->trans, linked) {
+ trans_for_each_iter(trans, linked) {
if (!linked->nodes_locked)
continue;
- /* Must lock btree nodes in key order: */
- if ((cmp_int(iter->btree_id, linked->btree_id) ?:
- bkey_cmp(pos, linked->pos)) < 0)
- ret = false;
-
/*
* Can't block taking an intent lock if we have _any_ nodes read
* locked:
@@ -224,13 +220,15 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (type == SIX_LOCK_intent &&
linked->nodes_locked != linked->nodes_intent_locked) {
- if (!(iter->trans->nounlock)) {
+ if (!(trans->nounlock)) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
__fls(linked->nodes_locked) + 1);
- btree_iter_get_locks(linked, true, false);
+ if (!btree_iter_get_locks(linked, true, false))
+ ret = false;
+ } else {
+ ret = false;
}
- ret = false;
}
/*
@@ -240,14 +238,36 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (linked->btree_id == iter->btree_id &&
level > __fls(linked->nodes_locked)) {
- if (!(iter->trans->nounlock)) {
+ if (!(trans->nounlock)) {
linked->locks_want =
max(level + 1, max_t(unsigned,
linked->locks_want,
iter->locks_want));
- btree_iter_get_locks(linked, true, false);
+ if (!btree_iter_get_locks(linked, true, false))
+ ret = false;
+ } else {
+ ret = false;
}
+ }
+
+ /* Must lock btree nodes in key order: */
+ if (iter->btree_id < linked->btree_id)
+ ret = false;
+
+ if (iter->btree_id == linked->btree_id &&
+ btree_node_locked(linked, level) &&
+ bkey_cmp(pos, linked->l[l].b->key.k.p) <= 0)
ret = false;
+
+ /*
+ * Recheck if this is a node we already have locked - since one
+ * of the get_locks() calls might've successfully
+ * upgraded/relocked it:
+ */
+ if (linked->l[level].b == b &&
+ btree_node_locked_type(linked, level) >= type) {
+ six_lock_increment(&b->lock, type);
+ return true;
}
}
@@ -2241,13 +2261,15 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
- pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
+ pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip);
trans_for_each_iter(trans, iter) {
if (!iter->nodes_locked)
continue;
- pr_buf(out, " iter %s:", bch2_btree_ids[iter->btree_id]);
+ pr_buf(out, " iter %u %s:",
+ iter->idx,
+ bch2_btree_ids[iter->btree_id]);
bch2_bpos_to_text(out, iter->pos);
pr_buf(out, "\n");
@@ -2255,8 +2277,8 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
if (btree_node_locked(iter, l)) {
b = iter->l[l].b;
- pr_buf(out, " %p l=%u %s ",
- b, l, btree_node_intent_locked(iter, l) ? "i" : "r");
+ pr_buf(out, " %px %s l=%u ",
+ b, btree_node_intent_locked(iter, l) ? "i" : "r", l);
bch2_bpos_to_text(out, b->key.k.p);
pr_buf(out, "\n");
}
@@ -2265,7 +2287,13 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
b = READ_ONCE(trans->locking);
if (b) {
- pr_buf(out, " locking %px l=%u %s:",
+ pr_buf(out, " locking iter %u l=%u %s:",
+ trans->locking_iter_idx,
+ trans->locking_level,
+ bch2_btree_ids[trans->locking_btree_id]);
+ bch2_bpos_to_text(out, trans->locking_pos);
+
+ pr_buf(out, " node %px l=%u %s:",
b, b->level,
bch2_btree_ids[b->btree_id]);
bch2_bpos_to_text(out, b->key.k.p);
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 7aa11c00b647..936bfaf06e20 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -158,15 +158,15 @@ static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
* Lock a btree node if we already have it locked on one of our linked
* iterators:
*/
-static inline bool btree_node_lock_increment(struct btree_iter *iter,
+static inline bool btree_node_lock_increment(struct btree_trans *trans,
struct btree *b, unsigned level,
enum btree_node_locked_type want)
{
- struct btree_iter *linked;
+ struct btree_iter *iter;
- trans_for_each_iter(iter->trans, linked)
- if (linked->l[level].b == b &&
- btree_node_locked_type(linked, level) >= want) {
+ trans_for_each_iter(trans, iter)
+ if (iter->l[level].b == b &&
+ btree_node_locked_type(iter, level) >= want) {
six_lock_increment(&b->lock, want);
return true;
}
@@ -182,19 +182,23 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
struct btree_iter *iter,
enum six_lock_type type)
{
+ struct btree_trans *trans = iter->trans;
bool ret;
EBUG_ON(level >= BTREE_MAX_DEPTH);
#ifdef CONFIG_BCACHEFS_DEBUG
- iter->trans->locking = b;
+ trans->locking = b;
+ trans->locking_iter_idx = iter->idx;
+ trans->locking_pos = pos;
+ trans->locking_btree_id = iter->btree_id;
+ trans->locking_level = level;
#endif
-
- ret = likely(six_trylock_type(&b->lock, type)) ||
- btree_node_lock_increment(iter, b, level, type) ||
+ ret = likely(six_trylock_type(&b->lock, type)) ||
+ btree_node_lock_increment(trans, b, level, type) ||
__bch2_btree_node_lock(b, pos, level, iter, type);
#ifdef CONFIG_BCACHEFS_DEBUG
- iter->trans->locking = NULL;
+ trans->locking = NULL;
#endif
return ret;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index e97248ca3aa2..047b7b0776a1 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -284,6 +284,10 @@ struct btree_trans {
#ifdef CONFIG_BCACHEFS_DEBUG
struct list_head list;
struct btree *locking;
+ unsigned locking_iter_idx;
+ struct bpos locking_pos;
+ u8 locking_btree_id;
+ u8 locking_level;
pid_t pid;
#endif
unsigned long ip;
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 9c2b7c030544..283c10feb81f 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -481,7 +481,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
* or anything else that might call bch2_trans_relock(), since that
* would just retake the read locks:
*/
- trans_for_each_iter_all(trans, iter) {
+ trans_for_each_iter(trans, iter) {
if (iter->nodes_locked != iter->nodes_intent_locked) {
EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
EBUG_ON(trans->iters_live & (1ULL << iter->idx));