summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2022-02-27 09:56:33 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-03-13 11:34:20 -0400
commit 076e6f18ff0d753050fc37617cd4796c49db3958 (patch)
tree 1f872158f02fc4c321f0bee4ab9df081c6a1ad86
parent 010587d73d65e26c2a52d377245b16dc78af900c (diff)
bcachefs: Fix race leading to btree node write getting stuck
Checking btree_node_may_write() isn't atomic with the other btree flags,
dirty and need_write in particular. There was a rare race where we'd
unblock a node from writing while __btree_node_flush() was setting
need_write, and no thread would notice that the node was now both able
to write and needed to be written.

Fix this by adding btree node flags for will_make_reachable and
write_blocked that can be checked in the cmpxchg loop in
__bch2_btree_node_write.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- fs/bcachefs/btree_cache.c 7
-rw-r--r-- fs/bcachefs/btree_io.c 10
-rw-r--r-- fs/bcachefs/btree_io.h 6
-rw-r--r-- fs/bcachefs/btree_types.h 2
-rw-r--r-- fs/bcachefs/btree_update_interior.c 7
5 files changed, 19 insertions, 13 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 8ab21e4bfd64..9d00ce25aae2 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -227,10 +227,9 @@ wait_on_io:
goto wait_on_io;
}
- if (btree_node_noevict(b))
- goto out_unlock;
-
- if (!btree_node_may_write(b))
+ if (btree_node_noevict(b) ||
+ btree_node_write_blocked(b) ||
+ btree_node_will_make_reachable(b))
goto out_unlock;
if (btree_node_dirty(b)) {
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 65252b64598e..07db7d0d3229 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1612,7 +1612,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
if ((old & (1U << BTREE_NODE_dirty)) &&
(old & (1U << BTREE_NODE_need_write)) &&
!(old & (1U << BTREE_NODE_never_write)) &&
- btree_node_may_write(b)) {
+ !(old & (1U << BTREE_NODE_write_blocked)) &&
+ !(old & (1U << BTREE_NODE_will_make_reachable))) {
new &= ~(1U << BTREE_NODE_dirty);
new &= ~(1U << BTREE_NODE_need_write);
new |= (1U << BTREE_NODE_write_in_flight);
@@ -1784,10 +1785,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
!(old & (1 << BTREE_NODE_need_write)))
return;
- if (!btree_node_may_write(b))
+ if (old &
+ ((1 << BTREE_NODE_never_write)|
+ (1 << BTREE_NODE_write_blocked)))
return;
- if (old & (1 << BTREE_NODE_never_write))
+ if (b->written &&
+ (old & (1 << BTREE_NODE_will_make_reachable)))
return;
if (old & (1 << BTREE_NODE_write_in_flight))
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 7ed88089f6f9..d818d87661e8 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *);
void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *);
-static inline bool btree_node_may_write(struct btree *b)
-{
- return list_empty_careful(&b->write_blocked) &&
- (!b->written || !b->will_make_reachable);
-}
-
enum compact_mode {
COMPACT_LAZY,
COMPACT_ALL,
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 0b275ea28a8f..d3e6d6bb9c48 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -435,6 +435,8 @@ struct btree_trans {
x(read_error) \
x(dirty) \
x(need_write) \
+ x(write_blocked) \
+ x(will_make_reachable) \
x(noevict) \
x(write_idx) \
x(accessed) \
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 330186bd2146..9e5d49fbf4aa 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -606,6 +606,8 @@ err:
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
+ if (list_empty(&b->write_blocked))
+ clear_btree_node_write_blocked(b);
/*
* Node might have been freed, recheck under
@@ -650,6 +652,7 @@ err:
BUG_ON(b->will_make_reachable != (unsigned long) as);
b->will_make_reachable = 0;
+ clear_btree_node_will_make_reachable(b);
}
mutex_unlock(&c->btree_interior_update_lock);
@@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
as->mode = BTREE_INTERIOR_UPDATING_NODE;
as->b = b;
+
+ set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
mutex_unlock(&c->btree_interior_update_lock);
@@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
as->new_nodes[as->nr_new_nodes++] = b;
b->will_make_reachable = 1UL|(unsigned long) as;
+ set_btree_node_will_make_reachable(b);
mutex_unlock(&c->btree_interior_update_lock);
@@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
* xchg() is for synchronization with bch2_btree_complete_write:
*/
v = xchg(&b->will_make_reachable, 0);
+ clear_btree_node_will_make_reachable(b);
as = (struct btree_update *) (v & ~1UL);
if (!as) {