author     Kent Overstreet <kent.overstreet@gmail.com>   2021-07-10 23:03:15 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2021-07-11 12:46:22 -0400
commit     c1c29de4ac7fa8bcf4792210f7d816220be8d54c
tree       706d77b9a259e5f7b15877d7cb9af3fca9243c53
parent     82d12365c9b49710a12862c18e78ad9690c0a8e4
bcachefs: Really don't hold btree locks while btree IOs are in flight
This is something we've attempted to stick to for quite some time, as it
helps keep filesystem latency bounded - but there are a few remaining
paths that this patch fixes.
We also add asserts that we're not holding btree locks when waiting on
btree reads or writes.
This is also necessary for an upcoming patch to update btree pointers
after every btree write - since the btree write completion path will now
be doing btree operations.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
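
The locking discipline the patch converges on is: save the six-lock sequence
number, drop the node lock (and the transaction's other locks) before
blocking, wait for the IO flag bits to clear, then try to retake the lock at
the saved sequence number and fall back to a full retry if that fails. The
sketch below is an illustration only, not code from the patch: the helper
name lock_node_after_read() is hypothetical, while six_unlock_type(),
six_relock_type(), bch2_trans_relock() and bch2_btree_node_wait_on_read()
are the interfaces that appear in the diff.

/*
 * Illustration only (not part of the patch): hypothetical helper showing
 * the unlock -> wait -> relock-at-seq pattern used in btree_cache.c below.
 */
static int lock_node_after_read(struct btree_trans *trans, struct btree *b,
                                enum six_lock_type lock_type)
{
        /* Remember the lock sequence number before dropping the lock: */
        u32 seq = b->c.lock.state.seq;

        /* Never block on btree IO while holding btree locks: */
        six_unlock_type(&b->c.lock, lock_type);
        bch2_trans_unlock(trans);

        /* Safe to sleep now - no btree locks held: */
        bch2_btree_node_wait_on_read(b);

        /* Retake transaction locks, then the node lock at the same seq: */
        if (!bch2_trans_relock(trans))
                return -EINTR;

        if (!six_relock_type(&b->c.lock, lock_type, seq))
                return -EINTR; /* node changed under us - caller must retry */

        return 0;
}

In the diff itself, bch2_btree_node_get()'s relock failure path is a
goto retry rather than an error return, and bch2_btree_node_fill() returns
ERR_PTR(-EINTR), but the seq-based relock is the same in both.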
-rw-r--r--   fs/bcachefs/btree_cache.c            |  95
-rw-r--r--   fs/bcachefs/btree_io.c               |  57
-rw-r--r--   fs/bcachefs/btree_io.h               |  26
-rw-r--r--   fs/bcachefs/btree_update_interior.c  |   2
-rw-r--r--   fs/bcachefs/debug.c                  |   4

5 files changed, 122 insertions(+), 62 deletions(-)
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 9994f392643f..73bfd01f2dcb 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -186,6 +186,17 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
         int ret = 0;
 
         lockdep_assert_held(&bc->lock);
+wait_on_io:
+        if (b->flags & ((1U << BTREE_NODE_dirty)|
+                        (1U << BTREE_NODE_read_in_flight)|
+                        (1U << BTREE_NODE_write_in_flight))) {
+                if (!flush)
+                        return -ENOMEM;
+
+                /* XXX: waiting on IO with btree cache lock held */
+                bch2_btree_node_wait_on_read(b);
+                bch2_btree_node_wait_on_write(b);
+        }
 
         if (!six_trylock_intent(&b->c.lock))
                 return -ENOMEM;
@@ -193,25 +204,26 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
         if (!six_trylock_write(&b->c.lock))
                 goto out_unlock_intent;
 
+        /* recheck under lock */
+        if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
+                        (1U << BTREE_NODE_write_in_flight))) {
+                if (!flush)
+                        goto out_unlock;
+                six_unlock_write(&b->c.lock);
+                six_unlock_intent(&b->c.lock);
+                goto wait_on_io;
+        }
+
         if (btree_node_noevict(b))
                 goto out_unlock;
 
         if (!btree_node_may_write(b))
                 goto out_unlock;
 
-        if (btree_node_dirty(b) &&
-            test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
-                goto out_unlock;
-
-        if (btree_node_dirty(b) ||
-            btree_node_write_in_flight(b) ||
-            btree_node_read_in_flight(b)) {
-                if (!flush)
+        if (btree_node_dirty(b)) {
+                if (!flush ||
+                    test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
                         goto out_unlock;
-
-                wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-                               TASK_UNINTERRUPTIBLE);
-
                 /*
                  * Using the underscore version because we don't want to compact
                  * bsets after the write, since this node is about to be evicted
@@ -223,8 +235,9 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
                 else
                         __bch2_btree_node_write(c, b);
 
-                /* wait for any in flight btree write */
-                btree_node_wait_on_io(b);
+                six_unlock_write(&b->c.lock);
+                six_unlock_intent(&b->c.lock);
+                goto wait_on_io;
         }
 out:
         if (b->hash_val && !ret)
@@ -574,6 +587,7 @@ got_node:
         }
 
         BUG_ON(btree_node_hashed(b));
+        BUG_ON(btree_node_dirty(b));
         BUG_ON(btree_node_write_in_flight(b));
 out:
         b->flags = 0;
@@ -627,6 +641,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 {
         struct btree_cache *bc = &c->btree_cache;
         struct btree *b;
+        u32 seq;
 
         BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
         /*
@@ -656,31 +671,31 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                 return NULL;
         }
 
+        set_btree_node_read_in_flight(b);
+
+        six_unlock_write(&b->c.lock);
+        seq = b->c.lock.state.seq;
+        six_unlock_intent(&b->c.lock);
+
         /* Unlock before doing IO: */
         if (iter && sync)
                 bch2_trans_unlock(iter->trans);
 
         bch2_btree_node_read(c, b, sync);
 
-        six_unlock_write(&b->c.lock);
-
-        if (!sync) {
-                six_unlock_intent(&b->c.lock);
+        if (!sync)
                 return NULL;
-        }
 
         /*
          * XXX: this will probably always fail because btree_iter_relock()
          * currently fails for iterators that aren't pointed at a valid btree
          * node
          */
-        if (iter && !bch2_trans_relock(iter->trans)) {
-                six_unlock_intent(&b->c.lock);
+        if (iter && !bch2_trans_relock(iter->trans))
                 return ERR_PTR(-EINTR);
-        }
 
-        if (lock_type == SIX_LOCK_read)
-                six_lock_downgrade(&b->c.lock);
+        if (!six_relock_type(&b->c.lock, lock_type, seq))
+                return ERR_PTR(-EINTR);
 
         return b;
 }
@@ -824,11 +839,12 @@ lock_node:
         }
 
         if (unlikely(btree_node_read_in_flight(b))) {
+                u32 seq = b->c.lock.state.seq;
+
                 six_unlock_type(&b->c.lock, lock_type);
                 bch2_trans_unlock(iter->trans);
 
-                wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-                               TASK_UNINTERRUPTIBLE);
+                bch2_btree_node_wait_on_read(b);
 
                 /*
                  * XXX: check if this always fails - btree_iter_relock()
@@ -837,7 +853,9 @@ lock_node:
                  */
                 if (iter && !bch2_trans_relock(iter->trans))
                         return ERR_PTR(-EINTR);
-                goto retry;
+
+                if (!six_relock_type(&b->c.lock, lock_type, seq))
+                        goto retry;
         }
 
         prefetch(b->aux_data);
@@ -916,8 +934,7 @@ lock_node:
         }
 
         /* XXX: waiting on IO with btree locks held: */
-        wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-                       TASK_UNINTERRUPTIBLE);
+        __bch2_btree_node_wait_on_read(b);
 
         prefetch(b->aux_data);
 
@@ -972,16 +989,24 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
         b = btree_cache_find(bc, k);
         if (!b)
                 return;
+wait_on_io:
+        /* not allowed to wait on io with btree locks held: */
+
+        /* XXX we're called from btree_gc which will be holding other btree
+         * nodes locked
+         * */
+        __bch2_btree_node_wait_on_read(b);
+        __bch2_btree_node_wait_on_write(b);
 
         six_lock_intent(&b->c.lock, NULL, NULL);
         six_lock_write(&b->c.lock, NULL, NULL);
 
-        wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-                       TASK_UNINTERRUPTIBLE);
-        __bch2_btree_node_write(c, b);
-
-        /* wait for any in flight btree write */
-        btree_node_wait_on_io(b);
+        if (btree_node_dirty(b)) {
+                __bch2_btree_node_write(c, b);
+                six_unlock_write(&b->c.lock);
+                six_unlock_intent(&b->c.lock);
+                goto wait_on_io;
+        }
 
         BUG_ON(btree_node_dirty(b));
 
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 98fcd6a9f97a..12894f8959bf 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -22,6 +22,50 @@
 #include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
+void bch2_btree_node_io_unlock(struct btree *b)
+{
+        EBUG_ON(!btree_node_write_in_flight(b));
+
+        clear_btree_node_write_in_flight(b);
+        wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
+}
+
+void bch2_btree_node_io_lock(struct btree *b)
+{
+        BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+
+        wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
+                            TASK_UNINTERRUPTIBLE);
+}
+
+void __bch2_btree_node_wait_on_read(struct btree *b)
+{
+        wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                       TASK_UNINTERRUPTIBLE);
+}
+
+void __bch2_btree_node_wait_on_write(struct btree *b)
+{
+        wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
+                       TASK_UNINTERRUPTIBLE);
+}
+
+void bch2_btree_node_wait_on_read(struct btree *b)
+{
+        BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+
+        wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                       TASK_UNINTERRUPTIBLE);
+}
+
+void bch2_btree_node_wait_on_write(struct btree *b)
+{
+        BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+
+        wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
+                       TASK_UNINTERRUPTIBLE);
+}
+
 static void verify_no_dups(struct btree *b,
                            struct bkey_packed *start,
                            struct bkey_packed *end)
@@ -432,7 +476,8 @@ void bch2_btree_init_next(struct btree_trans *trans,
         EBUG_ON(iter && iter->l[b->c.level].b != b);
         BUG_ON(bset_written(b, bset(b, &b->set[1])));
 
-        if (b->nsets == MAX_BSETS) {
+        if (b->nsets == MAX_BSETS &&
+            !btree_node_write_in_flight(b)) {
                 unsigned log_u64s[] = {
                         ilog2(bset_u64s(&b->set[0])),
                         ilog2(bset_u64s(&b->set[1])),
@@ -1401,8 +1446,6 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
         btree_pos_to_text(&PBUF(buf), c, b);
         trace_btree_read(c, b);
 
-        set_btree_node_read_in_flight(b);
-
         if (bch2_verify_all_btree_replicas &&
             !btree_node_read_all_replicas(c, b, sync))
                 return;
@@ -1478,6 +1521,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
         bkey_copy(&b->key, k);
         BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
 
+        set_btree_node_read_in_flight(b);
+
         bch2_btree_node_read(c, b, true);
 
         if (btree_node_read_error(b)) {
@@ -1523,7 +1568,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
         struct btree_write *w = btree_prev_write(b);
 
         bch2_btree_complete_write(c, b, w);
-        btree_node_io_unlock(b);
+        bch2_btree_node_io_unlock(b);
 }
 
 static void bch2_btree_node_write_error(struct bch_fs *c,
@@ -1705,6 +1750,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
         bool validate_before_checksum = false;
         void *data;
 
+        BUG_ON(btree_node_write_in_flight(b));
+
         if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
                 return;
 
@@ -1732,7 +1779,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
                  * XXX waiting on btree writes with btree locks held -
                  * this can deadlock, and we hit the write error path
                  */
-                btree_node_wait_on_io(b);
+                bch2_btree_node_wait_on_write(b);
                 continue;
         }
 
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index fae67622c127..89fd4aba5218 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -52,24 +52,12 @@ struct btree_write_bio {
         struct bch_write_bio wbio;
 };
 
-static inline void btree_node_io_unlock(struct btree *b)
-{
-        EBUG_ON(!btree_node_write_in_flight(b));
-        clear_btree_node_write_in_flight(b);
-        wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
-}
-
-static inline void btree_node_io_lock(struct btree *b)
-{
-        wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
-                            TASK_UNINTERRUPTIBLE);
-}
-
-static inline void btree_node_wait_on_io(struct btree *b)
-{
-        wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
-                       TASK_UNINTERRUPTIBLE);
-}
+void bch2_btree_node_io_unlock(struct btree *);
+void bch2_btree_node_io_lock(struct btree *);
+void __bch2_btree_node_wait_on_read(struct btree *);
+void __bch2_btree_node_wait_on_write(struct btree *);
+void bch2_btree_node_wait_on_read(struct btree *);
+void bch2_btree_node_wait_on_write(struct btree *);
 
 static inline bool btree_node_may_write(struct btree *b)
 {
@@ -169,7 +157,7 @@ static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
                 }
 
                 six_unlock_type(&b->c.lock, lock_held);
-                btree_node_wait_on_io(b);
+                bch2_btree_node_wait_on_write(b);
                 btree_node_lock_type(c, b, lock_held);
         }
 }
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 191e5d306700..6b55a4108425 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -567,7 +567,7 @@ static void btree_update_nodes_written(struct btree_update *as)
                 six_unlock_read(&old->c.lock);
 
                 if (seq == as->old_nodes_seq[i])
-                        btree_node_wait_on_io(old);
+                        bch2_btree_node_wait_on_write(old);
         }
 
         /*
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 92e970bc1332..b0a8eb58a7a7 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -133,7 +133,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
         if (c->opts.nochanges)
                 return;
 
-        btree_node_io_lock(b);
+        bch2_btree_node_io_lock(b);
         mutex_lock(&c->verify_lock);
 
         if (!c->verify_ondisk) {
@@ -176,7 +176,7 @@
         }
 out:
         mutex_unlock(&c->verify_lock);
-        btree_node_io_unlock(b);
+        bch2_btree_node_io_unlock(b);
 }
 
 #ifdef CONFIG_DEBUG_FS
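
The new helpers in btree_io.c are thin wrappers around the kernel's wait-bit
machinery: the IO path sets a flag bit in b->flags before submitting the bio,
and on completion clears it and wakes any waiters, while waiters sleep in
TASK_UNINTERRUPTIBLE via the IO-accounting variant wait_on_bit_io(). A rough
sketch of both sides of that protocol follows; it is an illustration only,
assuming bcachefs's struct btree and BTREE_NODE_write_in_flight flag, using
generic bitops rather than bcachefs's generated set/clear accessors, and the
example_*() function names are hypothetical.

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/wait_bit.h>

/* Writer: mark the IO in flight before the bio is submitted. */
static void example_write_start(struct btree *b)
{
        set_bit(BTREE_NODE_write_in_flight, &b->flags);
        /* ... build and submit the btree write bio ... */
}

/* IO completion: clear the flag with release semantics and wake waiters. */
static void example_write_done(struct btree *b)
{
        clear_bit_unlock(BTREE_NODE_write_in_flight, &b->flags);
        smp_mb__after_atomic();  /* barrier required before wake_up_bit() */
        wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}

/* Waiter: may sleep for IO, so no btree node locks can be held here. */
static void example_wait_on_write(struct btree *b)
{
        wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
                       TASK_UNINTERRUPTIBLE);
}

wait_on_bit_io() differs from plain wait_on_bit() only in that the sleep is
accounted as IO wait, which is the right choice when the bit is cleared from
bio completion - and it is exactly why the patch insists the caller not hold
btree node locks across it.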