author     Kent Overstreet <kent.overstreet@linux.dev>    2025-06-24 16:01:33 -0400
committer  Kent Overstreet <kent.overstreet@linux.dev>    2025-07-03 01:20:18 -0400
commit     d0ffb68c86ade11838acb4d31df55a1a056d8c1c
tree       56fd16b199248ad7d811c115cbc1199b78f564c0
parent     100142f6ec4d184a3e78870a878bdd80415852d4
bcachefs: Evict/bypass key cache when deleting
Fix a performance bug when doing many unlinks.
The btree has optimizations to ensure we don't have too many whiteouts
to scan in peek() before we find a real key to return, but unflushed key
cache deletions break this.
To fix this, tweak the existing code that redirects cached updates creating
a new key to the underlying btree, so that it can be used for deletions as
well.
Reported-by: John Schoenick <johns@valvesoftware.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
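
To make the problem and the fix concrete, here is a small standalone toy model in C. It is not bcachefs code, and every identifier in it (toy_peek, delete_cached, delete_in_btree, whiteout, ...) is invented for illustration: a "btree" of sorted keys plus a "key cache" overlay of unflushed whiteouts. Deleting through the cache forces peek() to skip every whiteout before it finds a live key; applying the same deletions directly to the backing btree, which is the idea behind this commit, keeps the scan short. As written, the sketch prints a scan of 901 entries for the cached case and 1 entry once the deletions have been applied to the btree.

/*
 * Toy model, not bcachefs code: all names below are invented.
 * Shows why unflushed key-cache deletions (whiteouts) make peek()
 * expensive, and why applying deletions directly to the backing
 * btree keeps the scan short.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NKEYS 1000

static int  btree_keys[NKEYS];  /* sorted live keys in the "btree" */
static int  nr_btree;
static bool whiteout[NKEYS];    /* key-cache overlay: unflushed deletions */

/* Return the first live key >= start, counting how many entries we scan. */
static int toy_peek(int start, int *scanned)
{
	*scanned = 0;
	for (int i = 0; i < nr_btree; i++) {
		if (btree_keys[i] < start)
			continue;
		(*scanned)++;
		if (!whiteout[btree_keys[i]])
			return btree_keys[i];
	}
	return -1;
}

/* Old behaviour: a deletion just leaves an unflushed whiteout in the cache. */
static void delete_cached(int key)
{
	whiteout[key] = true;
}

/*
 * New behaviour (the idea in this commit): apply the deletion to the btree,
 * whose own machinery - modelled here as simple removal - compacts it away.
 */
static void delete_in_btree(int key)
{
	for (int i = 0; i < nr_btree; i++)
		if (btree_keys[i] == key) {
			memmove(&btree_keys[i], &btree_keys[i + 1],
				(nr_btree - i - 1) * sizeof(btree_keys[0]));
			nr_btree--;
			return;
		}
}

int main(void)
{
	int scanned, key;

	for (int i = 0; i < NKEYS; i++)
		btree_keys[i] = i;
	nr_btree = NKEYS;

	/* Unlink 900 keys via the key cache: peek() must skip every whiteout. */
	for (int i = 0; i < 900; i++)
		delete_cached(i);
	key = toy_peek(0, &scanned);
	printf("first live key %d, scanned %d entries\n", key, scanned);

	/* Redirect the same deletions to the btree: peek() is cheap again. */
	for (int i = 0; i < 900; i++) {
		whiteout[i] = false;
		delete_in_btree(i);
	}
	key = toy_peek(0, &scanned);
	printf("first live key %d, scanned %d entries\n", key, scanned);
	return 0;
}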
-rw-r--r--  fs/bcachefs/btree_iter.c          1
-rw-r--r--  fs/bcachefs/btree_key_cache.c     1
-rw-r--r--  fs/bcachefs/btree_trans_commit.c  6
-rw-r--r--  fs/bcachefs/btree_types.h         8
-rw-r--r--  fs/bcachefs/btree_update.c      105

5 files changed, 73 insertions, 48 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 87d98a5cb02a..0b856c72389c 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -645,6 +645,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str
 
 	trans_for_each_update(trans, i)
 		if (!i->cached &&
+		    !i->key_cache_flushing &&
 		    i->level == b->c.level &&
 		    i->btree_id == b->c.btree_id &&
 		    bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index d96188b92db2..f68265f9969c 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -580,6 +580,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	bool kick_reclaim = false;
 
 	BUG_ON(insert->k.u64s > ck->u64s);
+	BUG_ON(bkey_deleted(&insert->k));
 
 	bkey_copy(ck->k, insert);
 
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 454b4c5c1808..7fcf248a9a76 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -46,6 +46,9 @@ void bch2_trans_commit_flags_to_text(struct printbuf *out, enum bch_trans_commit
 static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
+	if (i->key_cache_flushing)
+		return;
+
 	struct bch_fs *c = trans->c;
 	struct bkey u;
 	struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u);
@@ -337,6 +340,9 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 
 	BUG_ON(!bpos_eq(i->k->k.p, path->pos));
 	BUG_ON(i->cached != path->cached);
+	BUG_ON(i->cached &&
+	       !i->key_cache_already_flushed &&
+	       bkey_deleted(&i->k->k));;
 	BUG_ON(i->level != path->level);
 	BUG_ON(i->btree_id != path->btree_id);
 	BUG_ON(i->bkey_type != __btree_node_type(path->level, path->btree_id));
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 112170fd9c8f..76adf75617aa 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -422,14 +422,16 @@ struct btree_insert_entry {
 	u8			sort_order;
 	u8			bkey_type;
 	enum btree_id		btree_id:8;
-	u8			level:4;
+	u8			level:3;
 	bool			cached:1;
 	bool			insert_trigger_run:1;
 	bool			overwrite_trigger_run:1;
 	bool			key_cache_already_flushed:1;
+	bool			key_cache_flushing:1;
 	/*
-	 * @old_k may be a key from the journal; @old_btree_u64s always refers
-	 * to the size of the key being overwritten in the btree:
+	 * @old_k may be a key from the journal or the key cache;
+	 * @old_btree_u64s always refers to the size of the key being
+	 * overwritten in the btree:
 	 */
 	u8			old_btree_u64s;
 	btree_path_idx_t	path;
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index 5d5394db3965..5d9e02370aff 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -325,47 +325,11 @@ err:
 	return ret;
 }
 
-static noinline int flush_new_cached_update(struct btree_trans *trans,
-					    struct btree_insert_entry *i,
-					    enum btree_iter_update_trigger_flags flags,
-					    unsigned long ip)
-{
-	struct bkey k;
-	int ret;
-
-	btree_path_idx_t path_idx =
-		bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
-			      BTREE_ITER_intent, _THIS_IP_);
-	ret = bch2_btree_path_traverse(trans, path_idx, 0);
-	if (ret)
-		goto out;
-
-	struct btree_path *btree_path = trans->paths + path_idx;
-
-	/*
-	 * The old key in the insert entry might actually refer to an existing
-	 * key in the btree that has been deleted from cache and not yet
-	 * flushed. Check for this and skip the flush so we don't run triggers
-	 * against a stale key.
-	 */
-	bch2_btree_path_peek_slot_exact(btree_path, &k);
-	if (!bkey_deleted(&k))
-		goto out;
-
-	i->key_cache_already_flushed = true;
-	i->flags |= BTREE_TRIGGER_norun;
-
-	btree_path_set_should_be_locked(trans, btree_path);
-	ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip);
-out:
-	bch2_path_put(trans, path_idx, true);
-	return ret;
-}
-
-static int __must_check
-bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
-			  struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
-			  unsigned long ip)
+static inline struct btree_insert_entry *
+__btree_trans_update_by_path(struct btree_trans *trans,
+			     btree_path_idx_t path_idx,
+			     struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+			     unsigned long ip)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i, n;
@@ -436,6 +400,58 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
 		__btree_path_get(trans, trans->paths + i->path, true);
 
 	trace_update_by_path(trans, path, i, overwrite);
+	return i;
+}
+
+static noinline int flush_new_cached_update(struct btree_trans *trans,
+					    struct btree_insert_entry *i,
+					    enum btree_iter_update_trigger_flags flags,
+					    unsigned long ip)
+{
+	btree_path_idx_t path_idx =
+		bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
+			      BTREE_ITER_intent, _THIS_IP_);
+	int ret = bch2_btree_path_traverse(trans, path_idx, 0);
+	if (ret)
+		goto out;
+
+	struct btree_path *btree_path = trans->paths + path_idx;
+
+	btree_path_set_should_be_locked(trans, btree_path);
+#if 0
+	/*
+	 * The old key in the insert entry might actually refer to an existing
+	 * key in the btree that has been deleted from cache and not yet
+	 * flushed. Check for this and skip the flush so we don't run triggers
+	 * against a stale key.
+	 */
+	struct bkey k;
+	bch2_btree_path_peek_slot_exact(btree_path, &k);
+	if (!bkey_deleted(&k))
+		goto out;
+#endif
+	i->key_cache_already_flushed = true;
+	i->flags |= BTREE_TRIGGER_norun;
+
+	struct bkey old_k = i->old_k;
+	const struct bch_val *old_v = i->old_v;
+
+	i = __btree_trans_update_by_path(trans, path_idx, i->k, flags, _THIS_IP_);
+
+	i->old_k = old_k;
+	i->old_v = old_v;
+	i->key_cache_flushing = true;
+out:
+	bch2_path_put(trans, path_idx, true);
+	return ret;
+}
+
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
+			  struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+			  unsigned long ip)
+{
+	struct btree_insert_entry *i = __btree_trans_update_by_path(trans, path_idx, k, flags, ip);
 
 	/*
 	 * If a key is present in the key cache, it must also exist in the
@@ -444,10 +460,9 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
 	 * the key cache - but the key has to exist in the btree for that to
 	 * work:
 	 */
-	if (path->cached && !i->old_btree_u64s)
-		return flush_new_cached_update(trans, i, flags, ip);
-
-	return 0;
+	return i->cached && (!i->old_btree_u64s || bkey_deleted(&k->k))
+		? flush_new_cached_update(trans, i, flags, ip)
+		: 0;
 }
 
 static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,