author     Kent Overstreet <kent.overstreet@gmail.com>    2017-11-21 22:28:25 -0500
committer  Kent Overstreet <kent.overstreet@gmail.com>    2018-01-30 20:40:46 -0500
commit     182394227f0b8243ba634d138a58d06b72c07e11 (patch)
tree       1a61258b92b694799f64bd2099fbfaef3345d619
parent     891c56d240dd477c1c921817ed63290ec1da77db (diff)
bcachefs: Refactor btree cache code
Pull the btree node cache state out of struct bch_fs and into a new struct btree_cache, making struct bch_fs a bit smaller.
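For orientation, the consolidated state looks roughly like this. The sketch below is an abridged copy of the struct btree_cache and its early-init helper added in the btree_types.h and btree_cache.c hunks further down; the long comments from the original are trimmed and the old struct bch_fs field names are noted in comments.

struct btree_cache {
    struct rhashtable       table;          /* node lookup table, was c->btree_cache_table */
    bool                    table_init_done;

    struct mutex            lock;           /* was c->btree_cache_lock */
    struct list_head        live;           /* was c->btree_cache */
    struct list_head        freeable;       /* was c->btree_cache_freeable */
    struct list_head        freed;          /* was c->btree_cache_freed */

    /* Number of elements in live + freeable lists */
    unsigned                used;           /* was c->btree_cache_used */
    unsigned                reserve;        /* was c->btree_cache_reserve */
    struct shrinker         shrink;         /* was c->btree_cache_shrink */

    /* open coded "mutex" for cannibalizing cached nodes when allocation fails: */
    struct task_struct      *alloc_lock;    /* was c->btree_cache_alloc_lock */
    struct closure_waitlist alloc_wait;     /* was c->mca_wait */
};

/* Plain field initialization only - called from bch2_fs_alloc() before anything
 * can touch the cache; the allocating setup stays in bch2_fs_btree_cache_init(). */
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
{
    mutex_init(&bc->lock);
    INIT_LIST_HEAD(&bc->live);
    INIT_LIST_HEAD(&bc->freeable);
    INIT_LIST_HEAD(&bc->freed);
}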
-rw-r--r--  fs/bcachefs/bcachefs.h              |  35
-rw-r--r--  fs/bcachefs/btree_cache.c           | 253
-rw-r--r--  fs/bcachefs/btree_cache.h           |  15
-rw-r--r--  fs/bcachefs/btree_io.c              |  10
-rw-r--r--  fs/bcachefs/btree_iter.c            |   4
-rw-r--r--  fs/bcachefs/btree_types.h           |  36
-rw-r--r--  fs/bcachefs/btree_update_interior.c |  43
-rw-r--r--  fs/bcachefs/super.c                 |  10
-rw-r--r--  fs/bcachefs/sysfs.c                 |   8
9 files changed, 220 insertions, 194 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index ff7ce71ac0a0..2d83cd163d55 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -544,40 +544,7 @@ struct bch_fs {
     struct btree_root btree_roots[BTREE_ID_NR];
     struct mutex btree_root_lock;

-    bool btree_cache_table_init_done;
-    struct rhashtable btree_cache_table;
-
-    /*
-     * We never free a struct btree, except on shutdown - we just put it on
-     * the btree_cache_freed list and reuse it later. This simplifies the
-     * code, and it doesn't cost us much memory as the memory usage is
-     * dominated by buffers that hold the actual btree node data and those
-     * can be freed - and the number of struct btrees allocated is
-     * effectively bounded.
-     *
-     * btree_cache_freeable effectively is a small cache - we use it because
-     * high order page allocations can be rather expensive, and it's quite
-     * common to delete and allocate btree nodes in quick succession. It
-     * should never grow past ~2-3 nodes in practice.
-     */
-    struct mutex btree_cache_lock;
-    struct list_head btree_cache;
-    struct list_head btree_cache_freeable;
-    struct list_head btree_cache_freed;
-
-    /* Number of elements in btree_cache + btree_cache_freeable lists */
-    unsigned btree_cache_used;
-    unsigned btree_cache_reserve;
-    struct shrinker btree_cache_shrink;
-
-    /*
-     * If we need to allocate memory for a new btree node and that
-     * allocation fails, we can cannibalize another node in the btree cache
-     * to satisfy the allocation - lock to guarantee only one thread does
-     * this at a time:
-     */
-    struct closure_waitlist mca_wait;
-    struct task_struct *btree_cache_alloc_lock;
+    struct btree_cache btree_cache;

     mempool_t btree_reserve_pool;
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 4147545d047b..22846d8acbfb 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -31,13 +31,15 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
             reserve += min_t(unsigned, 1,
                              c->btree_roots[i].b->level) * 8;

-    c->btree_cache_reserve = reserve;
+    c->btree_cache.reserve = reserve;
 }

-#define mca_can_free(c) \
-    max_t(int, 0, c->btree_cache_used - c->btree_cache_reserve)
+static inline unsigned btree_cache_can_free(struct btree_cache *bc)
+{
+    return max_t(int, 0, bc->used - bc->reserve);
+}

-static void __mca_data_free(struct bch_fs *c, struct btree *b)
+static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
 {
     EBUG_ON(btree_node_write_in_flight(b));

@@ -46,11 +48,13 @@ static void __mca_data_free(struct bch_fs *c, struct btree *b)
     bch2_btree_keys_free(b);
 }

-static void mca_data_free(struct bch_fs *c, struct btree *b)
+static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 {
-    __mca_data_free(c, b);
-    c->btree_cache_used--;
-    list_move(&b->list, &c->btree_cache_freed);
+    struct btree_cache *bc = &c->btree_cache;
+
+    __btree_node_data_free(c, b);
+    bc->used--;
+    list_move(&b->list, &bc->freed);
 }

 static const struct rhashtable_params bch_btree_cache_params = {
@@ -59,8 +63,10 @@ static const struct rhashtable_params bch_btree_cache_params = {
     .key_len = sizeof(struct bch_extent_ptr),
 };

-static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
+static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 {
+    struct btree_cache *bc = &c->btree_cache;
+
     b->data = kvpmalloc(btree_bytes(c), gfp);
     if (!b->data)
         goto err;
@@ -68,16 +74,16 @@
     if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp))
         goto err;

-    c->btree_cache_used++;
-    list_move(&b->list, &c->btree_cache_freeable);
+    bc->used++;
+    list_move(&b->list, &bc->freeable);
     return;
 err:
     kvpfree(b->data, btree_bytes(c));
     b->data = NULL;
-    list_move(&b->list, &c->btree_cache_freed);
+    list_move(&b->list, &bc->freed);
 }

-static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
+static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
 {
     struct btree *b = kzalloc(sizeof(struct btree), gfp);
     if (!b)
@@ -88,49 +94,48 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
     INIT_LIST_HEAD(&b->list);
     INIT_LIST_HEAD(&b->write_blocked);

-    mca_data_alloc(c, b, gfp);
+    btree_node_data_alloc(c, b, gfp);
     return b->data ? b : NULL;
 }

 /* Btree in memory cache - hash table */

-void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
 {
-    rhashtable_remove_fast(&c->btree_cache_table, &b->hash,
-                           bch_btree_cache_params);
+    rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);

     /* Cause future lookups for this node to fail: */
     bkey_i_to_extent(&b->key)->v._data[0] = 0;
 }

-int __bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b)
+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
 {
-    return rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
+    return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
                                          bch_btree_cache_params);
 }

-int bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b,
-                                unsigned level, enum btree_id id)
+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
+                                unsigned level, enum btree_id id)
 {
     int ret;

     b->level = level;
     b->btree_id = id;

-    mutex_lock(&c->btree_cache_lock);
-    ret = __bch2_btree_node_hash_insert(c, b);
+    mutex_lock(&bc->lock);
+    ret = __bch2_btree_node_hash_insert(bc, b);
     if (!ret)
-        list_add(&b->list, &c->btree_cache);
-    mutex_unlock(&c->btree_cache_lock);
+        list_add(&b->list, &bc->live);
+    mutex_unlock(&bc->lock);

     return ret;
 }

 __flatten
-static inline struct btree *mca_find(struct bch_fs *c,
+static inline struct btree *btree_cache_find(struct btree_cache *bc,
                                      const struct bkey_i *k)
 {
-    return rhashtable_lookup_fast(&c->btree_cache_table, &PTR_HASH(k),
+    return rhashtable_lookup_fast(&bc->table, &PTR_HASH(k),
                                   bch_btree_cache_params);
 }

@@ -140,9 +145,10 @@ static inline struct btree *mca_find(struct bch_fs *c,
  */
 static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 {
+    struct btree_cache *bc = &c->btree_cache;
     int ret = 0;

-    lockdep_assert_held(&c->btree_cache_lock);
+    lockdep_assert_held(&bc->lock);

     if (!six_trylock_intent(&b->lock))
         return -ENOMEM;
@@ -201,11 +207,12 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
     return __btree_node_reclaim(c, b, true);
 }

-static unsigned long bch2_mca_scan(struct shrinker *shrink,
-                                   struct shrink_control *sc)
+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
+                                           struct shrink_control *sc)
 {
     struct bch_fs *c = container_of(shrink, struct bch_fs,
-                                    btree_cache_shrink);
+                                    btree_cache.shrink);
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b, *t;
     unsigned long nr = sc->nr_to_scan;
     unsigned long can_free;
@@ -218,8 +225,8 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,

     /* Return -1 if we can't do anything right now */
     if (sc->gfp_mask & __GFP_IO)
-        mutex_lock(&c->btree_cache_lock);
-    else if (!mutex_trylock(&c->btree_cache_lock))
+        mutex_lock(&bc->lock);
+    else if (!mutex_trylock(&bc->lock))
         return -1;

     /*
@@ -230,11 +237,11 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
      * IO can always make forward progress:
      */
     nr /= btree_pages(c);
-    can_free = mca_can_free(c);
+    can_free = btree_cache_can_free(bc);
     nr = min_t(unsigned long, nr, can_free);

     i = 0;
-    list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
+    list_for_each_entry_safe(b, t, &bc->freeable, list) {
         touched++;

         if (freed >= nr)
@@ -242,34 +249,34 @@
         if (++i > 3 && !btree_node_reclaim(c, b)) {
-            mca_data_free(c, b);
+            btree_node_data_free(c, b);
             six_unlock_write(&b->lock);
             six_unlock_intent(&b->lock);
             freed++;
         }
     }
 restart:
-    list_for_each_entry_safe(b, t, &c->btree_cache, list) {
+    list_for_each_entry_safe(b, t, &bc->live, list) {
         touched++;

         if (freed >= nr) {
             /* Save position */
-            if (&t->list != &c->btree_cache)
-                list_move_tail(&c->btree_cache, &t->list);
+            if (&t->list != &bc->live)
+                list_move_tail(&bc->live, &t->list);
             break;
         }

         if (!btree_node_accessed(b) && !btree_node_reclaim(c, b)) {
-            /* can't call bch2_btree_node_hash_remove under btree_cache_lock */
+            /* can't call bch2_btree_node_hash_remove under lock */
             freed++;
-            if (&t->list != &c->btree_cache)
-                list_move_tail(&c->btree_cache, &t->list);
+            if (&t->list != &bc->live)
+                list_move_tail(&bc->live, &t->list);

-            mca_data_free(c, b);
-            mutex_unlock(&c->btree_cache_lock);
+            btree_node_data_free(c, b);
+            mutex_unlock(&bc->lock);

-            bch2_btree_node_hash_remove(c, b);
+            bch2_btree_node_hash_remove(bc, b);
             six_unlock_write(&b->lock);
             six_unlock_intent(&b->lock);

@@ -277,97 +284,97 @@ restart:
                 goto out;

             if (sc->gfp_mask & __GFP_IO)
-                mutex_lock(&c->btree_cache_lock);
-            else if (!mutex_trylock(&c->btree_cache_lock))
+                mutex_lock(&bc->lock);
+            else if (!mutex_trylock(&bc->lock))
                 goto out;
             goto restart;
         } else
             clear_btree_node_accessed(b);
     }

-    mutex_unlock(&c->btree_cache_lock);
+    mutex_unlock(&bc->lock);
 out:
     return (unsigned long) freed * btree_pages(c);
 }

-static unsigned long bch2_mca_count(struct shrinker *shrink,
-                                    struct shrink_control *sc)
+static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
+                                            struct shrink_control *sc)
 {
     struct bch_fs *c = container_of(shrink, struct bch_fs,
-                                    btree_cache_shrink);
+                                    btree_cache.shrink);
+    struct btree_cache *bc = &c->btree_cache;

     if (btree_shrinker_disabled(c))
         return 0;

-    return mca_can_free(c) * btree_pages(c);
+    return btree_cache_can_free(bc) * btree_pages(c);
 }

-void bch2_fs_btree_exit(struct bch_fs *c)
+void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b;
     unsigned i;

-    if (c->btree_cache_shrink.list.next)
-        unregister_shrinker(&c->btree_cache_shrink);
+    if (bc->shrink.list.next)
+        unregister_shrinker(&bc->shrink);

-    mutex_lock(&c->btree_cache_lock);
+    mutex_lock(&bc->lock);

 #ifdef CONFIG_BCACHEFS_DEBUG
     if (c->verify_data)
-        list_move(&c->verify_data->list, &c->btree_cache);
+        list_move(&c->verify_data->list, &bc->live);

     kvpfree(c->verify_ondisk, btree_bytes(c));
 #endif

     for (i = 0; i < BTREE_ID_NR; i++)
         if (c->btree_roots[i].b)
-            list_add(&c->btree_roots[i].b->list, &c->btree_cache);
+            list_add(&c->btree_roots[i].b->list, &bc->live);

-    list_splice(&c->btree_cache_freeable,
-                &c->btree_cache);
+    list_splice(&bc->freeable, &bc->live);

-    while (!list_empty(&c->btree_cache)) {
-        b = list_first_entry(&c->btree_cache, struct btree, list);
+    while (!list_empty(&bc->live)) {
+        b = list_first_entry(&bc->live, struct btree, list);

         if (btree_node_dirty(b))
             bch2_btree_complete_write(c, b, btree_current_write(b));
         clear_btree_node_dirty(b);

-        mca_data_free(c, b);
+        btree_node_data_free(c, b);
     }

-    while (!list_empty(&c->btree_cache_freed)) {
-        b = list_first_entry(&c->btree_cache_freed,
-                             struct btree, list);
+    while (!list_empty(&bc->freed)) {
+        b = list_first_entry(&bc->freed, struct btree, list);
         list_del(&b->list);
         kfree(b);
     }

-    mutex_unlock(&c->btree_cache_lock);
+    mutex_unlock(&bc->lock);

-    if (c->btree_cache_table_init_done)
-        rhashtable_destroy(&c->btree_cache_table);
+    if (bc->table_init_done)
+        rhashtable_destroy(&bc->table);
 }

-int bch2_fs_btree_init(struct bch_fs *c)
+int bch2_fs_btree_cache_init(struct bch_fs *c)
 {
+    struct btree_cache *bc = &c->btree_cache;
     unsigned i;
     int ret;

-    ret = rhashtable_init(&c->btree_cache_table, &bch_btree_cache_params);
+    ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
     if (ret)
         return ret;

-    c->btree_cache_table_init_done = true;
+    bc->table_init_done = true;

     bch2_recalc_btree_reserve(c);

-    for (i = 0; i < c->btree_cache_reserve; i++)
-        if (!mca_bucket_alloc(c, GFP_KERNEL))
+    for (i = 0; i < bc->reserve; i++)
+        if (!btree_node_mem_alloc(c, GFP_KERNEL))
             return -ENOMEM;

-    list_splice_init(&c->btree_cache,
-                     &c->btree_cache_freeable);
+    list_splice_init(&bc->live, &bc->freeable);

 #ifdef CONFIG_BCACHEFS_DEBUG
     mutex_init(&c->verify_lock);
@@ -376,42 +383,53 @@ int bch2_fs_btree_init(struct bch_fs *c)
     if (!c->verify_ondisk)
         return -ENOMEM;

-    c->verify_data = mca_bucket_alloc(c, GFP_KERNEL);
+    c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
     if (!c->verify_data)
         return -ENOMEM;

     list_del_init(&c->verify_data->list);
 #endif

-    c->btree_cache_shrink.count_objects = bch2_mca_count;
-    c->btree_cache_shrink.scan_objects = bch2_mca_scan;
-    c->btree_cache_shrink.seeks = 4;
-    c->btree_cache_shrink.batch = btree_pages(c) * 2;
-    register_shrinker(&c->btree_cache_shrink);
+    bc->shrink.count_objects = bch2_btree_cache_count;
+    bc->shrink.scan_objects = bch2_btree_cache_scan;
+    bc->shrink.seeks = 4;
+    bc->shrink.batch = btree_pages(c) * 2;
+    register_shrinker(&bc->shrink);

     return 0;
 }

+void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
+{
+    mutex_init(&bc->lock);
+    INIT_LIST_HEAD(&bc->live);
+    INIT_LIST_HEAD(&bc->freeable);
+    INIT_LIST_HEAD(&bc->freed);
+}
+
 /*
  * We can only have one thread cannibalizing other cached btree nodes at a time,
  * or we'll deadlock. We use an open coded mutex to ensure that, which a
  * cannibalize_bucket() will take. This means every time we unlock the root of
  * the btree, we need to release this lock if we have it held.
  */
-void bch2_btree_node_cannibalize_unlock(struct bch_fs *c)
+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
 {
-    if (c->btree_cache_alloc_lock == current) {
+    struct btree_cache *bc = &c->btree_cache;
+
+    if (bc->alloc_lock == current) {
         trace_btree_node_cannibalize_unlock(c);
-        c->btree_cache_alloc_lock = NULL;
-        closure_wake_up(&c->mca_wait);
+        bc->alloc_lock = NULL;
+        closure_wake_up(&bc->alloc_wait);
     }
 }

-int bch2_btree_node_cannibalize_lock(struct bch_fs *c, struct closure *cl)
+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct task_struct *old;

-    old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
+    old = cmpxchg(&bc->alloc_lock, NULL, current);
     if (old == NULL || old == current)
         goto success;

@@ -420,13 +438,13 @@ int bch2_btree_node_cannibalize_lock(struct bch_fs *c, struct closure *cl)
         return -ENOMEM;
     }

-    closure_wait(&c->mca_wait, cl);
+    closure_wait(&bc->alloc_wait, cl);

     /* Try again, after adding ourselves to waitlist */
-    old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
+    old = cmpxchg(&bc->alloc_lock, NULL, current);
     if (old == NULL || old == current) {
         /* We raced */
-        closure_wake_up(&c->mca_wait);
+        closure_wake_up(&bc->alloc_wait);
         goto success;
     }

@@ -438,16 +456,17 @@ success:
     return 0;
 }

-static struct btree *mca_cannibalize(struct bch_fs *c)
+static struct btree *btree_node_cannibalize(struct bch_fs *c)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b;

-    list_for_each_entry_reverse(b, &c->btree_cache, list)
+    list_for_each_entry_reverse(b, &bc->live, list)
         if (!btree_node_reclaim(c, b))
             return b;

     while (1) {
-        list_for_each_entry_reverse(b, &c->btree_cache, list)
+        list_for_each_entry_reverse(b, &bc->live, list)
             if (!btree_node_write_and_reclaim(c, b))
                 return b;

@@ -462,16 +481,17 @@ static struct btree *mca_cannibalize(struct bch_fs *c)

 struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b;
     u64 start_time = local_clock();

-    mutex_lock(&c->btree_cache_lock);
+    mutex_lock(&bc->lock);

     /*
      * btree_free() doesn't free memory; it sticks the node on the end of
      * the list. Check if there's any freed nodes there:
      */
-    list_for_each_entry(b, &c->btree_cache_freeable, list)
+    list_for_each_entry(b, &bc->freeable, list)
         if (!btree_node_reclaim(c, b))
             goto out_unlock;

@@ -479,9 +499,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
      * We never free struct btree itself, just the memory that holds the on
      * disk node. Check the freed list before allocating a new one:
      */
-    list_for_each_entry(b, &c->btree_cache_freed, list)
+    list_for_each_entry(b, &bc->freed, list)
         if (!btree_node_reclaim(c, b)) {
-            mca_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
+            btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
             if (b->data)
                 goto out_unlock;

@@ -490,7 +510,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
             goto err;
         }

-    b = mca_bucket_alloc(c, __GFP_NOWARN|GFP_NOIO);
+    b = btree_node_mem_alloc(c, __GFP_NOWARN|GFP_NOIO);
     if (!b)
         goto err;

@@ -501,7 +521,7 @@ out_unlock:
     BUG_ON(btree_node_write_in_flight(b));

     list_del_init(&b->list);
-    mutex_unlock(&c->btree_cache_lock);
+    mutex_unlock(&bc->lock);
 out:
     b->flags = 0;
     b->written = 0;
@@ -517,18 +537,18 @@ out:
     return b;
 err:
     /* Try to cannibalize another cached btree node: */
-    if (c->btree_cache_alloc_lock == current) {
-        b = mca_cannibalize(c);
+    if (bc->alloc_lock == current) {
+        b = btree_node_cannibalize(c);
         list_del_init(&b->list);
-        mutex_unlock(&c->btree_cache_lock);
+        mutex_unlock(&bc->lock);

-        bch2_btree_node_hash_remove(c, b);
+        bch2_btree_node_hash_remove(bc, b);

         trace_btree_node_cannibalize(c);
         goto out;
     }

-    mutex_unlock(&c->btree_cache_lock);
+    mutex_unlock(&bc->lock);
     return ERR_PTR(-ENOMEM);
 }

@@ -539,6 +559,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                                                    unsigned level, enum six_lock_type lock_type)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b;

     /*
@@ -552,15 +573,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
         return b;

     bkey_copy(&b->key, k);
-    if (bch2_btree_node_hash_insert(c, b, level, iter->btree_id)) {
+    if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) {
         /* raced with another fill: */

         /* mark as unhashed... */
         bkey_i_to_extent(&b->key)->v._data[0] = 0;

-        mutex_lock(&c->btree_cache_lock);
-        list_add(&b->list, &c->btree_cache_freeable);
-        mutex_unlock(&c->btree_cache_lock);
+        mutex_lock(&bc->lock);
+        list_add(&b->list, &bc->freeable);
+        mutex_unlock(&bc->lock);

         six_unlock_write(&b->lock);
         six_unlock_intent(&b->lock);
@@ -601,13 +622,14 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
                                   const struct bkey_i *k, unsigned level, enum six_lock_type lock_type)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b;
     struct bset_tree *t;

     BUG_ON(level >= BTREE_MAX_DEPTH);
 retry:
     rcu_read_lock();
-    b = mca_find(c, k);
+    b = btree_cache_find(bc, k);
     rcu_read_unlock();

     if (unlikely(!b)) {
@@ -755,12 +777,13 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,

 void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
                               unsigned level, enum btree_id btree_id)
 {
+    struct btree_cache *bc = &c->btree_cache;
     struct btree *b;

     BUG_ON(level >= BTREE_MAX_DEPTH);

     rcu_read_lock();
-    b = mca_find(c, k);
+    b = btree_cache_find(bc, k);
     rcu_read_unlock();

     if (b)
@@ -771,15 +794,15 @@ void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
         return;

     bkey_copy(&b->key, k);
-    if (bch2_btree_node_hash_insert(c, b, level, btree_id)) {
+    if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
         /* raced with another fill: */

         /* mark as unhashed... */
         bkey_i_to_extent(&b->key)->v._data[0] = 0;

-        mutex_lock(&c->btree_cache_lock);
-        list_add(&b->list, &c->btree_cache_freeable);
-        mutex_unlock(&c->btree_cache_lock);
+        mutex_lock(&bc->lock);
+        list_add(&b->list, &bc->freeable);
+        mutex_unlock(&bc->lock);

         goto out;
     }
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 5e836acda5d6..46d536eb36a8 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -11,13 +11,13 @@ extern const char * const bch2_btree_ids[];

 void bch2_recalc_btree_reserve(struct bch_fs *);

-void bch2_btree_node_hash_remove(struct bch_fs *, struct btree *);
-int __bch2_btree_node_hash_insert(struct bch_fs *, struct btree *);
-int bch2_btree_node_hash_insert(struct bch_fs *, struct btree *,
+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
                                 unsigned, enum btree_id);

-void bch2_btree_node_cannibalize_unlock(struct bch_fs *);
-int bch2_btree_node_cannibalize_lock(struct bch_fs *, struct closure *);
+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);

 struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);

@@ -32,8 +32,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
 void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *,
                               unsigned, enum btree_id);

-void bch2_fs_btree_exit(struct bch_fs *);
-int bch2_fs_btree_init(struct bch_fs *);
+void bch2_fs_btree_cache_exit(struct bch_fs *);
+int bch2_fs_btree_cache_init(struct bch_fs *);
+void bch2_fs_btree_cache_init_early(struct btree_cache *);

 #define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index d20047ace529..8b7f4d2da9af 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1364,17 +1364,17 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
     closure_init_stack(&cl);

     do {
-        ret = bch2_btree_node_cannibalize_lock(c, &cl);
+        ret = bch2_btree_cache_cannibalize_lock(c, &cl);
         closure_sync(&cl);
     } while (ret);

     b = bch2_btree_node_mem_alloc(c);
-    bch2_btree_node_cannibalize_unlock(c);
+    bch2_btree_cache_cannibalize_unlock(c);

     BUG_ON(IS_ERR(b));

     bkey_copy(&b->key, k);
-    BUG_ON(bch2_btree_node_hash_insert(c, b, level, id));
+    BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));

     bch2_btree_node_read(c, b, true);
     six_unlock_write(&b->lock);
@@ -1844,8 +1844,8 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
     unsigned i;

     rcu_read_lock();
-    tbl = rht_dereference_rcu(c->btree_cache_table.tbl,
-                              &c->btree_cache_table);
+    tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
+                              &c->btree_cache.table);

     for (i = 0; i < tbl->size; i++)
         rht_for_each_entry_rcu(b, pos, tbl, i, hash)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index b1b6233993dc..b0e64957d493 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -769,7 +769,7 @@ retry_all:
         closure_init_stack(&cl);

         do {
-            ret = bch2_btree_node_cannibalize_lock(c, &cl);
+            ret = bch2_btree_cache_cannibalize_lock(c, &cl);
             closure_sync(&cl);
         } while (ret);
     }
@@ -817,7 +817,7 @@ retry:
     ret = btree_iter_linked(iter) ? -EINTR : 0;
 out:
-    bch2_btree_node_cannibalize_unlock(c);
+    bch2_btree_cache_cannibalize_unlock(c);
     return ret;
 io_error:
     BUG_ON(ret != -EIO);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index c0c162054bc7..8b4df034f926 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -130,6 +130,42 @@ struct btree {
 #endif
 };

+struct btree_cache {
+    struct rhashtable table;
+    bool table_init_done;
+    /*
+     * We never free a struct btree, except on shutdown - we just put it on
+     * the btree_cache_freed list and reuse it later. This simplifies the
+     * code, and it doesn't cost us much memory as the memory usage is
+     * dominated by buffers that hold the actual btree node data and those
+     * can be freed - and the number of struct btrees allocated is
+     * effectively bounded.
+     *
+     * btree_cache_freeable effectively is a small cache - we use it because
+     * high order page allocations can be rather expensive, and it's quite
+     * common to delete and allocate btree nodes in quick succession. It
+     * should never grow past ~2-3 nodes in practice.
+     */
+    struct mutex lock;
+    struct list_head live;
+    struct list_head freeable;
+    struct list_head freed;
+
+    /* Number of elements in live + freeable lists */
+    unsigned used;
+    unsigned reserve;
+    struct shrinker shrink;
+
+    /*
+     * If we need to allocate memory for a new btree node and that
+     * allocation fails, we can cannibalize another node in the btree cache
+     * to satisfy the allocation - lock to guarantee only one thread does
+     * this at a time:
+     */
+    struct task_struct *alloc_lock;
+    struct closure_waitlist alloc_wait;
+};
+
 #define BTREE_FLAG(flag)						\
 static inline bool btree_node_ ## flag(struct btree *b)		\
 {	return test_bit(BTREE_NODE_ ## flag, &b->flags); }		\
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 922a48635a6a..ae7760356dda 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -237,11 +237,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,

     six_lock_write(&b->lock);

-    bch2_btree_node_hash_remove(c, b);
+    bch2_btree_node_hash_remove(&c->btree_cache, b);

-    mutex_lock(&c->btree_cache_lock);
-    list_move(&b->list, &c->btree_cache_freeable);
-    mutex_unlock(&c->btree_cache_lock);
+    mutex_lock(&c->btree_cache.lock);
+    list_move(&b->list, &c->btree_cache.freeable);
+    mutex_unlock(&c->btree_cache.lock);

     /*
      * By using six_unlock_write() directly instead of
@@ -374,7 +374,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev

     b = as->reserve->b[--as->reserve->nr];

-    BUG_ON(bch2_btree_node_hash_insert(c, b, level, as->btree_id));
+    BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));

     set_btree_node_accessed(b);
     set_btree_node_dirty(b);
@@ -515,7 +515,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
      * Protects reaping from the btree node cache and using the btree node
      * open bucket reserve:
      */
-    ret = bch2_btree_node_cannibalize_lock(c, cl);
+    ret = bch2_btree_cache_cannibalize_lock(c, cl);
     if (ret) {
         bch2_disk_reservation_put(c, &disk_res);
         return ERR_PTR(ret);
@@ -543,11 +543,11 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
         reserve->b[reserve->nr++] = b;
     }

-    bch2_btree_node_cannibalize_unlock(c);
+    bch2_btree_cache_cannibalize_unlock(c);
     return reserve;
 err_free:
     bch2_btree_reserve_put(c, reserve);
-    bch2_btree_node_cannibalize_unlock(c);
+    bch2_btree_cache_cannibalize_unlock(c);
     trace_btree_reserve_get_fail(c, nr_nodes, cl);
     return ERR_PTR(ret);
 }
@@ -1015,9 +1015,9 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
 static void __bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
 {
     /* Root nodes cannot be reaped */
-    mutex_lock(&c->btree_cache_lock);
+    mutex_lock(&c->btree_cache.lock);
     list_del_init(&b->list);
-    mutex_unlock(&c->btree_cache_lock);
+    mutex_unlock(&c->btree_cache.lock);

     mutex_lock(&c->btree_root_lock);
     btree_node_root(c, b) = b;
@@ -1802,7 +1802,7 @@ retry:
         PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
         /* bch2_btree_reserve_get will unlock */
         do {
-            ret = bch2_btree_node_cannibalize_lock(c, &cl);
+            ret = bch2_btree_cache_cannibalize_lock(c, &cl);
             closure_sync(&cl);
         } while (ret == -EAGAIN);

@@ -1873,23 +1873,24 @@ retry:
     if (parent) {
         if (new_hash) {
             bkey_copy(&new_hash->key, &new_key->k_i);
-            BUG_ON(bch2_btree_node_hash_insert(c, new_hash,
-                                               b->level, b->btree_id));
+            ret = bch2_btree_node_hash_insert(&c->btree_cache,
+                        new_hash, b->level, b->btree_id);
+            BUG_ON(ret);
         }

         bch2_btree_insert_node(as, parent, &iter,
                                &keylist_single(&new_key->k_i));

         if (new_hash) {
-            mutex_lock(&c->btree_cache_lock);
-            bch2_btree_node_hash_remove(c, new_hash);
+            mutex_lock(&c->btree_cache.lock);
+            bch2_btree_node_hash_remove(&c->btree_cache, new_hash);

-            bch2_btree_node_hash_remove(c, b);
+            bch2_btree_node_hash_remove(&c->btree_cache, b);

             bkey_copy(&b->key, &new_key->k_i);
-            ret = __bch2_btree_node_hash_insert(c, b);
+            ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
             BUG_ON(ret);
-            mutex_unlock(&c->btree_cache_lock);
+            mutex_unlock(&c->btree_cache.lock);
         } else {
             bkey_copy(&b->key, &new_key->k_i);
         }
@@ -1918,9 +1919,9 @@ retry:
     bch2_btree_update_done(as);
 out:
     if (new_hash) {
-        mutex_lock(&c->btree_cache_lock);
-        list_move(&new_hash->list, &c->btree_cache_freeable);
-        mutex_unlock(&c->btree_cache_lock);
+        mutex_lock(&c->btree_cache.lock);
+        list_move(&new_hash->list, &c->btree_cache.freeable);
+        mutex_unlock(&c->btree_cache.lock);

         six_unlock_write(&new_hash->lock);
         six_unlock_intent(&new_hash->lock);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 14a264cca8d8..d3bab8e00d20 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -370,7 +370,7 @@ err:
 static void bch2_fs_free(struct bch_fs *c)
 {
     bch2_fs_encryption_exit(c);
-    bch2_fs_btree_exit(c);
+    bch2_fs_btree_cache_exit(c);
     bch2_fs_journal_exit(&c->journal);
     bch2_io_clock_exit(&c->io_clock[WRITE]);
     bch2_io_clock_exit(&c->io_clock[READ]);
@@ -483,7 +483,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
     mutex_init(&c->state_lock);
     mutex_init(&c->sb_lock);
     mutex_init(&c->replicas_gc_lock);
-    mutex_init(&c->btree_cache_lock);
     mutex_init(&c->bucket_lock);
     mutex_init(&c->btree_root_lock);
     INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
@@ -499,9 +498,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
     bch2_fs_tiering_init(c);

     INIT_LIST_HEAD(&c->list);
-    INIT_LIST_HEAD(&c->btree_cache);
-    INIT_LIST_HEAD(&c->btree_cache_freeable);
-    INIT_LIST_HEAD(&c->btree_cache_freed);
     INIT_LIST_HEAD(&c->btree_interior_update_list);
     mutex_init(&c->btree_reserve_cache_lock);
@@ -538,6 +534,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
     c->journal.blocked_time = &c->journal_blocked_time;
     c->journal.flush_seq_time = &c->journal_flush_seq_time;

+    bch2_fs_btree_cache_init_early(&c->btree_cache);
+
     mutex_lock(&c->sb_lock);

     if (bch2_sb_to_fs(c, sb)) {
@@ -594,7 +592,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
         bch2_io_clock_init(&c->io_clock[READ]) ||
         bch2_io_clock_init(&c->io_clock[WRITE]) ||
         bch2_fs_journal_init(&c->journal) ||
-        bch2_fs_btree_init(c) ||
+        bch2_fs_btree_cache_init(c) ||
         bch2_fs_encryption_init(c) ||
         bch2_fs_compress_init(c) ||
         bch2_check_set_has_compressed_data(c, c->opts.compression))
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 07d9be751d6c..c20769b7ca06 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -209,11 +209,11 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
     size_t ret = 0;
     struct btree *b;

-    mutex_lock(&c->btree_cache_lock);
-    list_for_each_entry(b, &c->btree_cache, list)
+    mutex_lock(&c->btree_cache.lock);
+    list_for_each_entry(b, &c->btree_cache.live, list)
         ret += btree_bytes(c);

-    mutex_unlock(&c->btree_cache_lock);
+    mutex_unlock(&c->btree_cache.lock);

     return ret;
 }
@@ -436,7 +436,7 @@ STORE(__bch2_fs)
         sc.gfp_mask = GFP_KERNEL;
         sc.nr_to_scan = strtoul_or_return(buf);

-        c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
+        c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
     }

     return size;
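A brief usage note on the renamed lifecycle hooks: the sketch below is a condensed, illustrative view of the call order after this patch. example_fs_bringup() is a made-up wrapper used only for illustration; the real call sites are bch2_fs_alloc() and bch2_fs_free() in fs/bcachefs/super.c, as changed above.

/* Illustrative only - not a real function in the tree. */
static int example_fs_bringup(struct bch_fs *c)
{
    int ret;

    /* 1) Plain field setup (lock + lists); done very early in bch2_fs_alloc(): */
    bch2_fs_btree_cache_init_early(&c->btree_cache);

    /* 2) Allocating setup: rhashtable, reserve of in-memory nodes, shrinker.
     *    Called later in bch2_fs_alloc()'s init chain; returns an error
     *    (e.g. -ENOMEM) on failure: */
    ret = bch2_fs_btree_cache_init(c);
    if (ret)
        return ret;

    /* ... filesystem runs; nodes live on c->btree_cache.live/freeable/freed ... */

    /* 3) Teardown from bch2_fs_free(): completes writes for dirty nodes, frees
     *    node memory, unregisters the shrinker and destroys the hash table: */
    bch2_fs_btree_cache_exit(c);
    return 0;
}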