author     Kent Overstreet <kent.overstreet@gmail.com>  2017-11-21 22:28:25 -0500
committer  Kent Overstreet <kent.overstreet@gmail.com>  2018-01-30 20:40:46 -0500
commit     182394227f0b8243ba634d138a58d06b72c07e11 (patch)
tree       1a61258b92b694799f64bd2099fbfaef3345d619
parent     891c56d240dd477c1c921817ed63290ec1da77db (diff)
bcachefs: Refactor btree cache code
Pull the btree node cache state out of struct bch_fs into a new struct btree_cache, making struct bch_fs a bit smaller.
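
The gist of the refactor, before reading the diff: the loose btree_cache_* fields in struct bch_fs become one embedded struct btree_cache, set up by an early-init helper that cannot fail, and the patched functions operate through a local pointer to it. Below is a minimal, self-contained user-space sketch of that pattern; all names in it (fs_ctx, cache, cache_init_early, cache_can_free) are invented for illustration, and the real definitions are the ones this patch adds in btree_types.h and btree_cache.c.

    /*
     * Illustrative user-space analogue only -- NOT part of the patch. It
     * mirrors the shape of the refactor: loose cache fields collapse into one
     * embedded struct with an early-init helper, and call sites take a local
     * pointer to it.
     */
    #include <pthread.h>
    #include <stdio.h>

    struct cache {                    /* plays the role of struct btree_cache */
            pthread_mutex_t lock;
            unsigned        used;
            unsigned        reserve;
    };

    struct fs_ctx {                   /* plays the role of struct bch_fs */
            struct cache    cache;    /* one member instead of ~10 loose fields */
    };

    /* mirrors bch2_fs_btree_cache_init_early(): no allocation, cannot fail */
    static void cache_init_early(struct cache *bc)
    {
            pthread_mutex_init(&bc->lock, NULL);
            bc->used = bc->reserve = 0;
    }

    /* call sites grab a local pointer, like the patched functions do with
     * "struct btree_cache *bc = &c->btree_cache;" */
    static unsigned cache_can_free(struct fs_ctx *c)
    {
            struct cache *bc = &c->cache;
            unsigned ret;

            pthread_mutex_lock(&bc->lock);
            ret = bc->used > bc->reserve ? bc->used - bc->reserve : 0;
            pthread_mutex_unlock(&bc->lock);
            return ret;
    }

    int main(void)
    {
            struct fs_ctx c;

            cache_init_early(&c.cache);
            printf("can free: %u\n", cache_can_free(&c));
            return 0;
    }

Compile with cc -pthread; the point is only the structural move (embedded sub-struct, early init, local pointer at each call site), not the cache logic itself.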
-rw-r--r--  fs/bcachefs/bcachefs.h               |  35
-rw-r--r--  fs/bcachefs/btree_cache.c            | 253
-rw-r--r--  fs/bcachefs/btree_cache.h            |  15
-rw-r--r--  fs/bcachefs/btree_io.c               |  10
-rw-r--r--  fs/bcachefs/btree_iter.c             |   4
-rw-r--r--  fs/bcachefs/btree_types.h            |  36
-rw-r--r--  fs/bcachefs/btree_update_interior.c  |  43
-rw-r--r--  fs/bcachefs/super.c                  |  10
-rw-r--r--  fs/bcachefs/sysfs.c                  |   8
9 files changed, 220 insertions(+), 194 deletions(-)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index ff7ce71ac0a0..2d83cd163d55 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -544,40 +544,7 @@ struct bch_fs {
struct btree_root btree_roots[BTREE_ID_NR];
struct mutex btree_root_lock;
- bool btree_cache_table_init_done;
- struct rhashtable btree_cache_table;
-
- /*
- * We never free a struct btree, except on shutdown - we just put it on
- * the btree_cache_freed list and reuse it later. This simplifies the
- * code, and it doesn't cost us much memory as the memory usage is
- * dominated by buffers that hold the actual btree node data and those
- * can be freed - and the number of struct btrees allocated is
- * effectively bounded.
- *
- * btree_cache_freeable effectively is a small cache - we use it because
- * high order page allocations can be rather expensive, and it's quite
- * common to delete and allocate btree nodes in quick succession. It
- * should never grow past ~2-3 nodes in practice.
- */
- struct mutex btree_cache_lock;
- struct list_head btree_cache;
- struct list_head btree_cache_freeable;
- struct list_head btree_cache_freed;
-
- /* Number of elements in btree_cache + btree_cache_freeable lists */
- unsigned btree_cache_used;
- unsigned btree_cache_reserve;
- struct shrinker btree_cache_shrink;
-
- /*
- * If we need to allocate memory for a new btree node and that
- * allocation fails, we can cannibalize another node in the btree cache
- * to satisfy the allocation - lock to guarantee only one thread does
- * this at a time:
- */
- struct closure_waitlist mca_wait;
- struct task_struct *btree_cache_alloc_lock;
+ struct btree_cache btree_cache;
mempool_t btree_reserve_pool;
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 4147545d047b..22846d8acbfb 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -31,13 +31,15 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
reserve += min_t(unsigned, 1,
c->btree_roots[i].b->level) * 8;
- c->btree_cache_reserve = reserve;
+ c->btree_cache.reserve = reserve;
}
-#define mca_can_free(c) \
- max_t(int, 0, c->btree_cache_used - c->btree_cache_reserve)
+static inline unsigned btree_cache_can_free(struct btree_cache *bc)
+{
+ return max_t(int, 0, bc->used - bc->reserve);
+}
-static void __mca_data_free(struct bch_fs *c, struct btree *b)
+static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
{
EBUG_ON(btree_node_write_in_flight(b));
@@ -46,11 +48,13 @@ static void __mca_data_free(struct bch_fs *c, struct btree *b)
bch2_btree_keys_free(b);
}
-static void mca_data_free(struct bch_fs *c, struct btree *b)
+static void btree_node_data_free(struct bch_fs *c, struct btree *b)
{
- __mca_data_free(c, b);
- c->btree_cache_used--;
- list_move(&b->list, &c->btree_cache_freed);
+ struct btree_cache *bc = &c->btree_cache;
+
+ __btree_node_data_free(c, b);
+ bc->used--;
+ list_move(&b->list, &bc->freed);
}
static const struct rhashtable_params bch_btree_cache_params = {
@@ -59,8 +63,10 @@ static const struct rhashtable_params bch_btree_cache_params = {
.key_len = sizeof(struct bch_extent_ptr),
};
-static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
+static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
+ struct btree_cache *bc = &c->btree_cache;
+
b->data = kvpmalloc(btree_bytes(c), gfp);
if (!b->data)
goto err;
@@ -68,16 +74,16 @@ static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp))
goto err;
- c->btree_cache_used++;
- list_move(&b->list, &c->btree_cache_freeable);
+ bc->used++;
+ list_move(&b->list, &bc->freeable);
return;
err:
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
- list_move(&b->list, &c->btree_cache_freed);
+ list_move(&b->list, &bc->freed);
}
-static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
+static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
{
struct btree *b = kzalloc(sizeof(struct btree), gfp);
if (!b)
@@ -88,49 +94,48 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
- mca_data_alloc(c, b, gfp);
+ btree_node_data_alloc(c, b, gfp);
return b->data ? b : NULL;
}
/* Btree in memory cache - hash table */
-void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
- rhashtable_remove_fast(&c->btree_cache_table, &b->hash,
- bch_btree_cache_params);
+ rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
/* Cause future lookups for this node to fail: */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
}
-int __bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b)
+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
- return rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
bch_btree_cache_params);
}
-int bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b,
- unsigned level, enum btree_id id)
+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
+ unsigned level, enum btree_id id)
{
int ret;
b->level = level;
b->btree_id = id;
- mutex_lock(&c->btree_cache_lock);
- ret = __bch2_btree_node_hash_insert(c, b);
+ mutex_lock(&bc->lock);
+ ret = __bch2_btree_node_hash_insert(bc, b);
if (!ret)
- list_add(&b->list, &c->btree_cache);
- mutex_unlock(&c->btree_cache_lock);
+ list_add(&b->list, &bc->live);
+ mutex_unlock(&bc->lock);
return ret;
}
__flatten
-static inline struct btree *mca_find(struct bch_fs *c,
+static inline struct btree *btree_cache_find(struct btree_cache *bc,
const struct bkey_i *k)
{
- return rhashtable_lookup_fast(&c->btree_cache_table, &PTR_HASH(k),
+ return rhashtable_lookup_fast(&bc->table, &PTR_HASH(k),
bch_btree_cache_params);
}
@@ -140,9 +145,10 @@ static inline struct btree *mca_find(struct bch_fs *c,
*/
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
+ struct btree_cache *bc = &c->btree_cache;
int ret = 0;
- lockdep_assert_held(&c->btree_cache_lock);
+ lockdep_assert_held(&bc->lock);
if (!six_trylock_intent(&b->lock))
return -ENOMEM;
@@ -201,11 +207,12 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
return __btree_node_reclaim(c, b, true);
}
-static unsigned long bch2_mca_scan(struct shrinker *shrink,
- struct shrink_control *sc)
+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
{
struct bch_fs *c = container_of(shrink, struct bch_fs,
- btree_cache_shrink);
+ btree_cache.shrink);
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b, *t;
unsigned long nr = sc->nr_to_scan;
unsigned long can_free;
@@ -218,8 +225,8 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
/* Return -1 if we can't do anything right now */
if (sc->gfp_mask & __GFP_IO)
- mutex_lock(&c->btree_cache_lock);
- else if (!mutex_trylock(&c->btree_cache_lock))
+ mutex_lock(&bc->lock);
+ else if (!mutex_trylock(&bc->lock))
return -1;
/*
@@ -230,11 +237,11 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
* IO can always make forward progress:
*/
nr /= btree_pages(c);
- can_free = mca_can_free(c);
+ can_free = btree_cache_can_free(bc);
nr = min_t(unsigned long, nr, can_free);
i = 0;
- list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
+ list_for_each_entry_safe(b, t, &bc->freeable, list) {
touched++;
if (freed >= nr)
@@ -242,34 +249,34 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
if (++i > 3 &&
!btree_node_reclaim(c, b)) {
- mca_data_free(c, b);
+ btree_node_data_free(c, b);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
freed++;
}
}
restart:
- list_for_each_entry_safe(b, t, &c->btree_cache, list) {
+ list_for_each_entry_safe(b, t, &bc->live, list) {
touched++;
if (freed >= nr) {
/* Save position */
- if (&t->list != &c->btree_cache)
- list_move_tail(&c->btree_cache, &t->list);
+ if (&t->list != &bc->live)
+ list_move_tail(&bc->live, &t->list);
break;
}
if (!btree_node_accessed(b) &&
!btree_node_reclaim(c, b)) {
- /* can't call bch2_btree_node_hash_remove under btree_cache_lock */
+ /* can't call bch2_btree_node_hash_remove under lock */
freed++;
- if (&t->list != &c->btree_cache)
- list_move_tail(&c->btree_cache, &t->list);
+ if (&t->list != &bc->live)
+ list_move_tail(&bc->live, &t->list);
- mca_data_free(c, b);
- mutex_unlock(&c->btree_cache_lock);
+ btree_node_data_free(c, b);
+ mutex_unlock(&bc->lock);
- bch2_btree_node_hash_remove(c, b);
+ bch2_btree_node_hash_remove(bc, b);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
@@ -277,97 +284,97 @@ restart:
goto out;
if (sc->gfp_mask & __GFP_IO)
- mutex_lock(&c->btree_cache_lock);
- else if (!mutex_trylock(&c->btree_cache_lock))
+ mutex_lock(&bc->lock);
+ else if (!mutex_trylock(&bc->lock))
goto out;
goto restart;
} else
clear_btree_node_accessed(b);
}
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&bc->lock);
out:
return (unsigned long) freed * btree_pages(c);
}
-static unsigned long bch2_mca_count(struct shrinker *shrink,
- struct shrink_control *sc)
+static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
struct bch_fs *c = container_of(shrink, struct bch_fs,
- btree_cache_shrink);
+ btree_cache.shrink);
+ struct btree_cache *bc = &c->btree_cache;
if (btree_shrinker_disabled(c))
return 0;
- return mca_can_free(c) * btree_pages(c);
+ return btree_cache_can_free(bc) * btree_pages(c);
}
-void bch2_fs_btree_exit(struct bch_fs *c)
+void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b;
unsigned i;
- if (c->btree_cache_shrink.list.next)
- unregister_shrinker(&c->btree_cache_shrink);
+ if (bc->shrink.list.next)
+ unregister_shrinker(&bc->shrink);
- mutex_lock(&c->btree_cache_lock);
+ mutex_lock(&bc->lock);
#ifdef CONFIG_BCACHEFS_DEBUG
if (c->verify_data)
- list_move(&c->verify_data->list, &c->btree_cache);
+ list_move(&c->verify_data->list, &bc->live);
kvpfree(c->verify_ondisk, btree_bytes(c));
#endif
for (i = 0; i < BTREE_ID_NR; i++)
if (c->btree_roots[i].b)
- list_add(&c->btree_roots[i].b->list, &c->btree_cache);
+ list_add(&c->btree_roots[i].b->list, &bc->live);
- list_splice(&c->btree_cache_freeable,
- &c->btree_cache);
+ list_splice(&bc->freeable, &bc->live);
- while (!list_empty(&c->btree_cache)) {
- b = list_first_entry(&c->btree_cache, struct btree, list);
+ while (!list_empty(&bc->live)) {
+ b = list_first_entry(&bc->live, struct btree, list);
if (btree_node_dirty(b))
bch2_btree_complete_write(c, b, btree_current_write(b));
clear_btree_node_dirty(b);
- mca_data_free(c, b);
+ btree_node_data_free(c, b);
}
- while (!list_empty(&c->btree_cache_freed)) {
- b = list_first_entry(&c->btree_cache_freed,
- struct btree, list);
+ while (!list_empty(&bc->freed)) {
+ b = list_first_entry(&bc->freed, struct btree, list);
list_del(&b->list);
kfree(b);
}
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&bc->lock);
- if (c->btree_cache_table_init_done)
- rhashtable_destroy(&c->btree_cache_table);
+ if (bc->table_init_done)
+ rhashtable_destroy(&bc->table);
}
-int bch2_fs_btree_init(struct bch_fs *c)
+int bch2_fs_btree_cache_init(struct bch_fs *c)
{
+ struct btree_cache *bc = &c->btree_cache;
unsigned i;
int ret;
- ret = rhashtable_init(&c->btree_cache_table, &bch_btree_cache_params);
+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
if (ret)
return ret;
- c->btree_cache_table_init_done = true;
+ bc->table_init_done = true;
bch2_recalc_btree_reserve(c);
- for (i = 0; i < c->btree_cache_reserve; i++)
- if (!mca_bucket_alloc(c, GFP_KERNEL))
+ for (i = 0; i < bc->reserve; i++)
+ if (!btree_node_mem_alloc(c, GFP_KERNEL))
return -ENOMEM;
- list_splice_init(&c->btree_cache,
- &c->btree_cache_freeable);
+ list_splice_init(&bc->live, &bc->freeable);
#ifdef CONFIG_BCACHEFS_DEBUG
mutex_init(&c->verify_lock);
@@ -376,42 +383,53 @@ int bch2_fs_btree_init(struct bch_fs *c)
if (!c->verify_ondisk)
return -ENOMEM;
- c->verify_data = mca_bucket_alloc(c, GFP_KERNEL);
+ c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
if (!c->verify_data)
return -ENOMEM;
list_del_init(&c->verify_data->list);
#endif
- c->btree_cache_shrink.count_objects = bch2_mca_count;
- c->btree_cache_shrink.scan_objects = bch2_mca_scan;
- c->btree_cache_shrink.seeks = 4;
- c->btree_cache_shrink.batch = btree_pages(c) * 2;
- register_shrinker(&c->btree_cache_shrink);
+ bc->shrink.count_objects = bch2_btree_cache_count;
+ bc->shrink.scan_objects = bch2_btree_cache_scan;
+ bc->shrink.seeks = 4;
+ bc->shrink.batch = btree_pages(c) * 2;
+ register_shrinker(&bc->shrink);
return 0;
}
+void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
+{
+ mutex_init(&bc->lock);
+ INIT_LIST_HEAD(&bc->live);
+ INIT_LIST_HEAD(&bc->freeable);
+ INIT_LIST_HEAD(&bc->freed);
+}
+
/*
* We can only have one thread cannibalizing other cached btree nodes at a time,
* or we'll deadlock. We use an open coded mutex to ensure that, which a
* cannibalize_bucket() will take. This means every time we unlock the root of
* the btree, we need to release this lock if we have it held.
*/
-void bch2_btree_node_cannibalize_unlock(struct bch_fs *c)
+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
{
- if (c->btree_cache_alloc_lock == current) {
+ struct btree_cache *bc = &c->btree_cache;
+
+ if (bc->alloc_lock == current) {
trace_btree_node_cannibalize_unlock(c);
- c->btree_cache_alloc_lock = NULL;
- closure_wake_up(&c->mca_wait);
+ bc->alloc_lock = NULL;
+ closure_wake_up(&bc->alloc_wait);
}
}
-int bch2_btree_node_cannibalize_lock(struct bch_fs *c, struct closure *cl)
+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
{
+ struct btree_cache *bc = &c->btree_cache;
struct task_struct *old;
- old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
+ old = cmpxchg(&bc->alloc_lock, NULL, current);
if (old == NULL || old == current)
goto success;
@@ -420,13 +438,13 @@ int bch2_btree_node_cannibalize_lock(struct bch_fs *c, struct closure *cl)
return -ENOMEM;
}
- closure_wait(&c->mca_wait, cl);
+ closure_wait(&bc->alloc_wait, cl);
/* Try again, after adding ourselves to waitlist */
- old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
+ old = cmpxchg(&bc->alloc_lock, NULL, current);
if (old == NULL || old == current) {
/* We raced */
- closure_wake_up(&c->mca_wait);
+ closure_wake_up(&bc->alloc_wait);
goto success;
}
@@ -438,16 +456,17 @@ success:
return 0;
}
-static struct btree *mca_cannibalize(struct bch_fs *c)
+static struct btree *btree_node_cannibalize(struct bch_fs *c)
{
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b;
- list_for_each_entry_reverse(b, &c->btree_cache, list)
+ list_for_each_entry_reverse(b, &bc->live, list)
if (!btree_node_reclaim(c, b))
return b;
while (1) {
- list_for_each_entry_reverse(b, &c->btree_cache, list)
+ list_for_each_entry_reverse(b, &bc->live, list)
if (!btree_node_write_and_reclaim(c, b))
return b;
@@ -462,16 +481,17 @@ static struct btree *mca_cannibalize(struct bch_fs *c)
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
{
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b;
u64 start_time = local_clock();
- mutex_lock(&c->btree_cache_lock);
+ mutex_lock(&bc->lock);
/*
* btree_free() doesn't free memory; it sticks the node on the end of
* the list. Check if there's any freed nodes there:
*/
- list_for_each_entry(b, &c->btree_cache_freeable, list)
+ list_for_each_entry(b, &bc->freeable, list)
if (!btree_node_reclaim(c, b))
goto out_unlock;
@@ -479,9 +499,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
* We never free struct btree itself, just the memory that holds the on
* disk node. Check the freed list before allocating a new one:
*/
- list_for_each_entry(b, &c->btree_cache_freed, list)
+ list_for_each_entry(b, &bc->freed, list)
if (!btree_node_reclaim(c, b)) {
- mca_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
+ btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
if (b->data)
goto out_unlock;
@@ -490,7 +510,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
goto err;
}
- b = mca_bucket_alloc(c, __GFP_NOWARN|GFP_NOIO);
+ b = btree_node_mem_alloc(c, __GFP_NOWARN|GFP_NOIO);
if (!b)
goto err;
@@ -501,7 +521,7 @@ out_unlock:
BUG_ON(btree_node_write_in_flight(b));
list_del_init(&b->list);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&bc->lock);
out:
b->flags = 0;
b->written = 0;
@@ -517,18 +537,18 @@ out:
return b;
err:
/* Try to cannibalize another cached btree node: */
- if (c->btree_cache_alloc_lock == current) {
- b = mca_cannibalize(c);
+ if (bc->alloc_lock == current) {
+ b = btree_node_cannibalize(c);
list_del_init(&b->list);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&bc->lock);
- bch2_btree_node_hash_remove(c, b);
+ bch2_btree_node_hash_remove(bc, b);
trace_btree_node_cannibalize(c);
goto out;
}
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&bc->lock);
return ERR_PTR(-ENOMEM);
}
@@ -539,6 +559,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
unsigned level,
enum six_lock_type lock_type)
{
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b;
/*
@@ -552,15 +573,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return b;
bkey_copy(&b->key, k);
- if (bch2_btree_node_hash_insert(c, b, level, iter->btree_id)) {
+ if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) {
/* raced with another fill: */
/* mark as unhashed... */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
- mutex_lock(&c->btree_cache_lock);
- list_add(&b->list, &c->btree_cache_freeable);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_lock(&bc->lock);
+ list_add(&b->list, &bc->freeable);
+ mutex_unlock(&bc->lock);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
@@ -601,13 +622,14 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, unsigned level,
enum six_lock_type lock_type)
{
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
BUG_ON(level >= BTREE_MAX_DEPTH);
retry:
rcu_read_lock();
- b = mca_find(c, k);
+ b = btree_cache_find(bc, k);
rcu_read_unlock();
if (unlikely(!b)) {
@@ -755,12 +777,13 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
unsigned level, enum btree_id btree_id)
{
+ struct btree_cache *bc = &c->btree_cache;
struct btree *b;
BUG_ON(level >= BTREE_MAX_DEPTH);
rcu_read_lock();
- b = mca_find(c, k);
+ b = btree_cache_find(bc, k);
rcu_read_unlock();
if (b)
@@ -771,15 +794,15 @@ void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
return;
bkey_copy(&b->key, k);
- if (bch2_btree_node_hash_insert(c, b, level, btree_id)) {
+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */
/* mark as unhashed... */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
- mutex_lock(&c->btree_cache_lock);
- list_add(&b->list, &c->btree_cache_freeable);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_lock(&bc->lock);
+ list_add(&b->list, &bc->freeable);
+ mutex_unlock(&bc->lock);
goto out;
}
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 5e836acda5d6..46d536eb36a8 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -11,13 +11,13 @@ extern const char * const bch2_btree_ids[];
void bch2_recalc_btree_reserve(struct bch_fs *);
-void bch2_btree_node_hash_remove(struct bch_fs *, struct btree *);
-int __bch2_btree_node_hash_insert(struct bch_fs *, struct btree *);
-int bch2_btree_node_hash_insert(struct bch_fs *, struct btree *,
+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
unsigned, enum btree_id);
-void bch2_btree_node_cannibalize_unlock(struct bch_fs *);
-int bch2_btree_node_cannibalize_lock(struct bch_fs *, struct closure *);
+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
@@ -32,8 +32,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *,
unsigned, enum btree_id);
-void bch2_fs_btree_exit(struct bch_fs *);
-int bch2_fs_btree_init(struct bch_fs *);
+void bch2_fs_btree_cache_exit(struct bch_fs *);
+int bch2_fs_btree_cache_init(struct bch_fs *);
+void bch2_fs_btree_cache_init_early(struct btree_cache *);
#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index d20047ace529..8b7f4d2da9af 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1364,17 +1364,17 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
closure_init_stack(&cl);
do {
- ret = bch2_btree_node_cannibalize_lock(c, &cl);
+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
closure_sync(&cl);
} while (ret);
b = bch2_btree_node_mem_alloc(c);
- bch2_btree_node_cannibalize_unlock(c);
+ bch2_btree_cache_cannibalize_unlock(c);
BUG_ON(IS_ERR(b));
bkey_copy(&b->key, k);
- BUG_ON(bch2_btree_node_hash_insert(c, b, level, id));
+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
bch2_btree_node_read(c, b, true);
six_unlock_write(&b->lock);
@@ -1844,8 +1844,8 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
unsigned i;
rcu_read_lock();
- tbl = rht_dereference_rcu(c->btree_cache_table.tbl,
- &c->btree_cache_table);
+ tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
+ &c->btree_cache.table);
for (i = 0; i < tbl->size; i++)
rht_for_each_entry_rcu(b, pos, tbl, i, hash)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index b1b6233993dc..b0e64957d493 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -769,7 +769,7 @@ retry_all:
closure_init_stack(&cl);
do {
- ret = bch2_btree_node_cannibalize_lock(c, &cl);
+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
closure_sync(&cl);
} while (ret);
}
@@ -817,7 +817,7 @@ retry:
ret = btree_iter_linked(iter) ? -EINTR : 0;
out:
- bch2_btree_node_cannibalize_unlock(c);
+ bch2_btree_cache_cannibalize_unlock(c);
return ret;
io_error:
BUG_ON(ret != -EIO);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index c0c162054bc7..8b4df034f926 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -130,6 +130,42 @@ struct btree {
#endif
};
+struct btree_cache {
+ struct rhashtable table;
+ bool table_init_done;
+ /*
+ * We never free a struct btree, except on shutdown - we just put it on
+ * the btree_cache_freed list and reuse it later. This simplifies the
+ * code, and it doesn't cost us much memory as the memory usage is
+ * dominated by buffers that hold the actual btree node data and those
+ * can be freed - and the number of struct btrees allocated is
+ * effectively bounded.
+ *
+ * btree_cache_freeable effectively is a small cache - we use it because
+ * high order page allocations can be rather expensive, and it's quite
+ * common to delete and allocate btree nodes in quick succession. It
+ * should never grow past ~2-3 nodes in practice.
+ */
+ struct mutex lock;
+ struct list_head live;
+ struct list_head freeable;
+ struct list_head freed;
+
+ /* Number of elements in live + freeable lists */
+ unsigned used;
+ unsigned reserve;
+ struct shrinker shrink;
+
+ /*
+ * If we need to allocate memory for a new btree node and that
+ * allocation fails, we can cannibalize another node in the btree cache
+ * to satisfy the allocation - lock to guarantee only one thread does
+ * this at a time:
+ */
+ struct task_struct *alloc_lock;
+ struct closure_waitlist alloc_wait;
+};
+
#define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 922a48635a6a..ae7760356dda 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -237,11 +237,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
six_lock_write(&b->lock);
- bch2_btree_node_hash_remove(c, b);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
- mutex_lock(&c->btree_cache_lock);
- list_move(&b->list, &c->btree_cache_freeable);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
/*
* By using six_unlock_write() directly instead of
@@ -374,7 +374,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b = as->reserve->b[--as->reserve->nr];
- BUG_ON(bch2_btree_node_hash_insert(c, b, level, as->btree_id));
+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));
set_btree_node_accessed(b);
set_btree_node_dirty(b);
@@ -515,7 +515,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
* Protects reaping from the btree node cache and using the btree node
* open bucket reserve:
*/
- ret = bch2_btree_node_cannibalize_lock(c, cl);
+ ret = bch2_btree_cache_cannibalize_lock(c, cl);
if (ret) {
bch2_disk_reservation_put(c, &disk_res);
return ERR_PTR(ret);
@@ -543,11 +543,11 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
reserve->b[reserve->nr++] = b;
}
- bch2_btree_node_cannibalize_unlock(c);
+ bch2_btree_cache_cannibalize_unlock(c);
return reserve;
err_free:
bch2_btree_reserve_put(c, reserve);
- bch2_btree_node_cannibalize_unlock(c);
+ bch2_btree_cache_cannibalize_unlock(c);
trace_btree_reserve_get_fail(c, nr_nodes, cl);
return ERR_PTR(ret);
}
@@ -1015,9 +1015,9 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
static void __bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
{
/* Root nodes cannot be reaped */
- mutex_lock(&c->btree_cache_lock);
+ mutex_lock(&c->btree_cache.lock);
list_del_init(&b->list);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&c->btree_cache.lock);
mutex_lock(&c->btree_root_lock);
btree_node_root(c, b) = b;
@@ -1802,7 +1802,7 @@ retry:
PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
/* bch2_btree_reserve_get will unlock */
do {
- ret = bch2_btree_node_cannibalize_lock(c, &cl);
+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
closure_sync(&cl);
} while (ret == -EAGAIN);
@@ -1873,23 +1873,24 @@ retry:
if (parent) {
if (new_hash) {
bkey_copy(&new_hash->key, &new_key->k_i);
- BUG_ON(bch2_btree_node_hash_insert(c, new_hash,
- b->level, b->btree_id));
+ ret = bch2_btree_node_hash_insert(&c->btree_cache,
+ new_hash, b->level, b->btree_id);
+ BUG_ON(ret);
}
bch2_btree_insert_node(as, parent, &iter,
&keylist_single(&new_key->k_i));
if (new_hash) {
- mutex_lock(&c->btree_cache_lock);
- bch2_btree_node_hash_remove(c, new_hash);
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
- bch2_btree_node_hash_remove(c, b);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, &new_key->k_i);
- ret = __bch2_btree_node_hash_insert(c, b);
+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
BUG_ON(ret);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&c->btree_cache.lock);
} else {
bkey_copy(&b->key, &new_key->k_i);
}
@@ -1918,9 +1919,9 @@ retry:
bch2_btree_update_done(as);
out:
if (new_hash) {
- mutex_lock(&c->btree_cache_lock);
- list_move(&new_hash->list, &c->btree_cache_freeable);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&new_hash->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
six_unlock_write(&new_hash->lock);
six_unlock_intent(&new_hash->lock);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 14a264cca8d8..d3bab8e00d20 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -370,7 +370,7 @@ err:
static void bch2_fs_free(struct bch_fs *c)
{
bch2_fs_encryption_exit(c);
- bch2_fs_btree_exit(c);
+ bch2_fs_btree_cache_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
@@ -483,7 +483,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->state_lock);
mutex_init(&c->sb_lock);
mutex_init(&c->replicas_gc_lock);
- mutex_init(&c->btree_cache_lock);
mutex_init(&c->bucket_lock);
mutex_init(&c->btree_root_lock);
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
@@ -499,9 +498,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_tiering_init(c);
INIT_LIST_HEAD(&c->list);
- INIT_LIST_HEAD(&c->btree_cache);
- INIT_LIST_HEAD(&c->btree_cache_freeable);
- INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->btree_interior_update_list);
mutex_init(&c->btree_reserve_cache_lock);
@@ -538,6 +534,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.blocked_time = &c->journal_blocked_time;
c->journal.flush_seq_time = &c->journal_flush_seq_time;
+ bch2_fs_btree_cache_init_early(&c->btree_cache);
+
mutex_lock(&c->sb_lock);
if (bch2_sb_to_fs(c, sb)) {
@@ -594,7 +592,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
bch2_fs_journal_init(&c->journal) ||
- bch2_fs_btree_init(c) ||
+ bch2_fs_btree_cache_init(c) ||
bch2_fs_encryption_init(c) ||
bch2_fs_compress_init(c) ||
bch2_check_set_has_compressed_data(c, c->opts.compression))
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 07d9be751d6c..c20769b7ca06 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -209,11 +209,11 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
size_t ret = 0;
struct btree *b;
- mutex_lock(&c->btree_cache_lock);
- list_for_each_entry(b, &c->btree_cache, list)
+ mutex_lock(&c->btree_cache.lock);
+ list_for_each_entry(b, &c->btree_cache.live, list)
ret += btree_bytes(c);
- mutex_unlock(&c->btree_cache_lock);
+ mutex_unlock(&c->btree_cache.lock);
return ret;
}
@@ -436,7 +436,7 @@ STORE(__bch2_fs)
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
- c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
+ c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
}
return size;