summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2019-03-07 19:46:10 -0500
committerKent Overstreet <kent.overstreet@gmail.com>2020-06-15 16:26:59 -0400
commit5500217de2eb8d189896ca9a895380339f6973b0 (patch)
treed65dabb3ffa3766f2708ce5d5fa1f7fab384381f
parent8dd7a2097a1f800b35a7823da76885d6f742360c (diff)
bcachefs: Btree key cache
This introduces a new kind of btree iterator, cached iterators, which point to keys cached in a hash table. The cache also acts as a write cache - in the update path, we journal the update but defer updating the btree until the cached entry is flushed by journal reclaim. Cache coherency is for now up to the users to handle, which isn't ideal but should be good enough for now. These new iterators will be used for updating inodes and alloc info (the alloc and stripes btrees). Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--fs/bcachefs/Makefile1
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/btree_iter.c115
-rw-r--r--fs/bcachefs/btree_iter.h16
-rw-r--r--fs/bcachefs/btree_key_cache.c494
-rw-r--r--fs/bcachefs/btree_key_cache.h23
-rw-r--r--fs/bcachefs/btree_types.h53
-rw-r--r--fs/bcachefs/btree_update.h5
-rw-r--r--fs/bcachefs/btree_update_interior.c9
-rw-r--r--fs/bcachefs/btree_update_leaf.c70
-rw-r--r--fs/bcachefs/buckets.c7
-rw-r--r--fs/bcachefs/journal_reclaim.c31
-rw-r--r--fs/bcachefs/journal_reclaim.h4
-rw-r--r--fs/bcachefs/super.c4
14 files changed, 789 insertions, 45 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index c7727d05cf49..d85ced62c0dd 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -13,6 +13,7 @@ bcachefs-y := \
btree_gc.o \
btree_io.o \
btree_iter.o \
+ btree_key_cache.o \
btree_update_interior.o \
btree_update_leaf.o \
buckets.o \
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 797325ea295a..63b8fe97d39f 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -631,6 +631,8 @@ struct bch_fs {
struct list_head btree_trans_list;
mempool_t btree_iters_pool;
+ struct btree_key_cache btree_key_cache;
+
struct workqueue_struct *wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 93d710faddae..e620088d3116 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -4,22 +4,16 @@
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_iter.h"
+#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "extents.h"
+#include "journal.h"
#include <linux/prefetch.h>
#include <trace/events/bcachefs.h>
-#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
-#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
-#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
-#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
-#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
-#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
-#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
-
static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{
return l < BTREE_MAX_DEPTH &&
@@ -253,7 +247,8 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
}
/* Must lock btree nodes in key order: */
- if (iter->btree_id < linked->btree_id)
+ if ((cmp_int(iter->btree_id, linked->btree_id) ?:
+ -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0)
ret = false;
if (iter->btree_id == linked->btree_id &&
@@ -301,7 +296,7 @@ static void bch2_btree_iter_verify_locks(struct btree_iter *iter)
return;
}
- for (l = 0; btree_iter_node(iter, l); l++) {
+ for (l = 0; is_btree_node(iter, l); l++) {
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
!btree_node_locked(iter, l))
continue;
@@ -323,7 +318,7 @@ static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
#endif
__flatten
-static bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
+bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
{
return btree_iter_get_locks(iter, false, trace);
}
@@ -845,6 +840,8 @@ static inline void __btree_iter_init(struct btree_iter *iter,
static inline void btree_iter_node_set(struct btree_iter *iter,
struct btree *b)
{
+ BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
+
btree_iter_verify_new_node(iter, b);
EBUG_ON(!btree_iter_pos_in_node(iter, b));
@@ -865,7 +862,8 @@ void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
struct btree_iter *linked;
trans_for_each_iter(iter->trans, linked)
- if (btree_iter_pos_in_node(linked, b)) {
+ if (btree_iter_type(linked) != BTREE_ITER_CACHED &&
+ btree_iter_pos_in_node(linked, b)) {
/*
* bch2_btree_iter_node_drop() has already been called -
* the old node we're replacing has already been
@@ -1057,24 +1055,28 @@ static void btree_iter_up(struct btree_iter *iter)
static int btree_iter_traverse_one(struct btree_iter *);
-static int __btree_iter_traverse_all(struct btree_trans *trans,
- struct btree_iter *orig_iter, int ret)
+static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
u8 sorted[BTREE_ITER_MAX];
unsigned i, nr_sorted = 0;
+ if (trans->in_traverse_all)
+ return -EINTR;
+
+ trans->in_traverse_all = true;
+retry_all:
+ nr_sorted = 0;
+
trans_for_each_iter(trans, iter)
- sorted[nr_sorted++] = iter - trans->iters;
+ sorted[nr_sorted++] = iter->idx;
#define btree_iter_cmp_by_idx(_l, _r) \
btree_iter_cmp(&trans->iters[_l], &trans->iters[_r])
bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
#undef btree_iter_cmp_by_idx
-
-retry_all:
bch2_trans_unlock(trans);
if (unlikely(ret == -ENOMEM)) {
@@ -1090,11 +1092,6 @@ retry_all:
if (unlikely(ret == -EIO)) {
trans->error = true;
- if (orig_iter) {
- orig_iter->flags |= BTREE_ITER_ERROR;
- orig_iter->l[orig_iter->level].b =
- BTREE_ITER_NO_NODE_ERROR;
- }
goto out;
}
@@ -1102,9 +1099,16 @@ retry_all:
/* Now, redo traversals in correct order: */
for (i = 0; i < nr_sorted; i++) {
- iter = &trans->iters[sorted[i]];
+ unsigned idx = sorted[i];
+
+ /*
+ * successfully traversing one iterator can cause another to be
+ * unlinked, in btree_key_cache_fill()
+ */
+ if (!(trans->iters_linked & (1ULL << idx)))
+ continue;
- ret = btree_iter_traverse_one(iter);
+ ret = btree_iter_traverse_one(&trans->iters[idx]);
if (ret)
goto retry_all;
}
@@ -1119,12 +1123,14 @@ retry_all:
}
out:
bch2_btree_cache_cannibalize_unlock(c);
+
+ trans->in_traverse_all = false;
return ret;
}
int bch2_btree_iter_traverse_all(struct btree_trans *trans)
{
- return __btree_iter_traverse_all(trans, NULL, 0);
+ return __btree_iter_traverse_all(trans, 0);
}
static inline bool btree_iter_good_node(struct btree_iter *iter,
@@ -1169,9 +1175,6 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
{
unsigned depth_want = iter->level;
- if (unlikely(iter->level >= BTREE_MAX_DEPTH))
- return 0;
-
/*
* if we need interior nodes locked, call btree_iter_relock() to make
* sure we walk back up enough that we lock them:
@@ -1180,9 +1183,15 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
iter->locks_want > 1)
bch2_btree_iter_relock(iter, false);
+ if (btree_iter_type(iter) == BTREE_ITER_CACHED)
+ return bch2_btree_iter_traverse_cached(iter);
+
if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
return 0;
+ if (unlikely(iter->level >= BTREE_MAX_DEPTH))
+ return 0;
+
/*
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
* here unnecessary
@@ -1216,7 +1225,15 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
return 0;
iter->level = depth_want;
- iter->l[iter->level].b = BTREE_ITER_NO_NODE_DOWN;
+
+ if (ret == -EIO) {
+ iter->flags |= BTREE_ITER_ERROR;
+ iter->l[iter->level].b =
+ BTREE_ITER_NO_NODE_ERROR;
+ } else {
+ iter->l[iter->level].b =
+ BTREE_ITER_NO_NODE_DOWN;
+ }
return ret;
}
}
@@ -1229,12 +1246,13 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
+ struct btree_trans *trans = iter->trans;
int ret;
- ret = bch2_trans_cond_resched(iter->trans) ?:
+ ret = bch2_trans_cond_resched(trans) ?:
btree_iter_traverse_one(iter);
if (unlikely(ret))
- ret = __btree_iter_traverse_all(iter->trans, iter, ret);
+ ret = __btree_iter_traverse_all(trans, ret);
return ret;
}
@@ -1383,6 +1401,13 @@ static void btree_iter_pos_changed(struct btree_iter *iter, int cmp)
if (!cmp)
goto out;
+ if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) {
+ btree_node_unlock(iter, 0);
+ iter->l[0].b = BTREE_ITER_NO_NODE_UP;
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+ return;
+ }
+
l = btree_iter_up_until_good_node(iter, cmp);
if (btree_iter_node(iter, l)) {
@@ -1814,6 +1839,26 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
return bch2_btree_iter_peek_slot(iter);
}
+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
+{
+ struct bkey_cached *ck;
+ int ret;
+
+ bch2_btree_iter_checks(iter, BTREE_ITER_CACHED);
+
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
+
+ ck = (void *) iter->l[0].b;
+
+ EBUG_ON(iter->btree_id != ck->key.btree_id ||
+ bkey_cmp(iter->pos, ck->key.pos));
+ BUG_ON(!ck->valid);
+
+ return bkey_i_to_s_c(ck->k);
+}
+
static inline void bch2_btree_iter_init(struct btree_trans *trans,
struct btree_iter *iter, enum btree_id btree_id,
struct bpos pos, unsigned flags)
@@ -1999,6 +2044,7 @@ static inline void btree_iter_copy(struct btree_iter *dst,
*dst = *src;
dst->idx = idx;
+ dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
for (i = 0; i < BTREE_MAX_DEPTH; i++)
if (btree_node_locked(dst, i))
@@ -2057,8 +2103,9 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
iter = best;
}
- iter->flags &= ~(BTREE_ITER_SLOTS|BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
- iter->flags |= flags & (BTREE_ITER_SLOTS|BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
+ iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
+ iter->flags &= ~BTREE_ITER_USER_FLAGS;
+ iter->flags |= flags & BTREE_ITER_USER_FLAGS;
if (iter->flags & BTREE_ITER_INTENT)
bch2_btree_iter_upgrade(iter, 1);
@@ -2262,6 +2309,8 @@ int bch2_trans_exit(struct btree_trans *trans)
mutex_unlock(&trans->c->btree_trans_lock);
#endif
+ bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
+
kfree(trans->fs_usage_deltas);
kfree(trans->mem);
if (trans->used_mempool)
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index bc408f1272e7..bd9ec3ec9a92 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -110,6 +110,7 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_packed *,
unsigned, unsigned);
+bool bch2_btree_iter_relock(struct btree_iter *, bool);
bool bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
@@ -170,6 +171,8 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *);
+
void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
@@ -177,7 +180,9 @@ void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
static inline int btree_iter_cmp(const struct btree_iter *l,
const struct btree_iter *r)
{
- return cmp_int(l->btree_id, r->btree_id) ?: bkey_cmp(l->pos, r->pos);
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(btree_iter_type(l), btree_iter_type(r)) ?:
+ bkey_cmp(l->pos, r->pos);
}
/*
@@ -211,9 +216,12 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
unsigned flags)
{
- return flags & BTREE_ITER_SLOTS
- ? bch2_btree_iter_peek_slot(iter)
- : bch2_btree_iter_peek(iter);
+ if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED)
+ return bch2_btree_iter_peek_cached(iter);
+ else
+ return flags & BTREE_ITER_SLOTS
+ ? bch2_btree_iter_peek_slot(iter)
+ : bch2_btree_iter_peek(iter);
}
static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
new file mode 100644
index 000000000000..1e533514162c
--- /dev/null
+++ b/fs/bcachefs/btree_key_cache.c
@@ -0,0 +1,494 @@
+
+#include "bcachefs.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+
+#include <trace/events/bcachefs.h>
+
+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct bkey_cached *ck = obj;
+ const struct bkey_cached_key *key = arg->key;
+
+ return cmp_int(ck->key.btree_id, key->btree_id) ?:
+ bkey_cmp(ck->key.pos, key->pos);
+}
+
+static const struct rhashtable_params bch2_btree_key_cache_params = {
+ .head_offset = offsetof(struct bkey_cached, hash),
+ .key_offset = offsetof(struct bkey_cached, key),
+ .key_len = sizeof(struct bkey_cached_key),
+ .obj_cmpfn = bch2_btree_key_cache_cmp_fn,
+};
+
+__flatten
+static inline struct bkey_cached *
+btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
+{
+ struct bkey_cached_key key = {
+ .btree_id = btree_id,
+ .pos = pos,
+ };
+
+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
+ bch2_btree_key_cache_params);
+}
+
+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
+{
+ if (!six_trylock_intent(&ck->c.lock))
+ return false;
+
+ if (!six_trylock_write(&ck->c.lock)) {
+ six_unlock_intent(&ck->c.lock);
+ return false;
+ }
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ six_unlock_write(&ck->c.lock);
+ six_unlock_intent(&ck->c.lock);
+ return false;
+ }
+
+ return true;
+}
+
+static void bkey_cached_evict(struct btree_key_cache *c,
+ struct bkey_cached *ck)
+{
+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
+ bch2_btree_key_cache_params));
+ memset(&ck->key, ~0, sizeof(ck->key));
+}
+
+static void bkey_cached_free(struct btree_key_cache *c,
+ struct bkey_cached *ck)
+{
+ list_move(&ck->list, &c->freed);
+
+ kfree(ck->k);
+ ck->k = NULL;
+ ck->u64s = 0;
+
+ six_unlock_write(&ck->c.lock);
+ six_unlock_intent(&ck->c.lock);
+}
+
+static struct bkey_cached *
+bkey_cached_alloc(struct btree_key_cache *c)
+{
+ struct bkey_cached *ck;
+
+ list_for_each_entry(ck, &c->freed, list)
+ if (bkey_cached_lock_for_evict(ck))
+ return ck;
+
+ list_for_each_entry(ck, &c->clean, list)
+ if (bkey_cached_lock_for_evict(ck)) {
+ bkey_cached_evict(c, ck);
+ return ck;
+ }
+
+ ck = kzalloc(sizeof(*ck), GFP_NOFS);
+ if (!ck)
+ return NULL;
+
+ INIT_LIST_HEAD(&ck->list);
+ six_lock_init(&ck->c.lock);
+ BUG_ON(!six_trylock_intent(&ck->c.lock));
+ BUG_ON(!six_trylock_write(&ck->c.lock));
+
+ return ck;
+}
+
+static struct bkey_cached *
+btree_key_cache_create(struct btree_key_cache *c,
+ enum btree_id btree_id,
+ struct bpos pos)
+{
+ struct bkey_cached *ck;
+
+ ck = bkey_cached_alloc(c);
+ if (!ck)
+ return ERR_PTR(-ENOMEM);
+
+ ck->c.level = 0;
+ ck->c.btree_id = btree_id;
+ ck->key.btree_id = btree_id;
+ ck->key.pos = pos;
+ ck->valid = false;
+
+ BUG_ON(ck->flags);
+
+ if (rhashtable_lookup_insert_fast(&c->table,
+ &ck->hash,
+ bch2_btree_key_cache_params)) {
+ /* We raced with another fill: */
+ bkey_cached_free(c, ck);
+ return NULL;
+ }
+
+ list_move(&ck->list, &c->clean);
+ six_unlock_write(&ck->c.lock);
+
+ return ck;
+}
+
+static int btree_key_cache_fill(struct btree_trans *trans,
+ struct btree_iter *ck_iter,
+ struct bkey_cached *ck)
+{
+ struct btree_iter *iter;
+ struct bkey_s_c k;
+ unsigned new_u64s = 0;
+ struct bkey_i *new_k = NULL;
+ int ret;
+
+ iter = bch2_trans_get_iter(trans, ck->key.btree_id,
+ ck->key.pos, BTREE_ITER_SLOTS);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret) {
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+ }
+
+ if (!bch2_btree_node_relock(ck_iter, 0)) {
+ bch2_trans_iter_put(trans, iter);
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+ return -EINTR;
+ }
+
+ if (k.k->u64s > ck->u64s) {
+ new_u64s = roundup_pow_of_two(k.k->u64s);
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
+ if (!new_k) {
+ bch2_trans_iter_put(trans, iter);
+ return -ENOMEM;
+ }
+ }
+
+ bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
+ if (new_k) {
+ kfree(ck->k);
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ }
+
+ bkey_reassemble(ck->k, k);
+ ck->valid = true;
+ bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter);
+
+ /* We're not likely to need this iterator again: */
+ bch2_trans_iter_free(trans, iter);
+
+ return 0;
+}
+
+static int bkey_cached_check_fn(struct six_lock *lock, void *p)
+{
+ struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
+ const struct btree_iter *iter = p;
+
+ return ck->key.btree_id == iter->btree_id &&
+ !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1;
+}
+
+int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck;
+ int ret = 0;
+
+ BUG_ON(iter->level);
+
+ if (btree_node_locked(iter, 0)) {
+ ck = (void *) iter->l[0].b;
+ goto fill;
+ }
+retry:
+ ck = btree_key_cache_find(c, iter->btree_id, iter->pos);
+ if (!ck) {
+ if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
+ iter->l[0].b = NULL;
+ return 0;
+ }
+
+ mutex_lock(&c->btree_key_cache.lock);
+ ck = btree_key_cache_create(&c->btree_key_cache,
+ iter->btree_id, iter->pos);
+ mutex_unlock(&c->btree_key_cache.lock);
+
+ ret = PTR_ERR_OR_ZERO(ck);
+ if (ret)
+ goto err;
+ if (!ck)
+ goto retry;
+
+ mark_btree_node_locked(iter, 0, SIX_LOCK_intent);
+ iter->locks_want = 1;
+ } else {
+ enum six_lock_type lock_want = __btree_lock_want(iter, 0);
+
+ if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
+ bkey_cached_check_fn, iter)) {
+ if (ck->key.btree_id != iter->btree_id ||
+ bkey_cmp(ck->key.pos, iter->pos)) {
+ goto retry;
+ }
+
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (ck->key.btree_id != iter->btree_id ||
+ bkey_cmp(ck->key.pos, iter->pos)) {
+ six_unlock_type(&ck->c.lock, lock_want);
+ goto retry;
+ }
+
+ mark_btree_node_locked(iter, 0, lock_want);
+ }
+
+ iter->l[0].lock_seq = ck->c.lock.state.seq;
+ iter->l[0].b = (void *) ck;
+fill:
+ if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
+ if (!btree_node_intent_locked(iter, 0))
+ bch2_btree_iter_upgrade(iter, 1);
+ if (!btree_node_intent_locked(iter, 0)) {
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+ ret = -EINTR;
+ goto err;
+ }
+
+ ret = btree_key_cache_fill(trans, iter, ck);
+ if (ret)
+ goto err;
+ }
+
+ iter->uptodate = BTREE_ITER_NEED_PEEK;
+ bch2_btree_iter_downgrade(iter);
+ return ret;
+err:
+ if (ret != -EINTR) {
+ btree_node_unlock(iter, 0);
+ iter->flags |= BTREE_ITER_ERROR;
+ iter->l[0].b = BTREE_ITER_NO_NODE_ERROR;
+ }
+ return ret;
+}
+
+static int btree_key_cache_flush_pos(struct btree_trans *trans,
+ struct bkey_cached_key key,
+ u64 journal_seq,
+ bool evict)
+{
+ struct bch_fs *c = trans->c;
+ struct journal *j = &c->journal;
+ struct btree_iter *c_iter = NULL, *b_iter = NULL;
+ struct bkey_cached *ck;
+ int ret;
+
+ b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_INTENT);
+ ret = PTR_ERR_OR_ZERO(b_iter);
+ if (ret)
+ goto out;
+
+ c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
+ BTREE_ITER_CACHED_NOCREATE|
+ BTREE_ITER_INTENT);
+ ret = PTR_ERR_OR_ZERO(c_iter);
+ if (ret)
+ goto out;
+retry:
+ ret = bch2_btree_iter_traverse(c_iter);
+ if (ret)
+ goto err;
+
+ ck = (void *) c_iter->l[0].b;
+ if (!ck ||
+ (journal_seq && ck->journal.seq != journal_seq))
+ goto out;
+
+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ if (!evict)
+ goto out;
+ goto evict;
+ }
+
+ ret = bch2_btree_iter_traverse(b_iter) ?:
+ bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOUNLOCK|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_USE_ALLOC_RESERVE|
+ BTREE_INSERT_JOURNAL_RESERVED|
+ BTREE_INSERT_JOURNAL_RECLAIM);
+err:
+ if (ret == -EINTR)
+ goto retry;
+
+ BUG_ON(ret && !bch2_journal_error(j));
+
+ if (ret)
+ goto out;
+
+ bch2_journal_pin_drop(j, &ck->journal);
+ bch2_journal_preres_put(j, &ck->res);
+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+
+ if (!evict) {
+ mutex_lock(&c->btree_key_cache.lock);
+ list_move_tail(&ck->list, &c->btree_key_cache.clean);
+ mutex_unlock(&c->btree_key_cache.lock);
+ } else {
+evict:
+ BUG_ON(!btree_node_intent_locked(c_iter, 0));
+
+ mark_btree_node_unlocked(c_iter, 0);
+ c_iter->l[0].b = NULL;
+
+ six_lock_write(&ck->c.lock, NULL, NULL);
+
+ mutex_lock(&c->btree_key_cache.lock);
+ bkey_cached_evict(&c->btree_key_cache, ck);
+ bkey_cached_free(&c->btree_key_cache, ck);
+ mutex_unlock(&c->btree_key_cache.lock);
+ }
+out:
+ bch2_trans_iter_put(trans, b_iter);
+ bch2_trans_iter_put(trans, c_iter);
+ return ret;
+}
+
+static void btree_key_cache_journal_flush(struct journal *j,
+ struct journal_entry_pin *pin,
+ u64 seq)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct bkey_cached *ck =
+ container_of(pin, struct bkey_cached, journal);
+ struct bkey_cached_key key;
+ struct btree_trans trans;
+
+ six_lock_read(&ck->c.lock, NULL, NULL);
+ key = READ_ONCE(ck->key);
+
+ if (ck->journal.seq != seq ||
+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ six_unlock_read(&ck->c.lock);
+ return;
+ }
+ six_unlock_read(&ck->c.lock);
+
+ bch2_trans_init(&trans, c, 0, 0);
+ btree_key_cache_flush_pos(&trans, key, seq, false);
+ bch2_trans_exit(&trans);
+}
+
+/*
+ * Flush and evict a key from the key cache:
+ */
+int bch2_btree_key_cache_flush(struct btree_trans *trans,
+ enum btree_id id, struct bpos pos)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached_key key = { id, pos };
+
+ /* Fastpath - assume it won't be found: */
+ if (!btree_key_cache_find(c, id, pos))
+ return 0;
+
+ return btree_key_cache_flush_pos(trans, key, 0, true);
+}
+
+bool bch2_btree_insert_key_cached(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *insert)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck = (void *) iter->l[0].b;
+
+ BUG_ON(insert->u64s > ck->u64s);
+
+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+ int difference;
+
+ BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);
+
+ difference = jset_u64s(insert->u64s) - ck->res.u64s;
+ if (difference > 0) {
+ trans->journal_preres.u64s -= difference;
+ ck->res.u64s += difference;
+ }
+ }
+
+ bkey_copy(ck->k, insert);
+ ck->valid = true;
+
+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ mutex_lock(&c->btree_key_cache.lock);
+ list_del_init(&ck->list);
+
+ set_bit(BKEY_CACHED_DIRTY, &ck->flags);
+ mutex_unlock(&c->btree_key_cache.lock);
+ }
+
+ bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
+ &ck->journal, btree_key_cache_journal_flush);
+ return true;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
+ enum btree_id id, struct bpos pos)
+{
+ BUG_ON(btree_key_cache_find(trans->c, id, pos));
+}
+#endif
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c)
+{
+ struct bkey_cached *ck, *n;
+
+ mutex_lock(&c->lock);
+ list_for_each_entry_safe(ck, n, &c->clean, list) {
+ kfree(ck->k);
+ kfree(ck);
+ }
+ list_for_each_entry_safe(ck, n, &c->freed, list)
+ kfree(ck);
+ mutex_unlock(&c->lock);
+
+ rhashtable_destroy(&c->table);
+}
+
+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
+{
+ mutex_init(&c->lock);
+ INIT_LIST_HEAD(&c->freed);
+ INIT_LIST_HEAD(&c->clean);
+}
+
+int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
+{
+ return rhashtable_init(&c->table, &bch2_btree_key_cache_params);
+}
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
new file mode 100644
index 000000000000..fbc29336091f
--- /dev/null
+++ b/fs/bcachefs/btree_key_cache.h
@@ -0,0 +1,23 @@
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
+#define _BCACHEFS_BTREE_KEY_CACHE_H
+
+int bch2_btree_iter_traverse_cached(struct btree_iter *);
+
+bool bch2_btree_insert_key_cached(struct btree_trans *,
+ struct btree_iter *, struct bkey_i *);
+int bch2_btree_key_cache_flush(struct btree_trans *,
+ enum btree_id, struct bpos);
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_key_cache_verify_clean(struct btree_trans *,
+ enum btree_id, struct bpos);
+#else
+static inline void
+bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
+ enum btree_id id, struct bpos pos) {}
+#endif
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
+int bch2_fs_btree_key_cache_init(struct btree_key_cache *);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 9ca4032f49a6..ba47f51263f9 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -183,6 +183,7 @@ struct btree_node_iter {
enum btree_iter_type {
BTREE_ITER_KEYS,
BTREE_ITER_NODES,
+ BTREE_ITER_CACHED,
};
#define BTREE_ITER_TYPE ((1 << 2) - 1)
@@ -214,6 +215,15 @@ enum btree_iter_type {
#define BTREE_ITER_IS_EXTENTS (1 << 6)
#define BTREE_ITER_ERROR (1 << 7)
#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8)
+#define BTREE_ITER_CACHED_NOFILL (1 << 9)
+#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
+
+#define BTREE_ITER_USER_FLAGS \
+ (BTREE_ITER_SLOTS \
+ |BTREE_ITER_INTENT \
+ |BTREE_ITER_PREFETCH \
+ |BTREE_ITER_CACHED_NOFILL \
+ |BTREE_ITER_CACHED_NOCREATE)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@@ -222,6 +232,14 @@ enum btree_iter_uptodate {
BTREE_ITER_NEED_TRAVERSE = 3,
};
+#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
+#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
+#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
+#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
+#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
+#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
+
/*
* @pos - iterator's current position
* @level - current btree depth
@@ -259,7 +277,8 @@ struct btree_iter {
unsigned long ip_allocated;
};
-static inline enum btree_iter_type btree_iter_type(struct btree_iter *iter)
+static inline enum btree_iter_type
+btree_iter_type(const struct btree_iter *iter)
{
return iter->flags & BTREE_ITER_TYPE;
}
@@ -269,6 +288,37 @@ static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
return iter->l + iter->level;
}
+struct btree_key_cache {
+ struct mutex lock;
+ struct rhashtable table;
+ struct list_head freed;
+ struct list_head clean;
+};
+
+struct bkey_cached_key {
+ u32 btree_id;
+ struct bpos pos;
+} __packed;
+
+#define BKEY_CACHED_DIRTY 0
+
+struct bkey_cached {
+ struct btree_bkey_cached_common c;
+
+ unsigned long flags;
+ u8 u64s;
+ bool valid;
+ struct bkey_cached_key key;
+
+ struct rhash_head hash;
+ struct list_head list;
+
+ struct journal_preres res;
+ struct journal_entry_pin journal;
+
+ struct bkey_i *k;
+};
+
struct btree_insert_entry {
unsigned trigger_flags;
unsigned trans_triggers_run:1;
@@ -307,6 +357,7 @@ struct btree_trans {
unsigned error:1;
unsigned nounlock:1;
unsigned need_reset:1;
+ unsigned in_traverse_all:1;
unsigned mem_top;
unsigned mem_bytes;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 11f7d02de622..e0b1bde37484 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -23,6 +23,7 @@ enum btree_insert_flags {
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
+ __BTREE_INSERT_JOURNAL_RECLAIM,
__BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD,
__BCH_HASH_SET_MUST_CREATE,
@@ -47,8 +48,12 @@ enum btree_insert_flags {
/* Insert is for journal replay - don't get journal reservations: */
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
+/* Indicates that we have pre-reserved space in the journal: */
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
+/* Insert is being called from journal reclaim path: */
+#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM)
+
/* Don't block on allocation failure (for new btree nodes: */
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a626a7698d13..9e6006d07585 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -529,11 +529,20 @@ static void btree_update_nodes_written(struct btree_update *as)
* to child nodes that weren't written yet: now, the child nodes have
* been written so we can write out the update to the interior node.
*/
+
+ /*
+ * We can't call into journal reclaim here: we'd block on the journal
+ * reclaim lock, but we may need to release the open buckets we have
+ * pinned in order for other btree updates to make forward progress, and
+ * journal reclaim does btree updates when flushing bkey_cached entries,
+ * which may require allocations as well.
+ */
ret = bch2_trans_do(c, &as->disk_res, &journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_JOURNAL_RESERVED,
btree_update_nodes_written_trans(&trans, as));
BUG_ON(ret && !bch2_journal_error(&c->journal));
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 1a1fd230e4b9..e82d4df9ccab 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -6,6 +6,7 @@
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_iter.h"
+#include "btree_key_cache.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
@@ -32,6 +33,9 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
{
bch2_btree_node_lock_write(b, iter);
+ if (btree_iter_type(iter) == BTREE_ITER_CACHED)
+ return;
+
if (unlikely(btree_node_just_written(b)) &&
bch2_btree_post_write_cleanup(c, b))
bch2_btree_iter_reinit_node(iter, b);
@@ -174,6 +178,9 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
+ EBUG_ON(!iter->level &&
+ !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
+
if (unlikely(!bch2_btree_bset_insert_key(iter, b,
&iter_l(iter)->iter, insert)))
return false;
@@ -202,6 +209,8 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
return true;
}
+/* Cached btree updates: */
+
/* Normal update interface: */
static inline void btree_insert_entry_checks(struct btree_trans *trans,
@@ -284,6 +293,31 @@ btree_key_can_insert(struct btree_trans *trans,
return BTREE_INSERT_OK;
}
+static enum btree_insert_ret
+btree_key_can_insert_cached(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *insert,
+ unsigned *u64s)
+{
+ struct bkey_cached *ck = (void *) iter->l[0].b;
+ unsigned new_u64s;
+ struct bkey_i *new_k;
+
+ BUG_ON(iter->level);
+
+ if (*u64s <= ck->u64s)
+ return BTREE_INSERT_OK;
+
+ new_u64s = roundup_pow_of_two(*u64s);
+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
+ if (!new_k)
+ return -ENOMEM;
+
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ return BTREE_INSERT_OK;
+}
+
static inline void do_btree_insert_one(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
@@ -297,7 +331,9 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
insert->k.needs_whiteout = false;
- did_work = btree_insert_key_leaf(trans, iter, insert);
+ did_work = (btree_iter_type(iter) != BTREE_ITER_CACHED)
+ ? btree_insert_key_leaf(trans, iter, insert)
+ : bch2_btree_insert_key_cached(trans, iter, insert);
if (!did_work)
return;
@@ -335,10 +371,16 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
- trans_for_each_update(trans, i)
- if (gc_visited(c, gc_pos_btree_node(iter_l(i->iter)->b)))
+ trans_for_each_update(trans, i) {
+ /*
+ * XXX: synchronization of cached update triggers with gc
+ */
+ BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED);
+
+ if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
bch2_mark_update(trans, i->iter, i->k, NULL,
i->trigger_flags|BTREE_TRIGGER_GC);
+ }
}
static inline int
@@ -371,7 +413,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
u64s = 0;
u64s += i->k->k.u64s;
- ret = btree_key_can_insert(trans, i->iter, i->k, &u64s);
+ ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED
+ ? btree_key_can_insert(trans, i->iter, i->k, &u64s)
+ : btree_key_can_insert_cached(trans, i->iter, i->k, &u64s);
if (ret) {
*stopped_at = i;
return ret;
@@ -467,7 +511,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
ret = bch2_journal_preres_get(&trans->c->journal,
&trans->journal_preres, trans->journal_preres_u64s,
- JOURNAL_RES_GET_NONBLOCK);
+ JOURNAL_RES_GET_NONBLOCK|
+ ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
+ ? JOURNAL_RES_GET_RECLAIM : 0));
if (unlikely(ret == -EAGAIN))
ret = bch2_trans_journal_preres_get_cold(trans,
trans->journal_preres_u64s);
@@ -523,7 +569,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
trans->nounlock = true;
trans_for_each_update2(trans, i)
- if (!same_leaf_as_prev(trans, i))
+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
+ !same_leaf_as_prev(trans, i))
bch2_foreground_maybe_merge(trans->c, i->iter,
0, trans->flags);
@@ -808,6 +855,14 @@ int __bch2_trans_commit(struct btree_trans *trans)
return ret;
}
+#ifdef CONFIG_BCACHEFS_DEBUG
+ trans_for_each_update(trans, i)
+ if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
+ !(i->trigger_flags & BTREE_TRIGGER_NORUN))
+ bch2_btree_key_cache_verify_clean(trans,
+ i->iter->btree_id, i->iter->pos);
+#endif
+
/*
* Running triggers will append more updates to the list of updates as
* we're walking it:
@@ -880,7 +935,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
BUG_ON(i->iter->locks_want < 1);
u64s = jset_u64s(i->k->k.u64s);
- if (0)
+ if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
+ likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
trans->journal_preres_u64s += u64s;
trans->journal_u64s += u64s;
}
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 2da60ba3b7cb..86ce91ef76a9 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1816,6 +1816,13 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (unlikely(flags & BTREE_TRIGGER_NOOVERWRITES))
return 0;
+ if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
+ struct bkey_cached *ck = (void *) iter->l[0].b;
+
+ return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k),
+ 0, 0, BTREE_TRIGGER_OVERWRITE);
+ }
+
while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
struct bkey unpacked;
struct bkey_s_c k;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 0cae90d6e053..357f42d31aaf 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -346,6 +346,37 @@ void __bch2_journal_pin_add(struct journal *j, u64 seq,
journal_wake(j);
}
+void bch2_journal_pin_update(struct journal *j, u64 seq,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn)
+{
+ if (journal_pin_active(pin) && pin->seq < seq)
+ return;
+
+ spin_lock(&j->lock);
+
+ if (pin->seq != seq) {
+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn);
+ } else {
+ struct journal_entry_pin_list *pin_list =
+ journal_seq_pin(j, seq);
+
+ /*
+ * If the pin is already pinning the right sequence number, it
+ * still might've already been flushed:
+ */
+ list_move(&pin->list, &pin_list->list);
+ }
+
+ spin_unlock(&j->lock);
+
+	/*
+	 * Wake the journal: if it is currently full, reclaim may want to
+	 * call flush_fn for this pin immediately:
+	 */
+ journal_wake(j);
+}
+
void bch2_journal_pin_copy(struct journal *j,
struct journal_entry_pin *dst,
struct journal_entry_pin *src,
diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h
index 272ba8a37967..8128907a7623 100644
--- a/fs/bcachefs/journal_reclaim.h
+++ b/fs/bcachefs/journal_reclaim.h
@@ -42,6 +42,10 @@ static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
__bch2_journal_pin_add(j, seq, pin, flush_fn);
}
+void bch2_journal_pin_update(struct journal *, u64,
+ struct journal_entry_pin *,
+ journal_pin_flush_fn);
+
void bch2_journal_pin_copy(struct journal *,
struct journal_entry_pin *,
struct journal_entry_pin *,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 111781428f0e..088ce018f890 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -13,6 +13,7 @@
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_gc.h"
+#include "btree_key_cache.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "chardev.h"
@@ -511,6 +512,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_fs_io_exit(c);
bch2_fs_btree_interior_update_exit(c);
bch2_fs_btree_iter_exit(c);
+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
bch2_fs_btree_cache_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
@@ -682,6 +684,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_allocator_background_init(c);
bch2_fs_allocator_foreground_init(c);
bch2_fs_rebalance_init(c);
@@ -777,6 +780,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_journal_init(&c->journal) ||
bch2_fs_replicas_init(c) ||
bch2_fs_btree_cache_init(c) ||
+ bch2_fs_btree_key_cache_init(&c->btree_key_cache) ||
bch2_fs_btree_iter_init(c) ||
bch2_fs_btree_interior_update_init(c) ||
bch2_fs_io_init(c) ||