author    Kent Overstreet <kent.overstreet@linux.dev>  2023-02-19 02:36:22 -0500
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-03-17 10:33:08 -0400
commit    475c638bc09f8279cd0d687f857d8513d2095d76 (patch)
tree      ff398457d8bf40af0552d33702d5ad275a155eaa
parent    ba83cd8fe637896b9b2c7832d3d55cbf79e564c4 (diff)
bcachefs: btree write buffer: Do write buffer updates via journal

Instead of appending to the write buffer in the transaction commit path,
remember that we also have everything we need in the journal: this adds a
new journal entry type, BCH_JSET_ENTRY_buffered_keys, for keys that need
to be added to the write buffer.

Before doing a journal write, in our compaction pass, we find those
journal entries, add their keys to the write buffer, and write them out
with the normal BCH_JSET_ENTRY_btree_keys type.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
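[Editor's note] A condensed sketch of the new flow, stitched together from the
hunks below; the identifiers are taken from the patch itself, but error
handling, locking, and surrounding context are elided, so this is illustrative
only, not a standalone implementation:

	/*
	 * Transaction commit (btree_update_leaf.c hunk): write-buffer keys
	 * now go into the journal under their own entry type instead of
	 * being copied into wb->keys directly.
	 */
	entry = bch2_journal_add_entry(j, &trans->journal_res,
				       BCH_JSET_ENTRY_buffered_keys,
				       wb->btree, 0,
				       wb->k.k.u64s);
	bkey_copy(&entry->start[0], &wb->k);

	/*
	 * Journal write, compaction pass (journal_io.c hunk): pull those
	 * keys into the write buffer, then let the entry go out on disk as
	 * an ordinary btree_keys entry.
	 */
	if (i->type == BCH_JSET_ENTRY_buffered_keys) {
		struct bkey_i *k;

		vstruct_for_each(i, k)
			bch2_write_buffer_key(c, seq, (u64 *) k - jset->_data,
					      i->btree_id, k);

		i->type = BCH_JSET_ENTRY_btree_keys;
	}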
-rw-r--r--  fs/bcachefs/backpointers.c              |   2
-rw-r--r--  fs/bcachefs/bcachefs_format.h           |   3
-rw-r--r--  fs/bcachefs/btree_update_leaf.c         |  33
-rw-r--r--  fs/bcachefs/btree_write_buffer.c        | 137
-rw-r--r--  fs/bcachefs/btree_write_buffer.h        |   7
-rw-r--r--  fs/bcachefs/btree_write_buffer_types.h  |  23
-rw-r--r--  fs/bcachefs/journal_io.c                |  31
7 files changed, 102 insertions, 134 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 740084b3ff12..362bf53c0abe 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -625,7 +625,7 @@ static int check_bp_exists(struct btree_trans *trans,
last_flushed->level = bp.level;
last_flushed->pos = orig_k.k->p;
- ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ ret = bch2_btree_write_buffer_flush_sync(trans, 0) ?:
-BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 8b29e462492b..4845471d1edf 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1944,7 +1944,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(clock, 7) \
x(dev_usage, 8) \
x(log, 9) \
- x(overwrite, 10)
+ x(overwrite, 10) \
+ x(buffered_keys, 11)
enum {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index d22ceb9506fd..1dc2c1138ad4 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -640,8 +640,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
marking = true;
}
- if (trans->nr_wb_updates &&
- trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
+ if (c->btree_write_buffer.nr > c->btree_write_buffer.size / 2)
return -BCH_ERR_btree_insert_need_flush_buffer;
/*
@@ -680,14 +679,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
return -BCH_ERR_btree_insert_need_mark_replicas;
- if (trans->nr_wb_updates) {
- EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY);
-
- ret = bch2_btree_insert_keys_write_buffer(trans);
- if (ret)
- goto revert_fs_usage;
- }
-
h = trans->hooks;
while (h) {
ret = h->fn(trans, h);
@@ -749,7 +740,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
trans_for_each_wb_update(trans, wb) {
entry = bch2_journal_add_entry(j, &trans->journal_res,
- BCH_JSET_ENTRY_btree_keys,
+ BCH_JSET_ENTRY_buffered_keys,
wb->btree, 0,
wb->k.k.u64s);
bkey_copy(&entry->start[0], &wb->k);
@@ -775,7 +766,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
return 0;
fatal_err:
bch2_fatal_error(c);
-revert_fs_usage:
if (trans->fs_usage_deltas)
bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas);
return ret;
@@ -1009,17 +999,15 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
ret = 0;
- if (wb->state.nr > wb->size * 3 / 4) {
+ if (wb->nr) {
bch2_trans_reset_updates(trans);
bch2_trans_unlock(trans);
- mutex_lock(&wb->flush_lock);
-
- if (wb->state.nr > wb->size * 3 / 4)
- ret = __bch2_btree_write_buffer_flush(trans,
+ mutex_lock(&wb->lock);
+ if (wb->nr)
+ ret = bch2_btree_write_buffer_flush_locked(trans,
flags|BTREE_INSERT_NOCHECK_RW, true);
- else
- mutex_unlock(&wb->flush_lock);
+ mutex_unlock(&wb->lock);
if (!ret) {
trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
@@ -1117,13 +1105,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
goto out_reset;
}
- if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
- mutex_trylock(&c->btree_write_buffer.flush_lock)) {
+ if (c->btree_write_buffer.nr &&
+ mutex_trylock(&c->btree_write_buffer.lock)) {
bch2_trans_begin(trans);
bch2_trans_unlock(trans);
- ret = __bch2_btree_write_buffer_flush(trans,
+ ret = bch2_btree_write_buffer_flush_locked(trans,
flags|BTREE_INSERT_NOCHECK_RW, true);
+ mutex_unlock(&trans->c->btree_write_buffer.lock);
if (!ret) {
trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 0a6a774e6662..1ba17fa6fecf 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -152,28 +152,8 @@ trans_commit:
BTREE_INSERT_JOURNAL_RECLAIM);
}
-static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
-{
- union btree_write_buffer_state old, new;
- u64 v = READ_ONCE(wb->state.v);
-
- do {
- old.v = new.v = v;
-
- new.nr = 0;
- new.idx++;
- } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
-
- while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
- cpu_relax();
-
- smp_mb();
-
- return old;
-}
-
-int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
- bool locked)
+int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans, unsigned commit_flags,
+ bool may_commit)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
@@ -183,20 +163,15 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
struct btree_iter iter = { NULL };
size_t nr = 0, skipped = 0, fast = 0;
bool write_locked = false;
- union btree_write_buffer_state s;
int ret = 0;
memset(&pin, 0, sizeof(pin));
- if (!locked && !mutex_trylock(&wb->flush_lock))
- return 0;
-
bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
bch2_journal_pin_drop(j, &wb->journal_pin);
- s = btree_write_buffer_switch(wb);
- keys = wb->keys[s.idx];
- nr = s.nr;
+ keys = wb->keys;
+ swap(nr, wb->nr);
/*
* We first sort so that we can detect and skip redundant updates, and
@@ -262,7 +237,6 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
out:
bch2_journal_pin_drop(j, &pin);
- mutex_unlock(&wb->flush_lock);
return ret;
slowpath:
trace_write_buffer_flush_slowpath(trans, i - keys, nr);
@@ -308,100 +282,89 @@ slowpath:
goto out;
}
-int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
+int bch2_btree_write_buffer_flush(struct btree_trans *trans)
{
- bch2_trans_unlock(trans);
- mutex_lock(&trans->c->btree_write_buffer.flush_lock);
- return __bch2_btree_write_buffer_flush(trans, 0, true);
+ struct bch_fs *c = trans->c;
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ int ret;
+
+ if (!mutex_trylock(&wb->lock))
+ return 0;
+
+ ret = bch2_btree_write_buffer_flush_locked(trans, 0, true);
+ mutex_unlock(&wb->lock);
+
+ return ret;
}
-int bch2_btree_write_buffer_flush(struct btree_trans *trans)
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans, unsigned commit_flags)
{
- return __bch2_btree_write_buffer_flush(trans, 0, false);
+ int ret;
+
+ bch2_trans_unlock(trans);
+
+ mutex_lock(&trans->c->btree_write_buffer.lock);
+ ret = bch2_btree_write_buffer_flush_locked(trans, commit_flags, true);
+ mutex_unlock(&trans->c->btree_write_buffer.lock);
+
+ return ret;
}
static int bch2_btree_write_buffer_journal_flush(struct journal *j,
struct journal_entry_pin *_pin, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct btree_write_buffer *wb = &c->btree_write_buffer;
-
- mutex_lock(&wb->flush_lock);
return bch2_trans_run(c,
- __bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
-}
-
-static inline u64 btree_write_buffer_ref(int idx)
-{
- return ((union btree_write_buffer_state) {
- .ref0 = idx == 0,
- .ref1 = idx == 1,
- }).v;
+ bch2_btree_write_buffer_flush_sync(&trans, BTREE_INSERT_NOCHECK_RW));
}
-int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
+int bch2_write_buffer_key(struct bch_fs *c, u64 seq, unsigned offset,
+ enum btree_id btree, struct bkey_i *k)
{
- struct bch_fs *c = trans->c;
struct btree_write_buffer *wb = &c->btree_write_buffer;
- struct btree_write_buffered_key *i;
- union btree_write_buffer_state old, new;
- int ret = 0;
- u64 v;
- trans_for_each_wb_update(trans, i) {
- EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
+ lockdep_assert_held(&wb->lock);
- i->journal_seq = trans->journal_res.seq;
- i->journal_offset = trans->journal_res.offset;
+ if (wb->nr == wb->size) {
+ BUG();
+ /*
+ int ret = bch2_trans_run(c,
+ bch2_btree_write_buffer_flush_locked(&trans,
+ BTREE_INSERT_NOCHECK_RW, false));
+ BUG_ON(ret);
+ */
}
- preempt_disable();
- v = READ_ONCE(wb->state.v);
- do {
- old.v = new.v = v;
-
- new.v += btree_write_buffer_ref(new.idx);
- new.nr += trans->nr_wb_updates;
- if (new.nr > wb->size) {
- ret = -BCH_ERR_btree_insert_need_flush_buffer;
- goto out;
- }
- } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
-
- memcpy(wb->keys[new.idx] + old.nr,
- trans->wb_updates,
- sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
+ wb->keys[wb->nr].journal_seq = seq;
+ wb->keys[wb->nr].journal_offset = offset;
+ wb->keys[wb->nr].btree = btree;
+ bkey_copy(&wb->keys[wb->nr].k, k);
+ wb->nr++;
- bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
+ bch2_journal_pin_add(&c->journal, seq, &wb->journal_pin,
bch2_btree_write_buffer_journal_flush);
-
- atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
-out:
- preempt_enable();
- return ret;
+ return 0;
}
void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
- BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));
+ WARN_ON(wb->nr && !bch2_journal_error(&c->journal));
- kvfree(wb->keys[1]);
- kvfree(wb->keys[0]);
+ kvfree(wb->keys);
}
int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
- mutex_init(&wb->flush_lock);
+ mutex_init(&wb->lock);
wb->size = c->opts.btree_write_buffer_size;
- wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
- wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
- if (!wb->keys[0] || !wb->keys[1])
+ wb->keys = kvmalloc_array(wb->size, sizeof(*wb->keys), GFP_KERNEL);
+ if (!wb->keys)
return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init;
return 0;
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index 322df1c8304e..5663a0f941d2 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -2,11 +2,12 @@
#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
#define _BCACHEFS_BTREE_WRITE_BUFFER_H
-int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool);
-int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+int bch2_btree_write_buffer_flush_locked(struct btree_trans *, unsigned, bool);
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *, unsigned);
int bch2_btree_write_buffer_flush(struct btree_trans *);
-int bch2_btree_insert_keys_write_buffer(struct btree_trans *);
+int bch2_write_buffer_key(struct bch_fs *, u64, unsigned,
+ enum btree_id, struct bkey_i *);
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
int bch2_fs_btree_write_buffer_init(struct bch_fs *);
diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h
index 99993ba77aea..be224c3c1850 100644
--- a/fs/bcachefs/btree_write_buffer_types.h
+++ b/fs/bcachefs/btree_write_buffer_types.h
@@ -14,31 +14,14 @@ struct btree_write_buffered_key {
__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
};
-union btree_write_buffer_state {
- struct {
- atomic64_t counter;
- };
-
- struct {
- u64 v;
- };
-
- struct {
- u64 nr:23;
- u64 idx:1;
- u64 ref0:20;
- u64 ref1:20;
- };
-};
-
struct btree_write_buffer {
- struct mutex flush_lock;
+ struct mutex lock;
struct journal_entry_pin journal_pin;
- union btree_write_buffer_state state;
+ size_t nr;
size_t size;
- struct btree_write_buffered_key *keys[2];
+ struct btree_write_buffered_key *keys;
};
#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 45b1b839783d..6c1bfcf92dc4 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -4,6 +4,7 @@
#include "alloc_foreground.h"
#include "btree_io.h"
#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
#include "buckets.h"
#include "checksum.h"
#include "disk_groups.h"
@@ -672,6 +673,21 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
journal_entry_btree_keys_to_text(out, c, entry);
}
+static int journal_entry_buffered_keys_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian, int write)
+{
+ return -EINVAL;
+}
+
+static void journal_entry_buffered_keys_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ journal_entry_btree_keys_to_text(out, c, entry);
+}
+
struct jset_entry_ops {
int (*validate)(struct bch_fs *, struct jset *,
struct jset_entry *, unsigned, int, int);
@@ -1624,6 +1640,9 @@ static void do_journal_write(struct closure *cl)
static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset)
{
struct jset_entry *i, *next, *prev = NULL;
+ u64 seq = le64_to_cpu(jset->seq);
+
+ mutex_lock(&c->btree_write_buffer.lock);
/*
* Simple compaction, dropping empty jset_entries (from journal
@@ -1643,6 +1662,16 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
if (i->type == BCH_JSET_ENTRY_btree_root)
bch2_journal_entry_to_btree_root(c, i);
+ if (i->type == BCH_JSET_ENTRY_buffered_keys) {
+ struct bkey_i *k;
+
+ vstruct_for_each(i, k)
+ bch2_write_buffer_key(c, seq, (u64 *) k - jset->_data,
+ i->btree_id, k);
+
+ i->type = BCH_JSET_ENTRY_btree_keys;
+ }
+
/* Can we merge with previous entry? */
if (prev &&
i->btree_id == prev->btree_id &&
@@ -1665,6 +1694,8 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
prev = prev ? vstruct_next(prev) : jset->start;
jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
+
+ mutex_unlock(&c->btree_write_buffer.lock);
}
void bch2_journal_write(struct closure *cl)