author		Kent Overstreet <kent.overstreet@linux.dev>	2023-02-19 02:36:22 -0500
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-03-17 10:33:08 -0400
commit		475c638bc09f8279cd0d687f857d8513d2095d76 (patch)
tree		ff398457d8bf40af0552d33702d5ad275a155eaa
parent		ba83cd8fe637896b9b2c7832d3d55cbf79e564c4 (diff)
bcachefs: btree write buffer: Do write buffer updates via journal
Instead of appending to the write buffer in the transaction commit path,
remember that we also have everything we need in the journal:
This adds a new journal entry type, BCH_JSET_ENTRY_buffered_keys, for
keys that need to be added to the write buffer. Before doing a journal
write, in our compaction pass, we find those journal entries and add
them to the write buffer, and write them out with the normal
BCH_JSET_ENTRY_btree_keys type.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
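
The heart of the change is that pre-write pass: with the write buffer lock
held, every BCH_JSET_ENTRY_buffered_keys entry in the jset has its keys copied
into the write buffer (tagged with the journal sequence number and offset so
the journal pin can later be released), and the entry is then retagged as a
plain BCH_JSET_ENTRY_btree_keys entry, so the on-disk journal format needs no
separate replay path. A condensed sketch of that pass, lifted from the
journal_io.c hunk below; the standalone function name is illustrative (in the
patch this logic sits inside bch2_journal_entries_postprocess()), and
struct bch_fs, vstruct_for_each() and bch2_write_buffer_key() are the
bcachefs definitions:

/* Illustrative wrapper; in the patch this runs inside bch2_journal_entries_postprocess() */
static void journal_buffered_keys_to_write_buffer(struct bch_fs *c, struct jset *jset)
{
	struct jset_entry *entry;
	u64 seq = le64_to_cpu(jset->seq);

	mutex_lock(&c->btree_write_buffer.lock);

	vstruct_for_each(jset, entry)
		if (entry->type == BCH_JSET_ENTRY_buffered_keys) {
			struct bkey_i *k;

			/* Stash each key in the in-memory write buffer, remembering its journal position: */
			vstruct_for_each(entry, k)
				bch2_write_buffer_key(c, seq, (u64 *) k - jset->_data,
						      entry->btree_id, k);

			/* Retag so the entry is written out (and replayed) as ordinary btree keys: */
			entry->type = BCH_JSET_ENTRY_btree_keys;
		}

	mutex_unlock(&c->btree_write_buffer.lock);
}

The buffered keys then sit in the (now single, mutex-protected) wb->keys
array, pinned on that journal sequence number, until one of the
bch2_btree_write_buffer_flush*() paths inserts them into their destination
btrees and drops the pin.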
-rw-r--r--	fs/bcachefs/backpointers.c		|   2
-rw-r--r--	fs/bcachefs/bcachefs_format.h		|   3
-rw-r--r--	fs/bcachefs/btree_update_leaf.c		|  33
-rw-r--r--	fs/bcachefs/btree_write_buffer.c	| 137
-rw-r--r--	fs/bcachefs/btree_write_buffer.h	|   7
-rw-r--r--	fs/bcachefs/btree_write_buffer_types.h	|  23
-rw-r--r--	fs/bcachefs/journal_io.c		|  31
7 files changed, 102 insertions(+), 134 deletions(-)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 740084b3ff12..362bf53c0abe 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -625,7 +625,7 @@ static int check_bp_exists(struct btree_trans *trans,
 		last_flushed->level	= bp.level;
 		last_flushed->pos	= orig_k.k->p;
 
-		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+		ret = bch2_btree_write_buffer_flush_sync(trans, 0) ?:
 			-BCH_ERR_transaction_restart_write_buffer_flush;
 		goto out;
 	}
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 8b29e462492b..4845471d1edf 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1944,7 +1944,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 	x(clock,		7)		\
 	x(dev_usage,		8)		\
 	x(log,			9)		\
-	x(overwrite,		10)
+	x(overwrite,		10)		\
+	x(buffered_keys,	11)
 
 enum {
 #define x(f, nr)	BCH_JSET_ENTRY_##f	= nr,
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index d22ceb9506fd..1dc2c1138ad4 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -640,8 +640,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 			marking = true;
 	}
 
-	if (trans->nr_wb_updates &&
-	    trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
+	if (c->btree_write_buffer.nr > c->btree_write_buffer.size / 2)
 		return -BCH_ERR_btree_insert_need_flush_buffer;
 
 	/*
@@ -680,14 +679,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	    bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
 		return -BCH_ERR_btree_insert_need_mark_replicas;
 
-	if (trans->nr_wb_updates) {
-		EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY);
-
-		ret = bch2_btree_insert_keys_write_buffer(trans);
-		if (ret)
-			goto revert_fs_usage;
-	}
-
 	h = trans->hooks;
 	while (h) {
 		ret = h->fn(trans, h);
@@ -749,7 +740,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 
 	trans_for_each_wb_update(trans, wb) {
 		entry = bch2_journal_add_entry(j, &trans->journal_res,
-				       BCH_JSET_ENTRY_btree_keys,
+				       BCH_JSET_ENTRY_buffered_keys,
 				       wb->btree, 0,
 				       wb->k.k.u64s);
 		bkey_copy(&entry->start[0], &wb->k);
@@ -775,7 +766,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	return 0;
 fatal_err:
 	bch2_fatal_error(c);
-revert_fs_usage:
 	if (trans->fs_usage_deltas)
 		bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas);
 	return ret;
@@ -1009,17 +999,15 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 
 		ret = 0;
 
-		if (wb->state.nr > wb->size * 3 / 4) {
+		if (wb->nr) {
 			bch2_trans_reset_updates(trans);
 			bch2_trans_unlock(trans);
 
-			mutex_lock(&wb->flush_lock);
-
-			if (wb->state.nr > wb->size * 3 / 4)
-				ret = __bch2_btree_write_buffer_flush(trans,
+			mutex_lock(&wb->lock);
+			if (wb->nr)
+				ret = bch2_btree_write_buffer_flush_locked(trans,
 						flags|BTREE_INSERT_NOCHECK_RW, true);
-			else
-				mutex_unlock(&wb->flush_lock);
+			mutex_unlock(&wb->lock);
 
 			if (!ret) {
 				trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
@@ -1117,13 +1105,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 		goto out_reset;
 	}
 
-	if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
-	    mutex_trylock(&c->btree_write_buffer.flush_lock)) {
+	if (c->btree_write_buffer.nr &&
+	    mutex_trylock(&c->btree_write_buffer.lock)) {
 		bch2_trans_begin(trans);
 		bch2_trans_unlock(trans);
 
-		ret = __bch2_btree_write_buffer_flush(trans,
+		ret = bch2_btree_write_buffer_flush_locked(trans,
 					flags|BTREE_INSERT_NOCHECK_RW, true);
+		mutex_unlock(&trans->c->btree_write_buffer.lock);
 		if (!ret) {
 			trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
 			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 0a6a774e6662..1ba17fa6fecf 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -152,28 +152,8 @@ trans_commit:
 				     BTREE_INSERT_JOURNAL_RECLAIM);
 }
 
-static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
-{
-	union btree_write_buffer_state old, new;
-	u64 v = READ_ONCE(wb->state.v);
-
-	do {
-		old.v = new.v = v;
-
-		new.nr = 0;
-		new.idx++;
-	} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
-
-	while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
-		cpu_relax();
-
-	smp_mb();
-
-	return old;
-}
-
-int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
-				    bool locked)
+int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans, unsigned commit_flags,
+					 bool may_commit)
 {
 	struct bch_fs *c = trans->c;
 	struct journal *j = &c->journal;
@@ -183,20 +163,15 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
 	struct btree_iter iter = { NULL };
 	size_t nr = 0, skipped = 0, fast = 0;
 	bool write_locked = false;
-	union btree_write_buffer_state s;
 	int ret = 0;
 
 	memset(&pin, 0, sizeof(pin));
 
-	if (!locked && !mutex_trylock(&wb->flush_lock))
-		return 0;
-
 	bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
 	bch2_journal_pin_drop(j, &wb->journal_pin);
 
-	s = btree_write_buffer_switch(wb);
-	keys = wb->keys[s.idx];
-	nr = s.nr;
+	keys = wb->keys;
+	swap(nr, wb->nr);
 
 	/*
 	 * We first sort so that we can detect and skip redundant updates, and
@@ -262,7 +237,6 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
 	bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
 out:
 	bch2_journal_pin_drop(j, &pin);
-	mutex_unlock(&wb->flush_lock);
 	return ret;
 slowpath:
 	trace_write_buffer_flush_slowpath(trans, i - keys, nr);
@@ -308,100 +282,89 @@ slowpath:
 	goto out;
 }
 
-int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
+int bch2_btree_write_buffer_flush(struct btree_trans *trans)
 {
-	bch2_trans_unlock(trans);
-	mutex_lock(&trans->c->btree_write_buffer.flush_lock);
-	return __bch2_btree_write_buffer_flush(trans, 0, true);
+	struct bch_fs *c = trans->c;
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+	int ret;
+
+	if (!mutex_trylock(&wb->lock))
+		return 0;
+
+	ret = bch2_btree_write_buffer_flush_locked(trans, 0, true);
+	mutex_unlock(&wb->lock);
+
+	return ret;
 }
 
-int bch2_btree_write_buffer_flush(struct btree_trans *trans)
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans, unsigned commit_flags)
 {
-	return __bch2_btree_write_buffer_flush(trans, 0, false);
+	int ret;
+
+	bch2_trans_unlock(trans);
+
+	mutex_lock(&trans->c->btree_write_buffer.lock);
+	ret = bch2_btree_write_buffer_flush_locked(trans, commit_flags, true);
+	mutex_unlock(&trans->c->btree_write_buffer.lock);
+
+	return ret;
 }
 
 static int bch2_btree_write_buffer_journal_flush(struct journal *j,
 				struct journal_entry_pin *_pin, u64 seq)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct btree_write_buffer *wb = &c->btree_write_buffer;
-
-	mutex_lock(&wb->flush_lock);
 
 	return bch2_trans_run(c,
-			__bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
-}
-
-static inline u64 btree_write_buffer_ref(int idx)
-{
-	return ((union btree_write_buffer_state) {
-		.ref0 = idx == 0,
-		.ref1 = idx == 1,
-	}).v;
+			bch2_btree_write_buffer_flush_sync(&trans, BTREE_INSERT_NOCHECK_RW));
 }
 
-int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
+int bch2_write_buffer_key(struct bch_fs *c, u64 seq, unsigned offset,
+			  enum btree_id btree, struct bkey_i *k)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
-	struct btree_write_buffered_key *i;
-	union btree_write_buffer_state old, new;
-	int ret = 0;
-	u64 v;
 
-	trans_for_each_wb_update(trans, i) {
-		EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
+	lockdep_assert_held(&wb->lock);
 
-		i->journal_seq		= trans->journal_res.seq;
-		i->journal_offset	= trans->journal_res.offset;
+	if (wb->nr == wb->size) {
+		BUG();
+		/*
+		int ret = bch2_trans_run(c,
+			bch2_btree_write_buffer_flush_locked(&trans,
+					BTREE_INSERT_NOCHECK_RW, false));
+		BUG_ON(ret);
+		*/
 	}
 
-	preempt_disable();
-	v = READ_ONCE(wb->state.v);
-	do {
-		old.v = new.v = v;
-
-		new.v += btree_write_buffer_ref(new.idx);
-		new.nr += trans->nr_wb_updates;
-		if (new.nr > wb->size) {
-			ret = -BCH_ERR_btree_insert_need_flush_buffer;
-			goto out;
-		}
-	} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
-
-	memcpy(wb->keys[new.idx] + old.nr,
-	       trans->wb_updates,
-	       sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
+	wb->keys[wb->nr].journal_seq	= seq;
+	wb->keys[wb->nr].journal_offset	= offset;
+	wb->keys[wb->nr].btree		= btree;
+	bkey_copy(&wb->keys[wb->nr].k, k);
+	wb->nr++;
 
-	bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
+	bch2_journal_pin_add(&c->journal, seq, &wb->journal_pin,
 			     bch2_btree_write_buffer_journal_flush);
-
-	atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
-out:
-	preempt_enable();
-	return ret;
+	return 0;
 }
 
 void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
 {
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
 
-	BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));
+	WARN_ON(wb->nr && !bch2_journal_error(&c->journal));
 
-	kvfree(wb->keys[1]);
-	kvfree(wb->keys[0]);
+	kvfree(wb->keys);
 }
 
 int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
 {
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
 
-	mutex_init(&wb->flush_lock);
+	mutex_init(&wb->lock);
 	wb->size = c->opts.btree_write_buffer_size;
 
-	wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
-	wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
-	if (!wb->keys[0] || !wb->keys[1])
+	wb->keys = kvmalloc_array(wb->size, sizeof(*wb->keys), GFP_KERNEL);
+	if (!wb->keys)
 		return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init;
 
 	return 0;
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index 322df1c8304e..5663a0f941d2 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -2,11 +2,12 @@
 #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
 #define _BCACHEFS_BTREE_WRITE_BUFFER_H
 
-int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool);
-int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+int bch2_btree_write_buffer_flush_locked(struct btree_trans *, unsigned, bool);
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *, unsigned);
 int bch2_btree_write_buffer_flush(struct btree_trans *);
 
-int bch2_btree_insert_keys_write_buffer(struct btree_trans *);
+int bch2_write_buffer_key(struct bch_fs *, u64, unsigned,
+			  enum btree_id, struct bkey_i *);
 
 void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
 int bch2_fs_btree_write_buffer_init(struct bch_fs *);
diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h
index 99993ba77aea..be224c3c1850 100644
--- a/fs/bcachefs/btree_write_buffer_types.h
+++ b/fs/bcachefs/btree_write_buffer_types.h
@@ -14,31 +14,14 @@ struct btree_write_buffered_key {
 	__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
 };
 
-union btree_write_buffer_state {
-	struct {
-		atomic64_t	counter;
-	};
-
-	struct {
-		u64		v;
-	};
-
-	struct {
-		u64		nr:23;
-		u64		idx:1;
-		u64		ref0:20;
-		u64		ref1:20;
-	};
-};
-
 struct btree_write_buffer {
-	struct mutex		flush_lock;
+	struct mutex		lock;
 	struct journal_entry_pin journal_pin;
 
-	union btree_write_buffer_state state;
+	size_t			nr;
 	size_t			size;
 
-	struct btree_write_buffered_key *keys[2];
+	struct btree_write_buffered_key *keys;
 };
 
 #endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 45b1b839783d..6c1bfcf92dc4 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -4,6 +4,7 @@
 #include "alloc_foreground.h"
 #include "btree_io.h"
 #include "btree_update_interior.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "disk_groups.h"
@@ -672,6 +673,21 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
 	journal_entry_btree_keys_to_text(out, c, entry);
 }
 
+static int journal_entry_buffered_keys_validate(struct bch_fs *c,
+				struct jset *jset,
+				struct jset_entry *entry,
+				unsigned version, int big_endian, int write)
+{
+	return -EINVAL;
+}
+
+static void journal_entry_buffered_keys_to_text(struct printbuf *out,
+				struct bch_fs *c,
+				struct jset_entry *entry)
+{
+	journal_entry_btree_keys_to_text(out, c, entry);
+}
+
 struct jset_entry_ops {
 	int (*validate)(struct bch_fs *, struct jset *,
 			struct jset_entry *, unsigned, int, int);
@@ -1624,6 +1640,9 @@ static void do_journal_write(struct closure *cl)
 static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset)
 {
 	struct jset_entry *i, *next, *prev = NULL;
+	u64 seq = le64_to_cpu(jset->seq);
+
+	mutex_lock(&c->btree_write_buffer.lock);
 
 	/*
 	 * Simple compaction, dropping empty jset_entries (from journal
@@ -1643,6 +1662,16 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
 		if (i->type == BCH_JSET_ENTRY_btree_root)
 			bch2_journal_entry_to_btree_root(c, i);
 
+		if (i->type == BCH_JSET_ENTRY_buffered_keys) {
+			struct bkey_i *k;
+
+			vstruct_for_each(i, k)
+				bch2_write_buffer_key(c, seq, (u64 *) k - jset->_data,
+						      i->btree_id, k);
+
+			i->type = BCH_JSET_ENTRY_btree_keys;
+		}
+
 		/* Can we merge with previous entry? */
 		if (prev &&
 		    i->btree_id == prev->btree_id &&
@@ -1665,6 +1694,8 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
 
 	prev = prev ? vstruct_next(prev) : jset->start;
 	jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
+
+	mutex_unlock(&c->btree_write_buffer.lock);
 }
 
 void bch2_journal_write(struct closure *cl)