summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/btree_update_leaf.c13
-rw-r--r--fs/bcachefs/btree_write_buffer.c108
-rw-r--r--fs/bcachefs/btree_write_buffer_types.h24
3 files changed, 91 insertions, 54 deletions
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index edc7faacd460..b2da6cf2b279 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -646,7 +646,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
}
if (trans->nr_wb_updates &&
- trans->nr_wb_updates + c->btree_write_buffer.nr > c->btree_write_buffer.size)
+ trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
return -BCH_ERR_btree_insert_need_flush_buffer;
/*
@@ -1006,21 +1006,16 @@ int bch2_trans_commit_error(struct btree_trans *trans,
break;
case -BCH_ERR_btree_insert_need_flush_buffer: {
struct btree_write_buffer *wb = &c->btree_write_buffer;
- bool need_flush;
ret = 0;
- mutex_lock(&wb->lock);
- need_flush = wb->nr > wb->size * 3 / 4;
- mutex_unlock(&wb->lock);
-
- if (need_flush) {
+ if (wb->state.nr > wb->size * 3 / 4) {
bch2_trans_reset_updates(trans);
bch2_trans_unlock(trans);
mutex_lock(&wb->flush_lock);
- if (wb->nr > wb->size * 3 / 4)
+ if (wb->state.nr > wb->size * 3 / 4)
ret = __bch2_btree_write_buffer_flush(trans,
trans->flags|BTREE_INSERT_NOCHECK_RW,
true);
@@ -1122,7 +1117,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto out_reset;
}
- if (c->btree_write_buffer.nr > c->btree_write_buffer.size / 2 &&
+ if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
mutex_trylock(&c->btree_write_buffer.flush_lock)) {
bch2_trans_begin(trans);
bch2_trans_unlock(trans);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 760269ce763f..1cd0ffaf961c 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -69,6 +69,24 @@ trans_commit:
BTREE_INSERT_JOURNAL_RECLAIM);
}
+static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
+{
+ union btree_write_buffer_state old, new;
+ u64 v = READ_ONCE(wb->state.v);
+
+ do {
+ old.v = new.v = v;
+
+ new.nr = 0;
+ new.idx++;
+ } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
+
+ while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
+ cpu_relax();
+
+ return old;
+}
+
int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
bool locked)
{
@@ -76,10 +94,11 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct journal_entry_pin pin;
- struct btree_write_buffered_key *i, *dst;
+ struct btree_write_buffered_key *i, *dst, *keys;
struct btree_iter iter = { NULL };
size_t nr = 0, skipped = 0, fast = 0;
bool write_locked = false;
+ union btree_write_buffer_state s;
int ret = 0;
memset(&pin, 0, sizeof(pin));
@@ -87,13 +106,11 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
if (!locked && !mutex_trylock(&wb->flush_lock))
return 0;
- mutex_lock(&wb->lock);
- swap(wb->keys, wb->flushing);
- swap(wb->nr, nr);
-
bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
bch2_journal_pin_drop(j, &wb->journal_pin);
- mutex_unlock(&wb->lock);
+
+ s = btree_write_buffer_switch(wb);
+ keys = wb->keys[s.idx];
/*
* We first sort so that we can detect and skip redundant updates, and
@@ -111,15 +128,11 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
* flushing, this time dropping journal pins as we go.
*/
- sort(wb->flushing, nr,
- sizeof(wb->flushing[0]),
- btree_write_buffered_key_cmp,
- NULL);
+ sort(keys, nr, sizeof(keys[0]),
+ btree_write_buffered_key_cmp, NULL);
- for (i = wb->flushing;
- i < wb->flushing + nr && !ret;
- i++) {
- if (i + 1 < wb->flushing + nr &&
+ for (i = keys; i < keys + nr && !ret; i++) {
+ if (i + 1 < keys + nr &&
i[0].btree == i[1].btree &&
bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
skipped++;
@@ -167,13 +180,11 @@ out:
mutex_unlock(&wb->flush_lock);
return ret;
slowpath:
- trace_write_buffer_flush_slowpath(trans, i - wb->flushing, nr);
+ trace_write_buffer_flush_slowpath(trans, i - keys, nr);
- dst = wb->flushing;
- for (;
- i < wb->flushing + nr;
- i++) {
- if (i + 1 < wb->flushing + nr &&
+ dst = keys;
+ for (; i < keys + nr; i++) {
+ if (i + 1 < keys + nr &&
i[0].btree == i[1].btree &&
bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
if (bkey_deleted(&i[1].k.k))
@@ -184,16 +195,13 @@ slowpath:
*dst = *i;
dst++;
}
- nr = dst - wb->flushing;
+ nr = dst - keys;
- sort(wb->flushing, nr,
- sizeof(wb->flushing[0]),
+ sort(keys, nr, sizeof(keys[0]),
btree_write_buffered_journal_cmp,
NULL);
- for (i = wb->flushing;
- i < wb->flushing + nr;
- i++) {
+ for (i = keys; i < keys + nr; i++) {
if (i->journal_seq > pin.seq) {
struct journal_entry_pin pin2;
@@ -233,11 +241,22 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
false));
}
+static inline u64 btree_write_buffer_ref(int idx)
+{
+ return ((union btree_write_buffer_state) {
+ .ref0 = idx == 0,
+ .ref1 = idx == 1,
+ }).v;
+}
+
int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_write_buffered_key *i;
+ union btree_write_buffer_state old, new;
+ int ret = 0;
+ u64 v;
EBUG_ON(trans->flags & BTREE_INSERT_JOURNAL_REPLAY);
@@ -248,44 +267,51 @@ int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
i->journal_offset = trans->journal_res.offset;
}
- mutex_lock(&wb->lock);
- if (wb->nr + trans->nr_wb_updates > wb->size) {
- mutex_unlock(&wb->lock);
- return -BCH_ERR_btree_insert_need_flush_buffer;
- }
+ preempt_disable();
+ v = READ_ONCE(wb->state.v);
+ do {
+ old.v = new.v = v;
- memcpy(wb->keys + wb->nr,
+ new.v += btree_write_buffer_ref(new.idx);
+ new.nr += trans->nr_wb_updates;
+ if (new.nr > wb->size) {
+ ret = -BCH_ERR_btree_insert_need_flush_buffer;
+ goto out;
+ }
+ } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
+
+ memcpy(wb->keys[new.idx] + old.nr,
trans->wb_updates,
sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
- wb->nr += trans->nr_wb_updates;
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
bch2_btree_write_buffer_journal_flush);
- mutex_unlock(&wb->lock);
- return 0;
+ atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
+out:
+ preempt_enable();
+ return ret;
}
void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
- kvfree(wb->flushing);
- kvfree(wb->keys);
+ kvfree(wb->keys[1]);
+ kvfree(wb->keys[0]);
}
int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
- mutex_init(&wb->lock);
mutex_init(&wb->flush_lock);
wb->size = c->opts.btree_write_buffer_size;
- wb->keys = kvmalloc_array(wb->size, sizeof(wb->keys[0]), GFP_KERNEL);
- wb->flushing = kvmalloc_array(wb->size, sizeof(wb->keys[0]), GFP_KERNEL);
- if (!wb->keys || !wb->flushing)
+ wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
+ wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
+ if (!wb->keys[0] || !wb->keys[1])
return -ENOMEM;
return 0;
diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h
index 86d6d1df5a82..99993ba77aea 100644
--- a/fs/bcachefs/btree_write_buffer_types.h
+++ b/fs/bcachefs/btree_write_buffer_types.h
@@ -14,15 +14,31 @@ struct btree_write_buffered_key {
__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
};
+union btree_write_buffer_state {
+ struct {
+ atomic64_t counter;
+ };
+
+ struct {
+ u64 v;
+ };
+
+ struct {
+ u64 nr:23;
+ u64 idx:1;
+ u64 ref0:20;
+ u64 ref1:20;
+ };
+};
+
struct btree_write_buffer {
- struct mutex lock;
struct mutex flush_lock;
struct journal_entry_pin journal_pin;
- struct btree_write_buffered_key *keys;
- struct btree_write_buffered_key *flushing;
- size_t nr;
+ union btree_write_buffer_state state;
size_t size;
+
+ struct btree_write_buffered_key *keys[2];
};
#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */