author    Kent Overstreet <kent.overstreet@gmail.com>  2021-04-06 15:33:19 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>  2021-05-19 15:32:05 -0400
commit    682aac735228d4f94fdcfa55ea6be2e3a4fabb4b (patch)
tree      aea82d1362e2e06ab8deb73cde6a8cc2a4dada1b
parent    87e789152f91045a276636d1a3f93ea68f38008b (diff)
bcachefs: Improve bset compaction
The previous patch, which fixed btree nodes being written too aggressively, meant that we were no longer sorting btree node bsets optimally; this patch fixes that.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--  fs/bcachefs/btree_cache.c             2
-rw-r--r--  fs/bcachefs/btree_io.c               51
-rw-r--r--  fs/bcachefs/btree_io.h                3
-rw-r--r--  fs/bcachefs/btree_update_interior.h   4
4 files changed, 39 insertions(+), 21 deletions(-)
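The heart of the change is the size check added to bch2_btree_init_next() (in the btree_io.c hunk below): when a node already has MAX_BSETS bsets, the log2 sizes of the three bsets decide between writing the node out and merging bsets in memory. Below is a minimal standalone sketch of that decision with made-up sizes; ilog2_u() is a portable stand-in for the kernel's ilog2(), and none of this is the kernel code itself.

#include <stdio.h>

#define MAX_BSETS	3

/* portable stand-in for the kernel's ilog2() */
static unsigned ilog2_u(unsigned v)
{
	unsigned l = 0;

	while (v >>= 1)
		l++;
	return l;
}

int main(void)
{
	/* hypothetical bset sizes, in u64s: oldest, middle, newest */
	unsigned bset_u64s[MAX_BSETS] = { 512, 128, 16 };
	unsigned log_u64s[MAX_BSETS] = {
		ilog2_u(bset_u64s[0]),	/* 9 */
		ilog2_u(bset_u64s[1]),	/* 7 */
		ilog2_u(bset_u64s[2]),	/* 4 */
	};

	/*
	 * Same test the patch adds: if the middle bset is at least the
	 * midpoint of its neighbours in log space, write the node out
	 * rather than re-sorting bsets in memory:
	 */
	if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2)
		printf("write node out\n");	/* taken: 7 >= (9 + 4) / 2 == 6 */
	else
		printf("compact bsets in memory\n");

	return 0;
}

The apparent intent is that once the middle bset has grown comparable to its neighbours, an in-memory sort touches about as much data as a write would, so the write wins and the node resets to a single on-disk bset.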
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 1abc50f134e6..26e8c04f981a 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -214,7 +214,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 		if (bch2_verify_btree_ondisk)
 			bch2_btree_node_write(c, b, SIX_LOCK_intent);
 		else
-			__bch2_btree_node_write(c, b, SIX_LOCK_read);
+			__bch2_btree_node_write(c, b);
 
 		/* wait for any in flight btree write */
 		btree_node_wait_on_io(b);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 7e6858e3af2b..c8d8df9637db 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -241,7 +241,6 @@ bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 }
 
 static void btree_node_sort(struct bch_fs *c, struct btree *b,
-			    struct btree_iter *iter,
 			    unsigned start_idx,
 			    unsigned end_idx,
 			    bool filter_whiteouts)
@@ -377,8 +376,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
  * We're about to add another bset to the btree node, so if there's currently
  * too many bsets - sort some of them together:
  */
-static bool btree_node_compact(struct bch_fs *c, struct btree *b,
-			       struct btree_iter *iter)
+static bool btree_node_compact(struct bch_fs *c, struct btree *b)
 {
 	unsigned unwritten_idx;
 	bool ret = false;
@@ -390,13 +388,13 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b,
 			break;
 
 	if (b->nsets - unwritten_idx > 1) {
-		btree_node_sort(c, b, iter, unwritten_idx,
+		btree_node_sort(c, b, unwritten_idx,
 				b->nsets, false);
 		ret = true;
 	}
 
 	if (unwritten_idx > 1) {
-		btree_node_sort(c, b, iter, 0, unwritten_idx, false);
+		btree_node_sort(c, b, 0, unwritten_idx, false);
 		ret = true;
 	}
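With the iter argument gone, btree_node_compact() still merges bsets in two separate groups, as the hunk above shows: the not-yet-written bsets with each other, and the already-written ones with each other, never across the boundary. Here is a toy model of just that index bookkeeping; the struct and both functions are simplifications invented for illustration, not the bcachefs types.

#include <assert.h>
#include <stdbool.h>

/* hypothetical simplification: sets [0, unwritten_idx) are on disk */
struct toy_node {
	unsigned nsets;		/* bsets currently in the node */
	unsigned unwritten_idx;	/* first bset not yet written */
};

static bool toy_compact(struct toy_node *b)
{
	bool ret = false;

	/* merge the unwritten bsets with each other... */
	if (b->nsets - b->unwritten_idx > 1) {
		b->nsets = b->unwritten_idx + 1;
		ret = true;
	}

	/* ...and, separately, the already-written ones: */
	if (b->unwritten_idx > 1) {
		b->nsets -= b->unwritten_idx - 1;
		b->unwritten_idx = 1;
		ret = true;
	}

	return ret;
}

int main(void)
{
	/* two bsets already written, one still only in memory: */
	struct toy_node b = { .nsets = 3, .unwritten_idx = 2 };

	assert(toy_compact(&b));	/* merges the two written bsets */
	assert(b.nsets == 2 && b.unwritten_idx == 1);
	return 0;
}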
@@ -426,12 +424,30 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 			  struct btree_iter *iter)
 {
 	struct btree_node_entry *bne;
-	bool did_sort;
+	bool reinit_iter = false;
 
 	EBUG_ON(!(b->c.lock.state.seq & 1));
 	EBUG_ON(iter && iter->l[b->c.level].b != b);
+	BUG_ON(bset_written(b, bset(b, &b->set[1])));
+
+	if (b->nsets == MAX_BSETS) {
+		unsigned log_u64s[] = {
+			ilog2(bset_u64s(&b->set[0])),
+			ilog2(bset_u64s(&b->set[1])),
+			ilog2(bset_u64s(&b->set[2])),
+		};
+
+		if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
+			bch2_btree_node_write(c, b, SIX_LOCK_write);
+			reinit_iter = true;
+		}
+	}
+
+	if (b->nsets == MAX_BSETS &&
+	    btree_node_compact(c, b))
+		reinit_iter = true;
 
-	did_sort = btree_node_compact(c, b, iter);
+	BUG_ON(b->nsets >= MAX_BSETS);
 
 	bne = want_new_bset(c, b);
 	if (bne)
@@ -439,7 +455,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 
 	bch2_btree_build_aux_trees(b);
 
-	if (iter && did_sort)
+	if (iter && reinit_iter)
 		bch2_btree_iter_reinit_node(iter, b);
 }
@@ -1324,8 +1340,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 	return ret;
 }
 
-void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-			     enum six_lock_type lock_type_held)
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
 {
 	struct btree_write_bio *wbio;
 	struct bset_tree *t;
@@ -1595,7 +1610,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 	 * single bset:
 	 */
 	if (b->nsets > 1) {
-		btree_node_sort(c, b, NULL, 0, b->nsets, true);
+		btree_node_sort(c, b, 0, b->nsets, true);
 		invalidated_iter = true;
 	} else {
 		invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
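After a node write, everything can be sorted down to one bset with filter_whiteouts == true; in the single-bset case bch2_drop_whiteouts() is used directly. The following is a loose standalone model of what filtering whiteouts during such a merge means; the types and the predicate are toys, the real key format lives in the bkey machinery.

#include <stdbool.h>
#include <stdio.h>

struct toy_key {
	int k;
	bool whiteout;	/* a deletion marker */
};

/* loosely models a merge pass with filter_whiteouts == true */
static unsigned toy_merge(struct toy_key *dst,
			  const struct toy_key *src, unsigned n)
{
	unsigned out = 0;

	for (unsigned i = 0; i < n; i++)
		if (!src[i].whiteout)	/* drop whiteouts on the way through */
			dst[out++] = src[i];
	return out;
}

int main(void)
{
	struct toy_key in[] = { { 1, false }, { 2, true }, { 3, false } };
	struct toy_key out[3];

	printf("%u live keys\n", toy_merge(out, in, 3));	/* 2 */
	return 0;
}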
@@ -1625,13 +1640,12 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
  * Use this one if the node is intent locked:
  */
 void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-			  enum six_lock_type lock_type_held)
+			   enum six_lock_type lock_type_held)
 {
-	BUG_ON(lock_type_held == SIX_LOCK_write);
-
 	if (lock_type_held == SIX_LOCK_intent ||
-	    six_lock_tryupgrade(&b->c.lock)) {
-		__bch2_btree_node_write(c, b, SIX_LOCK_intent);
+	    (lock_type_held == SIX_LOCK_read &&
+	     six_lock_tryupgrade(&b->c.lock))) {
+		__bch2_btree_node_write(c, b);
 
 		/* don't cycle lock unnecessarily: */
 		if (btree_node_just_written(b) &&
@@ -1643,7 +1657,10 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 
 		if (lock_type_held == SIX_LOCK_read)
 			six_lock_downgrade(&b->c.lock);
 	} else {
-		__bch2_btree_node_write(c, b, SIX_LOCK_read);
+		__bch2_btree_node_write(c, b);
+		if (lock_type_held == SIX_LOCK_write &&
+		    btree_node_just_written(b))
+			bch2_btree_post_write_cleanup(c, b);
 	}
 }
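bch2_btree_node_write() used to BUG_ON() a held write lock; it now handles all three six-lock states: intent writes directly, read tries an upgrade first, and write runs the post-write cleanup inline since it already holds the strongest lock. A compact sketch of just that dispatch, with stand-in lock states and a stubbed-out upgrade; nothing here is the real locking code.

#include <stdio.h>

/* stand-ins for the three six-lock states */
enum toy_lock { TOY_LOCK_read, TOY_LOCK_intent, TOY_LOCK_write };

static int toy_tryupgrade(void)
{
	return 1;	/* pretend a read -> intent upgrade succeeds */
}

/* mirrors the dispatch in the rewritten bch2_btree_node_write() */
static void toy_node_write(enum toy_lock held)
{
	if (held == TOY_LOCK_intent ||
	    (held == TOY_LOCK_read && toy_tryupgrade())) {
		printf("write node; cleanup under a trylocked write lock\n");
		if (held == TOY_LOCK_read)
			printf("downgrade back to read\n");
	} else {
		printf("write node\n");
		if (held == TOY_LOCK_write)
			printf("post-write cleanup inline\n");
	}
}

int main(void)
{
	toy_node_write(TOY_LOCK_write);	/* no longer a BUG_ON */
	return 0;
}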
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 9c14cd30a09e..95c351611045 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -144,8 +144,7 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
 			       struct btree_write *);
 void bch2_btree_write_error_work(struct work_struct *);
 
-void __bch2_btree_node_write(struct bch_fs *, struct btree *,
-			     enum six_lock_type);
+void __bch2_btree_node_write(struct bch_fs *, struct btree *);
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 void bch2_btree_node_write(struct bch_fs *, struct btree *,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index f2925b0d7f17..7eef3dbb6ef1 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -256,13 +256,15 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
 	return remaining;
 }
 
+#define BTREE_WRITE_SET_U64s_BITS	9
+
 static inline unsigned btree_write_set_buffer(struct btree *b)
 {
 	/*
 	 * Could buffer up larger amounts of keys for btrees with larger keys,
 	 * pending benchmarking:
 	 */
-	return 4 << 10;
+	return 8 << BTREE_WRITE_SET_U64s_BITS;
 }
 
 static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
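One note on the btree_update_interior.h hunk: the rewrite is value-preserving, since 8 << 9 == 4 << 10 == 4096. Read as sizeof(u64) << BTREE_WRITE_SET_U64s_BITS, it expresses the same 4KiB buffer as 2^9 = 512 u64s of 8 bytes each; that the constant counts u64s is an assumption from its name. A one-line check of the identity:

#include <assert.h>
#include <stdint.h>

#define BTREE_WRITE_SET_U64s_BITS	9

int main(void)
{
	/* assumption: the constant counts u64s, 2^9 == 512 of them */
	unsigned bytes = sizeof(uint64_t) << BTREE_WRITE_SET_U64s_BITS;

	assert(bytes == (4 << 10));	/* same 4KiB as the old literal */
	return 0;
}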