author		Kent Overstreet <kent.overstreet@linux.dev>	2024-11-15 16:31:54 -0500
committer	Kent Overstreet <kent.overstreet@linux.dev>	2024-12-04 21:52:26 -0500
commit		8b8b31d87ec7c805b5a459997e5429b78d835bbf (patch)
tree		f635987d5d9ea7e2c61c1304c36284a1068e6a5d
parent		6a41646582cd17b1a9429f31e7a180b1b2b928e8 (diff)
bcachefs: check_extents_to_backpointers() now only checks buckets with mismatches
Instead of walking every extent and every backpointer it points to, first sum up the backpointers in each bucket and check for mismatches; only look for missing backpointers if mismatches were detected, and only check extents in those buckets.

This is a major fsck scalability improvement: the two backpointers passes (backpointers -> extents and extents -> backpointers) are by far the most expensive fsck passes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
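The shape of the optimization is easy to see in isolation: one cheap pass flags suspect buckets in a bitmap, and the expensive pass then skips everything else. A minimal standalone C sketch of that idea (the arrays, counts, and bit helpers here are invented for illustration; this is not the bcachefs code):

#include <stdbool.h>
#include <stdio.h>

#define NR_BUCKETS	64
#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BITMAP_LONGS	((NR_BUCKETS + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Toy stand-ins: what the alloc info says vs. what the backpointers sum to */
static unsigned alloc_sectors[NR_BUCKETS];
static unsigned bp_sectors[NR_BUCKETS];
static unsigned long mismatch[BITMAP_LONGS];

static void set_bit_(unsigned long *map, unsigned nr)
{
	map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static bool test_bit_(const unsigned long *map, unsigned nr)
{
	return map[nr / BITS_PER_LONG] & (1UL << (nr % BITS_PER_LONG));
}

int main(void)
{
	/* Fabricate two mismatched buckets: */
	alloc_sectors[3]  = 128; bp_sectors[3]  = 64;
	alloc_sectors[41] = 8;   bp_sectors[41] = 0;

	/* Pass 1 (cheap): flag buckets whose counts disagree */
	for (unsigned i = 0; i < NR_BUCKETS; i++)
		if (alloc_sectors[i] != bp_sectors[i])
			set_bit_(mismatch, i);

	/* Pass 2 (expensive): only flagged buckets get the full check */
	for (unsigned i = 0; i < NR_BUCKETS; i++)
		if (test_bit_(mismatch, i))
			printf("bucket %u: scanning extents for missing backpointers\n", i);

	return 0;
}

In the patch below, the equivalent bitmap is ca->bucket_backpointer_mismatches, allocated per device and sized to ca->mi.nbuckets; pass 1 is check_bucket_backpointer_mismatches(), and pass 2 is the existing extents -> backpointers walk, now gated on a test_bit() per pointer.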
-rw-r--r--	fs/bcachefs/backpointers.c	278
-rw-r--r--	fs/bcachefs/bcachefs.h		2
-rw-r--r--	fs/bcachefs/btree_cache.c	1
-rw-r--r--	fs/bcachefs/errcode.h		1
4 files changed, 263 insertions(+), 19 deletions(-)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 31cbf781d489..41621570aede 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -567,25 +567,29 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- int ret;
- ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bkey_i_backpointer bp;
-
if (p.ptr.cached)
continue;
if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
continue;
- bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
- ret = check_bp_exists(trans, s, &bp, k);
- if (ret)
- return ret;
+ rcu_read_lock();
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
+ bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
+ rcu_read_unlock();
+
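+ /* Only do the expensive backpointer lookup if this bucket was flagged: */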
+ if (check) {
+ struct bkey_i_backpointer bp;
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
+ int ret = check_bp_exists(trans, s, &bp, k);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -794,26 +798,259 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
return 0;
}
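+/* Sector counters summed from a bucket's backpointers, to compare against its alloc key: */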
+enum alloc_sector_counter {
+ ALLOC_dirty,
+ ALLOC_cached,
+ ALLOC_stripe,
+ ALLOC_SECTORS_NR
+};
+
+static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t)
+{
+ switch (t) {
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ return ALLOC_dirty;
+ case BCH_DATA_cached:
+ return ALLOC_cached;
+ case BCH_DATA_stripe:
+ return ALLOC_stripe;
+ default:
+ BUG();
+ }
+}
+
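+/*
+ * Sum up the backpointers in one bucket and compare against the bucket's
+ * alloc key; on a mismatch, flag the bucket in the per-device bitmap:
+ */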
+static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, struct bkey_s_c alloc_k,
+ struct bkey_buf *last_flushed)
+{
+ int ret = 0;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
+
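+ /* These bucket types aren't tracked by backpointers; nothing to check: */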
+ if (a->data_type == BCH_DATA_sb ||
+ a->data_type == BCH_DATA_journal ||
+ a->data_type == BCH_DATA_parity)
+ return 0;
+
+ u32 sectors[ALLOC_SECTORS_NR];
+ memset(sectors, 0, sizeof(sectors));
+
+ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
+ if (!ca)
+ return 0;
+
+ struct btree_iter iter;
+ struct bkey_s_c bp_k;
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_start(ca, alloc_k.k->p),
+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
+
+ if (bp.v->bucket_gen != a->gen)
+ continue;
+
+ sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ goto err;
+
+ /* Cached pointers don't have backpointers: */
+
+ if (sectors[ALLOC_dirty] != a->dirty_sectors ||
+ sectors[ALLOC_stripe] != a->stripe_sectors) {
+ ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
+ if (ret)
+ goto err;
+
+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
+ }
+err:
+ bch2_dev_put(ca);
+ return ret;
+}
+
+static int check_bucket_backpointer_mismatches(struct btree_trans *trans,
+ struct bkey_buf *last_flushed)
+{
+ return for_each_btree_key(trans, iter, BTREE_ID_alloc,
+ POS_MIN, BTREE_ITER_prefetch, k, ({
+ check_bucket_backpointer_mismatch_one(trans, k, last_flushed);
+ }));
+}
+
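+/*
+ * Does this backpointers btree node cover any buckets flagged as having
+ * mismatches?
+ */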
+static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr_v2: {
+ bool ret = false;
+
+ rcu_read_lock();
+ struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
+ while (pos.inode <= k.k->p.inode) {
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
+ if (!ca)
+ goto next;
+
+ struct bpos bucket = bp_pos_to_bucket(ca, pos);
+ bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches,
+ ca->mi.nbuckets, bucket.offset);
+ if (bucket.offset == ca->mi.nbuckets)
+ goto next;
+
+ ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p);
+ if (ret)
+ break;
+next:
+ pos = SPOS(pos.inode + 1, 0, 0);
+ }
+ rcu_read_unlock();
+
+ return ret;
+ }
+ case KEY_TYPE_btree_ptr:
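+ /* no min_key, so the covered range can't be bounded; assume it might */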
+ return true;
+ default:
+ return false;
+ }
+}
+
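+/* Read a btree node into the cache and pin it so it can't be evicted: */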
+static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
+ enum btree_id btree, unsigned level)
+{
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ if (b)
+ bch2_node_pin(trans->c, b);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
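+/*
+ * Two passes over the backpointers btree: first prefetch the child nodes that
+ * cover flagged buckets, then pin as many as the memory budget allows,
+ * recording in *end how far the pinned region extends:
+ */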
+static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
+ struct bpos start, struct bpos *end)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ struct bkey_buf tmp;
+ bch2_bkey_buf_init(&tmp);
+
+ bch2_btree_cache_unpin(c);
+
+ *end = SPOS_MAX;
+
+ s64 mem_may_pin = mem_may_pin_bytes(c);
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
+ 0, 1, BTREE_ITER_prefetch);
+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+ if (!backpointer_node_has_missing(c, k))
+ continue;
+
+ mem_may_pin -= c->opts.btree_node_size;
+ if (mem_may_pin <= 0)
+ break;
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ BUG_ON(path->level != 1);
+
+ bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1);
+ }));
+ if (ret)
+ return ret;
+
+ struct bpos pinned = SPOS_MAX;
+ mem_may_pin = mem_may_pin_bytes(c);
+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
+ 0, 1, BTREE_ITER_prefetch);
+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+ if (!backpointer_node_has_missing(c, k))
+ continue;
+
+ mem_may_pin -= c->opts.btree_node_size;
+ if (mem_may_pin <= 0) {
+ *end = pinned;
+ break;
+ }
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ BUG_ON(path->level != 1);
+
+ int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1);
+
+ if (!ret2)
+ pinned = tmp.k->k.p;
+
+ ret2;
+ }));
+ return ret;
+}
+
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
+ int ret = 0;
+
+ /*
+ * Can't allow devices to come/go/resize while we have bucket bitmaps
+ * allocated
+ */
+ lockdep_assert_held(&c->state_lock);
+
+ for_each_member_device(c, ca) {
+ BUG_ON(ca->bucket_backpointer_mismatches);
+ ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!ca->bucket_backpointer_mismatches) {
+ bch2_dev_put(ca);
+ ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+ goto err_free_bitmaps;
+ }
+ }
+
struct btree_trans *trans = bch2_trans_get(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
- int ret;
bch2_bkey_buf_init(&s.last_flushed);
bkey_init(&s.last_flushed.k->k);
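+ /* First pass: flag buckets whose backpointers don't match their alloc keys: */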
+ ret = check_bucket_backpointer_mismatches(trans, &s.last_flushed);
+ if (ret)
+ goto err;
+
+ u64 nr_buckets = 0, nr_mismatches = 0;
+ for_each_member_device(c, ca) {
+ nr_buckets += ca->mi.nbuckets;
+ nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
+ }
+
+ if (!nr_mismatches)
+ goto err;
+
+ bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
+ nr_mismatches, nr_buckets);
+
while (1) {
- struct bbpos end;
- ret = bch2_get_btree_in_memory_pos(trans,
- BIT_ULL(BTREE_ID_backpointers),
- BIT_ULL(BTREE_ID_backpointers),
- BBPOS(BTREE_ID_backpointers, s.bp_start), &end);
+ ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
if (ret)
break;
- s.bp_end = end.pos;
-
if (bpos_eq(s.bp_start, POS_MIN) &&
!bpos_eq(s.bp_end, SPOS_MAX))
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -838,10 +1075,15 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
s.bp_start = bpos_successor(s.bp_end);
}
+err:
bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c);
-
bch2_btree_cache_unpin(c);
+err_free_bitmaps:
+ for_each_member_device(c, ca) {
+ kvfree(ca->bucket_backpointer_mismatches);
+ ca->bucket_backpointer_mismatches = NULL;
+ }
bch_err_fn(c, ret);
return ret;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index e6cd93e1ed0f..f74f4d3edfb4 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -557,6 +557,8 @@ struct bch_dev {
unsigned long *buckets_nouse;
struct rw_semaphore bucket_lock;
+ unsigned long *bucket_backpointer_mismatches;
+
struct bch_dev_usage __percpu *usage;
/* Allocator: */
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 1117be901cf0..b00c6a20be27 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -222,7 +222,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
struct btree_cache *bc = &c->btree_cache;
mutex_lock(&bc->lock);
- BUG_ON(!__btree_node_pinned(bc, b));
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
set_btree_node_pinned(b);
list_move(&b->list, &bc->live[1].list);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 5e4dd85ac669..775425df0105 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -54,6 +54,7 @@
x(ENOMEM, ENOMEM_compression_bounce_read_init) \
x(ENOMEM, ENOMEM_compression_bounce_write_init) \
x(ENOMEM, ENOMEM_compression_workspace_init) \
+ x(ENOMEM, ENOMEM_backpointer_mismatches_bitmap) \
x(EIO, compression_workspace_not_initialized) \
x(ENOMEM, ENOMEM_bucket_gens) \
x(ENOMEM, ENOMEM_buckets_nouse) \