summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/backpointers.c278
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/btree_cache.c1
-rw-r--r--fs/bcachefs/errcode.h1
4 files changed, 263 insertions, 19 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 31cbf781d489..41621570aede 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -567,25 +567,29 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- int ret;
- ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bkey_i_backpointer bp;
-
if (p.ptr.cached)
continue;
if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
continue;
- bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
- ret = check_bp_exists(trans, s, &bp, k);
- if (ret)
- return ret;
+ rcu_read_lock();
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
+ bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
+ rcu_read_unlock();
+
+ if (check) {
+ struct bkey_i_backpointer bp;
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
+ int ret = check_bp_exists(trans, s, &bp, k);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -794,26 +798,259 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
return 0;
}
+enum alloc_sector_counter {
+ ALLOC_dirty,
+ ALLOC_cached,
+ ALLOC_stripe,
+ ALLOC_SECTORS_NR
+};
+
+static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t)
+{
+ switch (t) {
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ return ALLOC_dirty;
+ case BCH_DATA_cached:
+ return ALLOC_cached;
+ case BCH_DATA_stripe:
+ return ALLOC_stripe;
+ default:
+ BUG();
+ }
+}
+
+static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, struct bkey_s_c alloc_k,
+ struct bkey_buf *last_flushed)
+{
+ int ret = 0;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
+
+ if (a->data_type == BCH_DATA_sb ||
+ a->data_type == BCH_DATA_journal ||
+ a->data_type == BCH_DATA_parity)
+ return 0;
+
+ u32 sectors[ALLOC_SECTORS_NR];
+ memset(sectors, 0, sizeof(sectors));
+
+ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
+ if (!ca)
+ return 0;
+
+ struct btree_iter iter;
+ struct bkey_s_c bp_k;
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_start(ca, alloc_k.k->p),
+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
+
+ if (bp.v->bucket_gen != a->gen)
+ continue;
+
+ sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len;
+ };
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ goto err;
+
+ /* Cached pointers don't have backpointers: */
+
+ if (sectors[ALLOC_dirty] != a->dirty_sectors ||
+ sectors[ALLOC_stripe] != a->stripe_sectors) {
+ ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
+ if (ret)
+ goto err;
+
+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
+ }
+err:
+ bch2_dev_put(ca);
+ return ret;
+}
+
+static int check_bucket_backpointer_mismatches(struct btree_trans *trans,
+ struct bkey_buf *last_flushed)
+{
+ return for_each_btree_key(trans, iter, BTREE_ID_alloc,
+ POS_MIN, BTREE_ITER_prefetch, k, ({
+ check_bucket_backpointer_mismatch_one(trans, k, last_flushed);
+ }));
+}
+
+static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr_v2: {
+ bool ret = false;
+
+ rcu_read_lock();
+ struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
+ while (pos.inode <= k.k->p.inode) {
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
+ if (!ca)
+ goto next;
+
+ struct bpos bucket = bp_pos_to_bucket(ca, pos);
+ bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches,
+ ca->mi.nbuckets, bucket.offset);
+ if (bucket.offset == ca->mi.nbuckets)
+ goto next;
+
+ ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p);
+ if (ret)
+ break;
+next:
+ pos = SPOS(pos.inode + 1, 0, 0);
+ }
+ rcu_read_unlock();
+
+ return ret;
+ }
+ case KEY_TYPE_btree_ptr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
+ enum btree_id btree, unsigned level)
+{
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ if (b)
+ bch2_node_pin(trans->c, b);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
+ struct bpos start, struct bpos *end)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ struct bkey_buf tmp;
+ bch2_bkey_buf_init(&tmp);
+
+ bch2_btree_cache_unpin(c);
+
+ *end = SPOS_MAX;
+
+ s64 mem_may_pin = mem_may_pin_bytes(c);
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
+ 0, 1, BTREE_ITER_prefetch);
+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+ if (!backpointer_node_has_missing(c, k))
+ continue;
+
+ mem_may_pin -= c->opts.btree_node_size;
+ if (mem_may_pin <= 0)
+ break;
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ BUG_ON(path->level != 1);
+
+ bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1);
+ }));
+ if (ret)
+ return ret;
+
+ struct bpos pinned = SPOS_MAX;
+ mem_may_pin = mem_may_pin_bytes(c);
+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
+ 0, 1, BTREE_ITER_prefetch);
+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+ if (!backpointer_node_has_missing(c, k))
+ continue;
+
+ mem_may_pin -= c->opts.btree_node_size;
+ if (mem_may_pin <= 0) {
+ *end = pinned;
+ break;
+ }
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ BUG_ON(path->level != 1);
+
+ int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1);
+
+ if (!ret2)
+ pinned = tmp.k->k.p;
+
+ ret;
+ }));
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
+ int ret = 0;
+
+ /*
+ * Can't allow devices to come/go/resize while we have bucket bitmaps
+ * allocated
+ */
+ lockdep_assert_held(&c->state_lock);
+
+ for_each_member_device(c, ca) {
+ BUG_ON(ca->bucket_backpointer_mismatches);
+ ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!ca->bucket_backpointer_mismatches) {
+ bch2_dev_put(ca);
+ ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+ goto err_free_bitmaps;
+ }
+ }
+
struct btree_trans *trans = bch2_trans_get(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
- int ret;
bch2_bkey_buf_init(&s.last_flushed);
bkey_init(&s.last_flushed.k->k);
+ ret = check_bucket_backpointer_mismatches(trans, &s.last_flushed);
+ if (ret)
+ goto err;
+
+ u64 nr_buckets = 0, nr_mismatches = 0;
+ for_each_member_device(c, ca) {
+ nr_buckets += ca->mi.nbuckets;
+ nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
+ }
+
+ if (!nr_mismatches)
+ goto err;
+
+ bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
+ nr_mismatches, nr_buckets);
+
while (1) {
- struct bbpos end;
- ret = bch2_get_btree_in_memory_pos(trans,
- BIT_ULL(BTREE_ID_backpointers),
- BIT_ULL(BTREE_ID_backpointers),
- BBPOS(BTREE_ID_backpointers, s.bp_start), &end);
+ ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
if (ret)
break;
- s.bp_end = end.pos;
-
if ( bpos_eq(s.bp_start, POS_MIN) &&
!bpos_eq(s.bp_end, SPOS_MAX))
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -838,10 +1075,15 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
s.bp_start = bpos_successor(s.bp_end);
}
+err:
bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c);
-
bch2_btree_cache_unpin(c);
+err_free_bitmaps:
+ for_each_member_device(c, ca) {
+ kvfree(ca->bucket_backpointer_mismatches);
+ ca->bucket_backpointer_mismatches = NULL;
+ }
bch_err_fn(c, ret);
return ret;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index e6cd93e1ed0f..f74f4d3edfb4 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -557,6 +557,8 @@ struct bch_dev {
unsigned long *buckets_nouse;
struct rw_semaphore bucket_lock;
+ unsigned long *bucket_backpointer_mismatches;
+
struct bch_dev_usage __percpu *usage;
/* Allocator: */
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 1117be901cf0..b00c6a20be27 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -222,7 +222,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
struct btree_cache *bc = &c->btree_cache;
mutex_lock(&bc->lock);
- BUG_ON(!__btree_node_pinned(bc, b));
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
set_btree_node_pinned(b);
list_move(&b->list, &bc->live[1].list);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 5e4dd85ac669..775425df0105 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -54,6 +54,7 @@
x(ENOMEM, ENOMEM_compression_bounce_read_init) \
x(ENOMEM, ENOMEM_compression_bounce_write_init) \
x(ENOMEM, ENOMEM_compression_workspace_init) \
+ x(ENOMEM, ENOMEM_backpointer_mismatches_bitmap) \
x(EIO, compression_workspace_not_initialized) \
x(ENOMEM, ENOMEM_bucket_gens) \
x(ENOMEM, ENOMEM_buckets_nouse) \