summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/Makefile1
-rw-r--r--fs/bcachefs/alloc_background.c209
-rw-r--r--fs/bcachefs/alloc_background.h26
-rw-r--r--fs/bcachefs/backpointers.c858
-rw-r--r--fs/bcachefs/backpointers.h38
-rw-r--r--fs/bcachefs/bcachefs.h1
-rw-r--r--fs/bcachefs/bcachefs_format.h36
-rw-r--r--fs/bcachefs/bkey_methods.c4
-rw-r--r--fs/bcachefs/btree_types.h5
-rw-r--r--fs/bcachefs/buckets.c48
-rw-r--r--fs/bcachefs/buckets.h19
-rw-r--r--fs/bcachefs/recovery.c28
-rw-r--r--fs/bcachefs/super.c2
13 files changed, 1180 insertions, 95 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 96a9a964a6f2..f1277d58b348 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o
bcachefs-y := \
alloc_background.o \
alloc_foreground.o \
+ backpointers.o \
bkey.o \
bkey_methods.o \
bkey_sort.o \
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index d114ba0949a8..6f82f58c4f6e 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "backpointers.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@@ -37,8 +38,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
struct bkey_alloc_unpacked {
u64 journal_seq;
- u64 bucket;
- u8 dev;
u8 gen;
u8 oldest_gen;
u8 data_type;
@@ -194,11 +193,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{
- struct bkey_alloc_unpacked ret = {
- .dev = k.k->p.inode,
- .bucket = k.k->p.offset,
- .gen = 0,
- };
+ struct bkey_alloc_unpacked ret = { .gen = 0 };
switch (k.k->type) {
case KEY_TYPE_alloc:
@@ -215,48 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}
-void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
-{
- if (k.k->type == KEY_TYPE_alloc_v4) {
- *out = *bkey_s_c_to_alloc_v4(k).v;
- } else {
- struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
-
- *out = (struct bch_alloc_v4) {
- .journal_seq = u.journal_seq,
- .flags = u.need_discard,
- .gen = u.gen,
- .oldest_gen = u.oldest_gen,
- .data_type = u.data_type,
- .stripe_redundancy = u.stripe_redundancy,
- .dirty_sectors = u.dirty_sectors,
- .cached_sectors = u.cached_sectors,
- .io_time[READ] = u.read_time,
- .io_time[WRITE] = u.write_time,
- .stripe = u.stripe,
- };
- }
-}
-
-struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
-{
- struct bkey_i_alloc_v4 *ret;
-
- if (k.k->type == KEY_TYPE_alloc_v4) {
- ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
- if (!IS_ERR(ret))
- bkey_reassemble(&ret->k_i, k);
- } else {
- ret = bch2_trans_kmalloc(trans, sizeof(*ret));
- if (!IS_ERR(ret)) {
- bkey_alloc_v4_init(&ret->k_i);
- ret->k.p = k.k->p;
- bch2_alloc_to_v4(k, &ret->v);
- }
- }
- return ret;
-}
-
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
@@ -339,9 +292,15 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
{
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
- if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) {
- prt_printf(err, "bad val size (%zu != %zu)",
- bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4));
+ if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) {
+ prt_printf(err, "bad val size (%lu != %u)",
+ bkey_val_u64s(k.k), alloc_v4_u64s(a.v));
+ return -EINVAL;
+ }
+
+ if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) {
+ prt_printf(err, "invalid backpointers_start");
return -EINVAL;
}
@@ -401,9 +360,19 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
return 0;
}
+static inline u64 swab40(u64 x)
+{
+ return (((x & 0x00000000ffULL) << 32)|
+ ((x & 0x000000ff00ULL) << 16)|
+ ((x & 0x0000ff0000ULL) >> 0)|
+ ((x & 0x00ff000000ULL) >> 16)|
+ ((x & 0xff00000000ULL) >> 32));
+}
+
void bch2_alloc_v4_swab(struct bkey_s k)
{
struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
+ struct bch_backpointer *bp, *bps;
a->journal_seq = swab64(a->journal_seq);
a->flags = swab32(a->flags);
@@ -413,25 +382,135 @@ void bch2_alloc_v4_swab(struct bkey_s k)
a->io_time[1] = swab64(a->io_time[1]);
a->stripe = swab32(a->stripe);
a->nr_external_backpointers = swab32(a->nr_external_backpointers);
+
+ bps = alloc_v4_backpointers(a);
+ for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
+ bp->bucket_offset = swab40(bp->bucket_offset);
+ bp->bucket_len = swab32(bp->bucket_len);
+ bch2_bpos_swab(&bp->pos);
+ }
}
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
- struct bch_alloc_v4 a;
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a = &_a;
+ const struct bch_backpointer *bps;
+ unsigned i;
- bch2_alloc_to_v4(k, &a);
+ if (k.k->type == KEY_TYPE_alloc_v4)
+ a = bkey_s_c_to_alloc_v4(k).v;
+ else
+ bch2_alloc_to_v4(k, &_a);
+
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ prt_printf(out, "gen %u oldest_gen %u data_type %s",
+ a->gen, a->oldest_gen, bch2_data_types[a->data_type]);
+ prt_newline(out);
+ prt_printf(out, "journal_seq %llu", a->journal_seq);
+ prt_newline(out);
+ prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a));
+ prt_newline(out);
+ prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a));
+ prt_newline(out);
+ prt_printf(out, "dirty_sectors %u", a->dirty_sectors);
+ prt_newline(out);
+ prt_printf(out, "cached_sectors %u", a->cached_sectors);
+ prt_newline(out);
+ prt_printf(out, "stripe %u", a->stripe);
+ prt_newline(out);
+ prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy);
+ prt_newline(out);
+ prt_printf(out, "io_time[READ] %llu", a->io_time[READ]);
+ prt_newline(out);
+ prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
+ prt_newline(out);
+ prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a));
+ printbuf_indent_add(out, 2);
+
+ bps = alloc_v4_backpointers_c(a);
+ for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a); i++) {
+ prt_newline(out);
+ bch2_backpointer_to_text(out, &bps[i]);
+ }
- prt_printf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu",
- a.gen, a.oldest_gen, bch2_data_types[a.data_type],
- a.journal_seq,
- BCH_ALLOC_V4_NEED_DISCARD(&a),
- BCH_ALLOC_V4_NEED_INC_GEN(&a));
- prt_printf(out, " dirty_sectors %u", a.dirty_sectors);
- prt_printf(out, " cached_sectors %u", a.cached_sectors);
- prt_printf(out, " stripe %u", a.stripe);
- prt_printf(out, " stripe_redundancy %u", a.stripe_redundancy);
- prt_printf(out, " read_time %llu", a.io_time[READ]);
- prt_printf(out, " write_time %llu", a.io_time[WRITE]);
+ printbuf_indent_sub(out, 4);
+}
+
+void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
+{
+ if (k.k->type == KEY_TYPE_alloc_v4) {
+ int d;
+
+ *out = *bkey_s_c_to_alloc_v4(k).v;
+
+ d = (int) BCH_ALLOC_V4_U64s -
+ (int) (BCH_ALLOC_V4_BACKPOINTERS_START(out) ?: BCH_ALLOC_V4_U64s_V0);
+ if (unlikely(d > 0)) {
+ memset((u64 *) out + BCH_ALLOC_V4_BACKPOINTERS_START(out),
+ 0,
+ d * sizeof(u64));
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+ }
+ } else {
+ struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
+
+ *out = (struct bch_alloc_v4) {
+ .journal_seq = u.journal_seq,
+ .flags = u.need_discard,
+ .gen = u.gen,
+ .oldest_gen = u.oldest_gen,
+ .data_type = u.data_type,
+ .stripe_redundancy = u.stripe_redundancy,
+ .dirty_sectors = u.dirty_sectors,
+ .cached_sectors = u.cached_sectors,
+ .io_time[READ] = u.read_time,
+ .io_time[WRITE] = u.write_time,
+ .stripe = u.stripe,
+ };
+
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+ }
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+ unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
+ ? bkey_bytes(k.k)
+ : sizeof(struct bkey_i_alloc_v4);
+ struct bkey_i_alloc_v4 *ret;
+
+ /*
+ * Reserve space for one more backpointer here:
+ * Not sketchy at doing it this way, nope...
+ */
+ ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer));
+ if (IS_ERR(ret))
+ return ret;
+
+ if (k.k->type == KEY_TYPE_alloc_v4) {
+ bkey_reassemble(&ret->k_i, k);
+
+ if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) {
+ struct bch_backpointer *src, *dst;
+
+ src = alloc_v4_backpointers(&ret->v);
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+ dst = alloc_v4_backpointers(&ret->v);
+
+ memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
+ sizeof(struct bch_backpointer));
+ memset(src, 0, dst - src);
+ set_alloc_v4_u64s(ret);
+ }
+ } else {
+ bkey_alloc_v4_init(&ret->k_i);
+ ret->k.p = k.k->p;
+ bch2_alloc_to_v4(k, &ret->v);
+ }
+ return ret;
}
int bch2_alloc_read(struct bch_fs *c)
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index ff366e61ace5..2ac6b5046c67 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -70,6 +70,22 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_
return pos;
}
+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
+{
+ unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+ BCH_ALLOC_V4_U64s_V0) +
+ BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
+ (sizeof(struct bch_backpointer) / sizeof(u64));
+
+ BUG_ON(ret > U8_MAX - BKEY_U64s);
+ return ret;
+}
+
+static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
+{
+ set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v));
+}
+
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
@@ -143,6 +159,16 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
void bch2_do_invalidates(struct bch_fs *);
+static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
+{
+ return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+}
+
+static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
+{
+ return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+}
+
int bch2_fs_freespace_init(struct bch_fs *);
void bch2_recalc_capacity(struct bch_fs *);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
new file mode 100644
index 000000000000..18af3dfbf26a
--- /dev/null
+++ b/fs/bcachefs/backpointers.c
@@ -0,0 +1,858 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "btree_cache.h"
+#include "btree_update.h"
+#include "error.h"
+
+#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
+
+static inline struct bpos __backpointer_pos(struct bch_dev *ca, u64 b,
+ u64 bucket_offset)
+{
+ return POS(ca->dev_idx,
+ (bucket_to_sector(ca, b) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+}
+
+static inline struct bpos backpointer_pos(struct bch_fs *c,
+ struct bpos alloc_pos,
+ u64 bucket_offset)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, alloc_pos.inode);
+
+ return POS(alloc_pos.inode,
+ (bucket_to_sector(ca, alloc_pos.offset) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+}
+
+int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
+ int rw, struct printbuf *err)
+{
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
+ u64 b;
+
+ if (bkey_val_bytes(bp.k) != sizeof(*bp.v)) {
+ prt_printf(err, "incorrect value size");
+ return -EINVAL;
+ }
+
+ b = sector_to_bucket(ca, k.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
+
+ if (bpos_cmp(__backpointer_pos(ca, b, bp.v->bucket_offset), bp.k->p)) {
+ prt_printf(err, "backpointer at wrong pos");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
+{
+ prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
+ bch2_btree_ids[bp->btree_id],
+ bp->level,
+ (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
+ (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
+ bp->bucket_len);
+ bch2_bpos_to_text(out, bp->pos);
+}
+
+void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
+}
+
+void bch2_backpointer_swab(struct bkey_s k)
+{
+ struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
+
+ bp.v->bucket_offset = swab32(bp.v->bucket_offset);
+ bp.v->bucket_len = swab32(bp.v->bucket_len);
+ bch2_bpos_swab(&bp.v->pos);
+}
+
+#define BACKPOINTER_OFFSET_MAX ((1ULL << 40) - 1)
+
+void bch2_pointer_to_bucket_and_backpointer(struct bch_fs *c,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k,
+ struct extent_ptr_decoded p,
+ struct bpos *bucket_pos,
+ struct bch_backpointer *bp)
+{
+ enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
+ s64 sectors = level ? btree_sectors(c) : k.k->size;
+ u32 bucket_offset;
+
+ *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
+ *bp = (struct bch_backpointer) {
+ .btree_id = btree_id,
+ .level = level,
+ .data_type = data_type,
+ .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
+ p.crc.offset,
+ .bucket_len = ptr_disk_sectors(sectors, p),
+ .pos = k.k->p,
+ };
+}
+
+static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r)
+{
+ return cmp_int(l.bucket_offset, r.bucket_offset);
+}
+
+static int bch2_backpointer_del_by_offset(struct btree_trans *trans,
+ struct bpos alloc_pos,
+ u64 bp_offset)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ if (bp_offset < BACKPOINTER_OFFSET_MAX) {
+ struct bch_backpointer *bps;
+ struct bkey_i_alloc_v4 *a;
+ unsigned i, nr;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ alloc_pos,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_WITH_UPDATES);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_alloc_v4) {
+ ret = -ENOENT;
+ goto err;
+ }
+
+ a = bch2_alloc_to_v4_mut(trans, k);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ goto err;
+ bps = alloc_v4_backpointers(&a->v);
+ nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
+
+ for (i = 0; i < nr; i++) {
+ if (bps[i].bucket_offset == bp_offset)
+ goto found;
+ if (bps[i].bucket_offset > bp_offset)
+ break;
+ }
+
+ ret = -ENOENT;
+ goto err;
+found:
+ array_remove_item(bps, nr, i);
+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+ set_alloc_v4_u64s(a);
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+ } else {
+ bp_offset -= BACKPOINTER_OFFSET_MAX;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers,
+ backpointer_pos(c, alloc_pos, bp_offset),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_WITH_UPDATES);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type == KEY_TYPE_backpointer)
+ ret = bch2_btree_delete_at(trans, &iter, 0);
+ else
+ ret = -ENOENT;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_bucket_backpointer_del(struct btree_trans *trans,
+ struct bkey_i_alloc_v4 *a,
+ struct bch_backpointer bp,
+ struct bkey_s_c orig_k)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
+ unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
+ struct btree_iter bp_iter;
+ struct bkey_s_c k;
+ int ret;
+
+ for (i = 0; i < nr; i++) {
+ int cmp = backpointer_cmp(bps[i], bp) ?:
+ memcmp(&bps[i], &bp, sizeof(bp));
+ if (!cmp)
+ goto found;
+ if (cmp >= 0)
+ break;
+ }
+
+ goto btree;
+found:
+ array_remove_item(bps, nr, i);
+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+ set_alloc_v4_u64s(a);
+ return 0;
+btree:
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+ backpointer_pos(c, a->k.p, bp.bucket_offset),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_WITH_UPDATES);
+ k = bch2_btree_iter_peek_slot(&bp_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_backpointer ||
+ memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "backpointer not found when deleting");
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ prt_printf(&buf, "searching for ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "got ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "alloc ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+ prt_newline(&buf);
+
+ prt_printf(&buf, "for ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+
+ if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+ bch_err(c, "%s", buf.buf);
+ } else {
+ ret = -EIO;
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+ }
+ printbuf_exit(&buf);
+ goto err;
+ }
+
+ ret = bch2_btree_delete_at(trans, &bp_iter, 0);
+err:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ return ret;
+}
+
+int bch2_bucket_backpointer_add(struct btree_trans *trans,
+ struct bkey_i_alloc_v4 *a,
+ struct bch_backpointer bp,
+ struct bkey_s_c orig_k)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca;
+ struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
+ unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
+ struct bkey_i_backpointer *bp_k;
+ struct btree_iter bp_iter;
+ struct bkey_s_c k;
+ int ret;
+
+ /* Check for duplicates: */
+ for (i = 0; i < nr; i++) {
+ int cmp = backpointer_cmp(bps[i], bp);
+ if (cmp >= 0)
+ break;
+ }
+
+ if ((i &&
+ (bps[i - 1].bucket_offset +
+ bps[i - 1].bucket_len > bp.bucket_offset)) ||
+ (i < nr &&
+ (bp.bucket_offset + bp.bucket_len > bps[i].bucket_offset))) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "overlapping backpointer found when inserting ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ prt_printf(&buf, "into ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+ prt_newline(&buf);
+
+ prt_printf(&buf, "for ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+
+ printbuf_exit(&buf);
+ return -EIO;
+ }
+
+ if (nr < BCH_ALLOC_V4_NR_BACKPOINTERS_MAX) {
+ array_insert_item(bps, nr, i, bp);
+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+ set_alloc_v4_u64s(a);
+ return 0;
+ }
+
+ /* Overflow: use backpointer btree */
+ bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k));
+ ret = PTR_ERR_OR_ZERO(bp_k);
+ if (ret)
+ return ret;
+
+ ca = bch_dev_bkey_exists(c, a->k.p.inode);
+
+ bkey_backpointer_init(&bp_k->k_i);
+ bp_k->k.p = backpointer_pos(c, a->k.p, bp.bucket_offset);
+ bp_k->v = bp;
+
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_WITH_UPDATES);
+ k = bch2_btree_iter_peek_slot(&bp_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "existing btree backpointer key found when inserting ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ prt_printf(&buf, "found ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "for ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+
+ printbuf_exit(&buf);
+ ret = -EIO;
+ goto err;
+ }
+
+ ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
+err:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ return ret;
+}
+
+int bch2_get_next_backpointer(struct btree_trans *trans,
+ unsigned dev, u64 bucket, int gen,
+ u64 *bp_offset,
+ struct bch_backpointer *dst)
+{
+ struct bch_fs *c = trans->c;
+ struct bpos alloc_pos = POS(dev, bucket);
+ struct bpos bp_pos =
+ backpointer_pos(c, alloc_pos,
+ max_t(u64, *bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
+ struct bpos bp_end_pos =
+ backpointer_pos(c, bpos_nosnap_successor(alloc_pos), 0);
+ struct btree_iter alloc_iter, bp_iter = { NULL };
+ struct bkey_s_c k;
+ struct bkey_s_c_alloc_v4 a;
+ size_t i;
+ int ret;
+
+ bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+ alloc_pos,
+ BTREE_ITER_CACHED);
+ k = bch2_btree_iter_peek_slot(&alloc_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto done;
+
+ if (k.k->type != KEY_TYPE_alloc_v4)
+ goto done;
+
+ a = bkey_s_c_to_alloc_v4(k);
+ if (gen >= 0 && a.v->gen != gen)
+ goto done;
+
+ for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) {
+ if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset)
+ continue;
+
+ *dst = alloc_v4_backpointers_c(a.v)[i];
+ *bp_offset = dst->bucket_offset;
+ goto out;
+ }
+
+ for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers,
+ bp_pos, 0, k, ret) {
+ if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
+ break;
+
+ if (k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ *dst = *bkey_s_c_to_backpointer(k).v;
+ *bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX;
+ goto out;
+ }
+done:
+ *bp_offset = U64_MAX;
+out:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ return ret;
+}
+
+struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_backpointer bp,
+ bool in_fsck)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_node_iter_init(trans, iter,
+ bp.btree_id,
+ bp.pos,
+ 0,
+ min(bp.level, c->btree_roots[bp.btree_id].level),
+ 0);
+ k = bch2_btree_iter_peek_slot(iter);
+ if (bkey_err(k)) {
+ bch2_trans_iter_exit(trans, iter);
+ return k;
+ }
+
+ if (bp.level == c->btree_roots[bp.btree_id].level + 1)
+ k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
+
+ /* Check if key matches backpointer: */
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ struct bpos bucket_pos;
+ struct bch_backpointer bp2;
+
+ bch2_pointer_to_bucket_and_backpointer(c, bp.btree_id, bp.level,
+ k, p, &bucket_pos, &bp2);
+ if (!memcmp(&bp, &bp2, sizeof(bp)))
+ return k;
+ }
+
+ if (!in_fsck) {
+ prt_printf(&buf, "backpointer doesn't match extent it points to:\n ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+
+ bch2_trans_iter_exit(trans, iter);
+ }
+
+ printbuf_exit(&buf);
+ return bkey_s_c_null;
+}
+
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_backpointer bp)
+{
+ return __bch2_backpointer_get_key(trans, iter, bp, false);
+}
+
+struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_backpointer bp)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct btree *b;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+
+ BUG_ON(!bp.level);
+
+ bch2_trans_node_iter_init(trans, iter,
+ bp.btree_id,
+ bp.pos,
+ 0,
+ bp.level - 1,
+ 0);
+ b = bch2_btree_iter_peek_node(iter);
+ if (IS_ERR(b)) {
+ bch2_trans_iter_exit(trans, iter);
+ return b;
+ }
+
+ /* Check if key matches backpointer: */
+ k = bkey_i_to_s_c(&b->key);
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ struct bpos bucket_pos;
+ struct bch_backpointer bp2;
+
+ bch2_pointer_to_bucket_and_backpointer(c, bp.btree_id, bp.level,
+ k, p, &bucket_pos, &bp2);
+ if (!memcmp(&bp, &bp2, sizeof(bp)))
+ return b;
+ }
+
+ if (btree_node_will_make_reachable(b))
+ goto out;
+
+ prt_printf(&buf, "backpointer doesn't match btree node it points to:\n ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+out:
+ bch2_trans_iter_exit(trans, iter);
+ printbuf_exit(&buf);
+ return NULL;
+}
+
+static int bch2_check_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter alloc_iter = { NULL };
+ struct bch_dev *ca;
+ struct bkey_s_c k, alloc_k;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ k = bch2_btree_iter_peek(bp_iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+ if (!k.k)
+ return 0;
+
+ if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
+ "backpointer for mising device:\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, bp_iter, 0);
+ goto out;
+ }
+
+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
+
+ bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+ POS(k.k->p.inode, sector_to_bucket(ca,
+ k.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT)), 0);
+
+ alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+ ret = bkey_err(alloc_k);
+ if (ret)
+ goto out;
+
+ if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
+ alloc_iter.pos.inode, alloc_iter.pos.offset,
+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, bp_iter, 0);
+ goto out;
+ }
+out:
+fsck_err:
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+/* verify that every backpointer has a corresponding alloc key */
+int bch2_check_backpointers(struct bch_fs *c)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ int ret = 0;
+
+ bch2_trans_init(&trans, c, 0, 0);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
+
+ do {
+ ret = __bch2_trans_do(&trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ bch2_check_backpointer(&trans, &iter));
+ if (ret)
+ break;
+ } while (bch2_btree_iter_advance(&iter));
+
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
+ return ret;
+}
+
+/*
+ * Verify that @bp exists:
+ */
+static int backpointer_check(struct btree_trans *trans,
+ struct bpos bucket_pos,
+ struct bch_backpointer bp,
+ struct bkey_s_c orig_k)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter alloc_iter, bp_iter = { NULL };
+ struct printbuf buf = PRINTBUF;
+ struct bkey_s_c alloc_k, bp_k;
+ int ret;
+
+ bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
+ alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+ ret = bkey_err(alloc_k);
+ if (ret)
+ goto err;
+
+ if (alloc_k.k->type == KEY_TYPE_alloc_v4) {
+ struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k);
+ const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v);
+ unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v);
+
+ for (i = 0; i < nr; i++) {
+ int cmp = backpointer_cmp(bps[i], bp) ?:
+ memcmp(&bps[i], &bp, sizeof(bp));
+ if (!cmp)
+ goto out;
+ if (cmp >= 0)
+ break;
+ }
+ } else {
+ goto missing;
+ }
+
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+ backpointer_pos(c, bucket_pos, bp.bucket_offset),
+ 0);
+ bp_k = bch2_btree_iter_peek_slot(&bp_iter);
+ ret = bkey_err(bp_k);
+ if (ret)
+ goto err;
+
+ if (bp_k.k->type != KEY_TYPE_backpointer ||
+ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp)))
+ goto missing;
+out:
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ printbuf_exit(&buf);
+ return ret;
+missing:
+ prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
+ bch2_btree_ids[bp.btree_id], bp.level);
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_printf(&buf, "\nin alloc key ");
+ bch2_bkey_val_to_text(&buf, c, alloc_k);
+
+ if (c->sb.version < bcachefs_metadata_version_backpointers ||
+ fsck_err(c, "%s", buf.buf)) {
+ struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k);
+
+ ret = PTR_ERR_OR_ZERO(a) ?:
+ bch2_bucket_backpointer_add(trans, a, bp, orig_k) ?:
+ bch2_trans_update(trans, &alloc_iter, &a->k_i, 0);
+ }
+
+ goto out;
+}
+
+static int check_extent_to_backpointers(struct btree_trans *trans,
+ struct btree_iter *iter)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bkey_s_c k;
+ int ret;
+
+ k = bch2_btree_iter_peek_all_levels(iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+ if (!k.k)
+ return 0;
+
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ struct bpos bucket_pos;
+ struct bch_backpointer bp;
+
+ if (p.ptr.cached)
+ continue;
+
+ bch2_pointer_to_bucket_and_backpointer(c, iter->btree_id, iter->path->level,
+ k, p, &bucket_pos, &bp);
+
+ ret = backpointer_check(trans, bucket_pos, bp, k);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int check_btree_root_to_backpointers(struct btree_trans *trans,
+ enum btree_id btree_id)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct btree *b;
+ struct bkey_s_c k;
+ struct bkey_ptrs_c ptrs;
+ struct extent_ptr_decoded p;
+ const union bch_extent_entry *entry;
+ int ret;
+
+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
+ c->btree_roots[btree_id].level, 0);
+ b = bch2_btree_iter_peek_node(&iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ BUG_ON(b != btree_node_root(c, b));
+
+ k = bkey_i_to_s_c(&b->key);
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ struct bpos bucket_pos;
+ struct bch_backpointer bp;
+
+ if (p.ptr.cached)
+ continue;
+
+ bch2_pointer_to_bucket_and_backpointer(c, iter.btree_id, iter.path->level + 1,
+ k, p, &bucket_pos, &bp);
+
+ ret = backpointer_check(trans, bucket_pos, bp, k);
+ if (ret)
+ goto err;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_check_extents_to_backpointers(struct bch_fs *c)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ enum btree_id btree_id;
+ int ret = 0;
+
+ bch2_trans_init(&trans, c, 0, 0);
+ for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
+ bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0,
+ 0,
+ BTREE_ITER_ALL_LEVELS|
+ BTREE_ITER_PREFETCH);
+
+ do {
+ ret = __bch2_trans_do(&trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ check_extent_to_backpointers(&trans, &iter));
+ if (ret)
+ break;
+ } while (!bch2_btree_iter_advance(&iter));
+
+ bch2_trans_iter_exit(&trans, &iter);
+
+ if (ret)
+ break;
+
+ ret = __bch2_trans_do(&trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ check_btree_root_to_backpointers(&trans, btree_id));
+ if (ret)
+ break;
+ }
+ bch2_trans_exit(&trans);
+ return ret;
+}
+
+static int check_one_backpointer(struct btree_trans *trans,
+ struct bpos alloc_pos,
+ u64 *bp_offset)
+{
+ struct btree_iter iter;
+ struct bch_backpointer bp;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ ret = bch2_get_next_backpointer(trans, alloc_pos.inode,
+ alloc_pos.offset, -1,
+ bp_offset, &bp);
+ if (ret || *bp_offset == U64_MAX)
+ return ret;
+
+ k = __bch2_backpointer_get_key(trans, &iter, bp, true);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (fsck_err_on(!k.k, trans->c,
+ "backpointer offset %llu points to missing extent\n%s",
+ *bp_offset,
+ (bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
+ ret = bch2_backpointer_del_by_offset(trans, alloc_pos, *bp_offset);
+ if (ret == -ENOENT)
+ bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_trans_init(&trans, c, 0, 0);
+ for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ret) {
+ u64 bp_offset = 0;
+
+ while (!(ret = __bch2_trans_do(&trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+ bp_offset < U64_MAX)
+ bp_offset++;
+
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
+ return ret < 0 ? ret : 0;
+}
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
new file mode 100644
index 000000000000..6b44fdc92cee
--- /dev/null
+++ b/fs/bcachefs/backpointers.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+
+#include "super.h"
+
+int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k,
+ int, struct printbuf *);
+void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
+void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+void bch2_backpointer_swab(struct bkey_s);
+
+#define bch2_bkey_ops_backpointer (struct bkey_ops) { \
+ .key_invalid = bch2_backpointer_invalid, \
+ .val_to_text = bch2_backpointer_k_to_text, \
+ .swab = bch2_backpointer_swab, \
+}
+
+void bch2_pointer_to_bucket_and_backpointer(struct bch_fs *, enum btree_id, unsigned,
+ struct bkey_s_c, struct extent_ptr_decoded,
+ struct bpos *, struct bch_backpointer *);
+
+int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *,
+ struct bch_backpointer, struct bkey_s_c);
+int bch2_bucket_backpointer_add(struct btree_trans *, struct bkey_i_alloc_v4 *,
+ struct bch_backpointer, struct bkey_s_c);
+int bch2_get_next_backpointer(struct btree_trans *, unsigned, u64, int,
+ u64 *, struct bch_backpointer *);
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *,
+ struct btree_iter *, struct bch_backpointer);
+struct btree *bch2_backpointer_get_node(struct btree_trans *,
+ struct btree_iter *, struct bch_backpointer);
+
+int bch2_check_backpointers(struct bch_fs *);
+int bch2_check_extents_to_backpointers(struct bch_fs *);
+int bch2_check_backpointers_to_extents(struct bch_fs *);
+
+#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 2eced20667fa..1f0484aa6501 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -509,6 +509,7 @@ enum {
BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */
BCH_FS_CHECK_LRUS_DONE,
+ BCH_FS_CHECK_BACKPOINTERS_DONE,
BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 5af9f2e7ea86..f6075dd368d7 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -365,7 +365,8 @@ static inline void bkey_init(struct bkey *k)
x(alloc_v3, 24) \
x(set, 25) \
x(lru, 26) \
- x(alloc_v4, 27)
+ x(alloc_v4, 27) \
+ x(backpointer, 28)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -886,6 +887,12 @@ struct bch_alloc {
x(stripe, 32) \
x(stripe_redundancy, 8)
+enum {
+#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
+ BCH_ALLOC_FIELDS_V1()
+#undef x
+};
+
struct bch_alloc_v2 {
struct bch_val v;
__u8 nr_fields;
@@ -914,6 +921,9 @@ struct bch_alloc_v3 {
__u8 data[];
} __attribute__((packed, aligned(8)));
+LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
+LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
+
struct bch_alloc_v4 {
struct bch_val v;
__u64 journal_seq;
@@ -927,22 +937,27 @@ struct bch_alloc_v4 {
__u64 io_time[2];
__u32 stripe;
__u32 nr_external_backpointers;
- struct bpos backpointers[0];
} __attribute__((packed, aligned(8)));
-LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
-LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
+#define BCH_ALLOC_V4_U64s_V0 6
+#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64))
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
-enum {
-#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
- BCH_ALLOC_FIELDS_V1()
-#undef x
-};
+#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40
+
+struct bch_backpointer {
+ struct bch_val v;
+ __u8 btree_id;
+ __u8 level;
+ __u8 data_type;
+ __u64 bucket_offset:40;
+ __u32 bucket_len;
+ struct bpos pos;
+} __attribute__((packed, aligned(8)));
/* Quotas: */
@@ -1406,7 +1421,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(inode_v2, 18) \
x(freespace, 19) \
x(alloc_v4, 20) \
- x(new_data_types, 21)
+ x(new_data_types, 21) \
+ x(backpointers, 22)
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 229d51578086..fd352a672d62 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "backpointers.h"
#include "bkey_methods.h"
#include "btree_types.h"
#include "alloc_background.h"
@@ -191,6 +192,9 @@ static unsigned bch2_key_types_allowed[] = {
[BKEY_TYPE_need_discard] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_set),
+ [BKEY_TYPE_backpointers] =
+ (1U << KEY_TYPE_deleted)|
+ (1U << KEY_TYPE_backpointer),
[BKEY_TYPE_btree] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_btree_ptr)|
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index d8f92cc96c4f..b97bfd954c4a 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -638,6 +638,11 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
}
+static inline bool btree_type_has_ptrs(enum btree_id id)
+{
+ return (1 << id) & BTREE_ID_HAS_PTRS;
+}
+
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{
return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index e2944fc4cfe2..46c2655701e9 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -7,6 +7,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
+#include "backpointers.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
@@ -655,16 +656,6 @@ err:
return ret;
}
-static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-{
- EBUG_ON(sectors < 0);
-
- return crc_is_compressed(p.crc)
- ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
- p.crc.uncompressed_size)
- : sectors;
-}
-
static int check_bucket_ref(struct bch_fs *c,
struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
@@ -1368,21 +1359,43 @@ need_mark:
/* trans_mark: */
static int bch2_trans_mark_pointer(struct btree_trans *trans,
- struct bkey_s_c k, struct extent_ptr_decoded p,
- s64 sectors, enum bch_data_type data_type)
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p,
+ unsigned flags)
{
+ bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
struct btree_iter iter;
struct bkey_i_alloc_v4 *a;
+ struct bpos bucket_pos;
+ struct bch_backpointer bp;
+ s64 sectors;
int ret;
- a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
+ bch2_pointer_to_bucket_and_backpointer(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
+ sectors = bp.bucket_len;
+ if (!insert)
+ sectors = -sectors;
+
+ a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
if (IS_ERR(a))
return PTR_ERR(a);
- ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
+ ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
a->v.gen, &a->v.data_type,
- &a->v.dirty_sectors, &a->v.cached_sectors) ?:
- bch2_trans_update(trans, &iter, &a->k_i, 0);
+ &a->v.dirty_sectors, &a->v.cached_sectors);
+ if (ret)
+ goto err;
+
+ if (!p.ptr.cached) {
+ ret = insert
+ ? bch2_bucket_backpointer_add(trans, a, bp, k)
+ : bch2_bucket_backpointer_del(trans, a, bp, k);
+ if (ret)
+ goto err;
+ }
+
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -1476,8 +1489,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_OVERWRITE)
disk_sectors = -disk_sectors;
- ret = bch2_trans_mark_pointer(trans, k, p,
- disk_sectors, data_type);
+ ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
if (ret < 0)
return ret;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 3469327d6c9d..b5e4bceb9a22 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -75,6 +75,15 @@ static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
}
+static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c,
+ const struct bch_extent_ptr *ptr,
+ u32 *bucket_offset)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset));
+}
+
static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
const struct bch_extent_ptr *ptr)
{
@@ -90,6 +99,16 @@ static inline enum bch_data_type ptr_data_type(const struct bkey *k,
return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
}
+static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
+{
+ EBUG_ON(sectors < 0);
+
+ return crc_is_compressed(p.crc)
+ ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
+ p.crc.uncompressed_size)
+ : sectors;
+}
+
static inline int gen_cmp(u8 a, u8 b)
{
return (s8) (a - b);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 480abf13afcb..b8ec841c25d7 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "backpointers.h"
#include "bkey_buf.h"
#include "alloc_background.h"
#include "btree_gc.h"
@@ -1075,8 +1076,8 @@ int bch2_fs_recovery(struct bch_fs *c)
}
if (!c->opts.nochanges) {
- if (c->sb.version < bcachefs_metadata_version_new_data_types) {
- bch_info(c, "version prior to new_data_types, upgrade and fsck required");
+ if (c->sb.version < bcachefs_metadata_version_backpointers) {
+ bch_info(c, "version prior to backpointers, upgrade and fsck required");
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
@@ -1254,6 +1255,28 @@ use_clean:
bch_verbose(c, "done checking lrus");
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
+ bch_info(c, "checking backpointers to alloc keys");
+ err = "error checking backpointers to alloc keys";
+ ret = bch2_check_backpointers(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "done checking backpointers to alloc keys");
+
+ bch_info(c, "checking backpointers to extents");
+ err = "error checking backpointers to extents";
+ ret = bch2_check_backpointers_to_extents(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "done checking backpointers to extents");
+
+ bch_info(c, "checking extents to backpointers");
+ err = "error checking extents to backpointers";
+ ret = bch2_check_extents_to_backpointers(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "done checking extents to backpointers");
+ set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
+
bch_info(c, "checking alloc to lru refs");
err = "error checking alloc to lru refs";
ret = bch2_check_alloc_to_lru_refs(c);
@@ -1265,6 +1288,7 @@ use_clean:
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
+ set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
set_bit(BCH_FS_FSCK_DONE, &c->flags);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6aba429a15ed..6752f5de4c1d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1432,6 +1432,8 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
BTREE_TRIGGER_NORUN, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
BTREE_TRIGGER_NORUN, NULL) ?:
+ bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
+ BTREE_TRIGGER_NORUN, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
BTREE_TRIGGER_NORUN, NULL);
if (ret)