diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-03-17 20:51:27 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-06-09 15:37:03 -0400 |
commit | 79d0fa54e578865bdc16e432b0f66a9f9dfcc37c (patch) | |
tree | a5318a6f32da7321680f3798cb019b9899aa173f | |
parent | 644a654d0aaf270fa51fe21702cc41ac6c0578aa (diff) |
bcachefs: New on disk format: Backpointers
This patch adds backpointers: we now have a reverse index from device
and offset on that device (specifically, offset within a bucket) back to
btree nodes and (non cached) data extents.
The first 40 backpointers within a bucket are stored in the alloc key;
after that backpointers spill over to the next backpointers btree. This
is to help avoid performance regressions from additional btree updates
on large streaming workloads.
This patch adds all the code for creating, checking and repairing
backpointers. The next patch in the series is going to use backpointers
for copygc - finally getting rid of the need to scan all extents to do
copygc.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/Makefile | 1 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.c | 209 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 26 | ||||
-rw-r--r-- | fs/bcachefs/backpointers.c | 858 | ||||
-rw-r--r-- | fs/bcachefs/backpointers.h | 38 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 36 | ||||
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 48 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 19 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 28 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 2 |
13 files changed, 1180 insertions, 95 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 96a9a964a6f2..f1277d58b348 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o bcachefs-y := \ alloc_background.o \ alloc_foreground.o \ + backpointers.o \ bkey.o \ bkey_methods.o \ bkey_sort.o \ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d114ba0949a8..6f82f58c4f6e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "backpointers.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_key_cache.h" @@ -37,8 +38,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { struct bkey_alloc_unpacked { u64 journal_seq; - u64 bucket; - u8 dev; u8 gen; u8 oldest_gen; u8 data_type; @@ -194,11 +193,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) { - struct bkey_alloc_unpacked ret = { - .dev = k.k->p.inode, - .bucket = k.k->p.offset, - .gen = 0, - }; + struct bkey_alloc_unpacked ret = { .gen = 0 }; switch (k.k->type) { case KEY_TYPE_alloc: @@ -215,48 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) return ret; } -void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) -{ - if (k.k->type == KEY_TYPE_alloc_v4) { - *out = *bkey_s_c_to_alloc_v4(k).v; - } else { - struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); - - *out = (struct bch_alloc_v4) { - .journal_seq = u.journal_seq, - .flags = u.need_discard, - .gen = u.gen, - .oldest_gen = u.oldest_gen, - .data_type = u.data_type, - .stripe_redundancy = u.stripe_redundancy, - .dirty_sectors = u.dirty_sectors, - .cached_sectors = u.cached_sectors, - .io_time[READ] = u.read_time, - .io_time[WRITE] = u.write_time, - .stripe = u.stripe, - }; - } -} - -struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) -{ - struct bkey_i_alloc_v4 *ret; - - if (k.k->type == KEY_TYPE_alloc_v4) { - ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); - if (!IS_ERR(ret)) - bkey_reassemble(&ret->k_i, k); - } else { - ret = bch2_trans_kmalloc(trans, sizeof(*ret)); - if (!IS_ERR(ret)) { - bkey_alloc_v4_init(&ret->k_i); - ret->k.p = k.k->p; - bch2_alloc_to_v4(k, &ret->v); - } - } - return ret; -} - struct bkey_i_alloc_v4 * bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, struct bpos pos) @@ -339,9 +292,15 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); - if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) { - prt_printf(err, "bad val size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4)); + if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) { + prt_printf(err, "bad val size (%lu != %u)", + bkey_val_u64s(k.k), alloc_v4_u64s(a.v)); + return -EINVAL; + } + + if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) { + prt_printf(err, "invalid backpointers_start"); return -EINVAL; } @@ -401,9 +360,19 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, return 0; } +static inline u64 swab40(u64 x) +{ + return (((x & 0x00000000ffULL) << 32)| + ((x & 0x000000ff00ULL) << 16)| + ((x & 0x0000ff0000ULL) >> 0)| + ((x & 0x00ff000000ULL) >> 16)| + ((x & 0xff00000000ULL) >> 32)); +} + void bch2_alloc_v4_swab(struct bkey_s k) { struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; + struct bch_backpointer *bp, *bps; a->journal_seq = swab64(a->journal_seq); a->flags = swab32(a->flags); @@ -413,25 +382,135 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); + + bps = alloc_v4_backpointers(a); + for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { + bp->bucket_offset = swab40(bp->bucket_offset); + bp->bucket_len = swab32(bp->bucket_len); + bch2_bpos_swab(&bp->pos); + } } void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bch_alloc_v4 a; + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a = &_a; + const struct bch_backpointer *bps; + unsigned i; - bch2_alloc_to_v4(k, &a); + if (k.k->type == KEY_TYPE_alloc_v4) + a = bkey_s_c_to_alloc_v4(k).v; + else + bch2_alloc_to_v4(k, &_a); + + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_printf(out, "gen %u oldest_gen %u data_type %s", + a->gen, a->oldest_gen, bch2_data_types[a->data_type]); + prt_newline(out); + prt_printf(out, "journal_seq %llu", a->journal_seq); + prt_newline(out); + prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a)); + prt_newline(out); + prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a)); + prt_newline(out); + prt_printf(out, "dirty_sectors %u", a->dirty_sectors); + prt_newline(out); + prt_printf(out, "cached_sectors %u", a->cached_sectors); + prt_newline(out); + prt_printf(out, "stripe %u", a->stripe); + prt_newline(out); + prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy); + prt_newline(out); + prt_printf(out, "io_time[READ] %llu", a->io_time[READ]); + prt_newline(out); + prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]); + prt_newline(out); + prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a)); + printbuf_indent_add(out, 2); + + bps = alloc_v4_backpointers_c(a); + for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a); i++) { + prt_newline(out); + bch2_backpointer_to_text(out, &bps[i]); + } - prt_printf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu", - a.gen, a.oldest_gen, bch2_data_types[a.data_type], - a.journal_seq, - BCH_ALLOC_V4_NEED_DISCARD(&a), - BCH_ALLOC_V4_NEED_INC_GEN(&a)); - prt_printf(out, " dirty_sectors %u", a.dirty_sectors); - prt_printf(out, " cached_sectors %u", a.cached_sectors); - prt_printf(out, " stripe %u", a.stripe); - prt_printf(out, " stripe_redundancy %u", a.stripe_redundancy); - prt_printf(out, " read_time %llu", a.io_time[READ]); - prt_printf(out, " write_time %llu", a.io_time[WRITE]); + printbuf_indent_sub(out, 4); +} + +void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) +{ + if (k.k->type == KEY_TYPE_alloc_v4) { + int d; + + *out = *bkey_s_c_to_alloc_v4(k).v; + + d = (int) BCH_ALLOC_V4_U64s - + (int) (BCH_ALLOC_V4_BACKPOINTERS_START(out) ?: BCH_ALLOC_V4_U64s_V0); + if (unlikely(d > 0)) { + memset((u64 *) out + BCH_ALLOC_V4_BACKPOINTERS_START(out), + 0, + d * sizeof(u64)); + SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); + } + } else { + struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); + + *out = (struct bch_alloc_v4) { + .journal_seq = u.journal_seq, + .flags = u.need_discard, + .gen = u.gen, + .oldest_gen = u.oldest_gen, + .data_type = u.data_type, + .stripe_redundancy = u.stripe_redundancy, + .dirty_sectors = u.dirty_sectors, + .cached_sectors = u.cached_sectors, + .io_time[READ] = u.read_time, + .io_time[WRITE] = u.write_time, + .stripe = u.stripe, + }; + + SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); + } +} + +struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) +{ + unsigned bytes = k.k->type == KEY_TYPE_alloc_v4 + ? bkey_bytes(k.k) + : sizeof(struct bkey_i_alloc_v4); + struct bkey_i_alloc_v4 *ret; + + /* + * Reserve space for one more backpointer here: + * Not sketchy at doing it this way, nope... + */ + ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer)); + if (IS_ERR(ret)) + return ret; + + if (k.k->type == KEY_TYPE_alloc_v4) { + bkey_reassemble(&ret->k_i, k); + + if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) { + struct bch_backpointer *src, *dst; + + src = alloc_v4_backpointers(&ret->v); + SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s); + dst = alloc_v4_backpointers(&ret->v); + + memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) * + sizeof(struct bch_backpointer)); + memset(src, 0, dst - src); + set_alloc_v4_u64s(ret); + } + } else { + bkey_alloc_v4_init(&ret->k_i); + ret->k.p = k.k->p; + bch2_alloc_to_v4(k, &ret->v); + } + return ret; } int bch2_alloc_read(struct bch_fs *c) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index ff366e61ace5..2ac6b5046c67 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -70,6 +70,22 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_ return pos; } +static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) +{ + unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: + BCH_ALLOC_V4_U64s_V0) + + BCH_ALLOC_V4_NR_BACKPOINTERS(a) * + (sizeof(struct bch_backpointer) / sizeof(u64)); + + BUG_ON(ret > U8_MAX - BKEY_U64s); + return ret; +} + +static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a) +{ + set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v)); +} + struct bkey_i_alloc_v4 * bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos); @@ -143,6 +159,16 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca, void bch2_do_invalidates(struct bch_fs *); +static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) +{ + return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); +} + +static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a) +{ + return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); +} + int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c new file mode 100644 index 000000000000..18af3dfbf26a --- /dev/null +++ b/fs/bcachefs/backpointers.c @@ -0,0 +1,858 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "alloc_background.h" +#include "backpointers.h" +#include "btree_cache.h" +#include "btree_update.h" +#include "error.h" + +#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10 + +static inline struct bpos __backpointer_pos(struct bch_dev *ca, u64 b, + u64 bucket_offset) +{ + return POS(ca->dev_idx, + (bucket_to_sector(ca, b) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); +} + +static inline struct bpos backpointer_pos(struct bch_fs *c, + struct bpos alloc_pos, + u64 bucket_offset) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, alloc_pos.inode); + + return POS(alloc_pos.inode, + (bucket_to_sector(ca, alloc_pos.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); +} + +int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k, + int rw, struct printbuf *err) +{ + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode); + u64 b; + + if (bkey_val_bytes(bp.k) != sizeof(*bp.v)) { + prt_printf(err, "incorrect value size"); + return -EINVAL; + } + + b = sector_to_bucket(ca, k.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); + + if (bpos_cmp(__backpointer_pos(ca, b, bp.v->bucket_offset), bp.k->p)) { + prt_printf(err, "backpointer at wrong pos"); + return -EINVAL; + } + + return 0; +} + +void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) +{ + prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", + bch2_btree_ids[bp->btree_id], + bp->level, + (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), + (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), + bp->bucket_len); + bch2_bpos_to_text(out, bp->pos); +} + +void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +{ + bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); +} + +void bch2_backpointer_swab(struct bkey_s k) +{ + struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); + + bp.v->bucket_offset = swab32(bp.v->bucket_offset); + bp.v->bucket_len = swab32(bp.v->bucket_len); + bch2_bpos_swab(&bp.v->pos); +} + +#define BACKPOINTER_OFFSET_MAX ((1ULL << 40) - 1) + +void bch2_pointer_to_bucket_and_backpointer(struct bch_fs *c, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, + struct extent_ptr_decoded p, + struct bpos *bucket_pos, + struct bch_backpointer *bp) +{ + enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user; + s64 sectors = level ? btree_sectors(c) : k.k->size; + u32 bucket_offset; + + *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset); + *bp = (struct bch_backpointer) { + .btree_id = btree_id, + .level = level, + .data_type = data_type, + .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + + p.crc.offset, + .bucket_len = ptr_disk_sectors(sectors, p), + .pos = k.k->p, + }; +} + +static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r) +{ + return cmp_int(l.bucket_offset, r.bucket_offset); +} + +static int bch2_backpointer_del_by_offset(struct btree_trans *trans, + struct bpos alloc_pos, + u64 bp_offset) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + if (bp_offset < BACKPOINTER_OFFSET_MAX) { + struct bch_backpointer *bps; + struct bkey_i_alloc_v4 *a; + unsigned i, nr; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + alloc_pos, + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_alloc_v4) { + ret = -ENOENT; + goto err; + } + + a = bch2_alloc_to_v4_mut(trans, k); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto err; + bps = alloc_v4_backpointers(&a->v); + nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v); + + for (i = 0; i < nr; i++) { + if (bps[i].bucket_offset == bp_offset) + goto found; + if (bps[i].bucket_offset > bp_offset) + break; + } + + ret = -ENOENT; + goto err; +found: + array_remove_item(bps, nr, i); + SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr); + set_alloc_v4_u64s(a); + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + } else { + bp_offset -= BACKPOINTER_OFFSET_MAX; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers, + backpointer_pos(c, alloc_pos, bp_offset), + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type == KEY_TYPE_backpointer) + ret = bch2_btree_delete_at(trans, &iter, 0); + else + ret = -ENOENT; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_bucket_backpointer_del(struct btree_trans *trans, + struct bkey_i_alloc_v4 *a, + struct bch_backpointer bp, + struct bkey_s_c orig_k) +{ + struct bch_fs *c = trans->c; + struct bch_backpointer *bps = alloc_v4_backpointers(&a->v); + unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v); + struct btree_iter bp_iter; + struct bkey_s_c k; + int ret; + + for (i = 0; i < nr; i++) { + int cmp = backpointer_cmp(bps[i], bp) ?: + memcmp(&bps[i], &bp, sizeof(bp)); + if (!cmp) + goto found; + if (cmp >= 0) + break; + } + + goto btree; +found: + array_remove_item(bps, nr, i); + SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr); + set_alloc_v4_u64s(a); + return 0; +btree: + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, + backpointer_pos(c, a->k.p, bp.bucket_offset), + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&bp_iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_backpointer || + memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "backpointer not found when deleting"); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "searching for "); + bch2_backpointer_to_text(&buf, &bp); + prt_newline(&buf); + + prt_printf(&buf, "got "); + bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); + + prt_printf(&buf, "alloc "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); + prt_newline(&buf); + + prt_printf(&buf, "for "); + bch2_bkey_val_to_text(&buf, c, orig_k); + + if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + bch_err(c, "%s", buf.buf); + } else { + ret = -EIO; + bch2_trans_inconsistent(trans, "%s", buf.buf); + } + printbuf_exit(&buf); + goto err; + } + + ret = bch2_btree_delete_at(trans, &bp_iter, 0); +err: + bch2_trans_iter_exit(trans, &bp_iter); + return ret; +} + +int bch2_bucket_backpointer_add(struct btree_trans *trans, + struct bkey_i_alloc_v4 *a, + struct bch_backpointer bp, + struct bkey_s_c orig_k) +{ + struct bch_fs *c = trans->c; + struct bch_dev *ca; + struct bch_backpointer *bps = alloc_v4_backpointers(&a->v); + unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v); + struct bkey_i_backpointer *bp_k; + struct btree_iter bp_iter; + struct bkey_s_c k; + int ret; + + /* Check for duplicates: */ + for (i = 0; i < nr; i++) { + int cmp = backpointer_cmp(bps[i], bp); + if (cmp >= 0) + break; + } + + if ((i && + (bps[i - 1].bucket_offset + + bps[i - 1].bucket_len > bp.bucket_offset)) || + (i < nr && + (bp.bucket_offset + bp.bucket_len > bps[i].bucket_offset))) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "overlapping backpointer found when inserting "); + bch2_backpointer_to_text(&buf, &bp); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "into "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); + prt_newline(&buf); + + prt_printf(&buf, "for "); + bch2_bkey_val_to_text(&buf, c, orig_k); + + bch2_trans_inconsistent(trans, "%s", buf.buf); + + printbuf_exit(&buf); + return -EIO; + } + + if (nr < BCH_ALLOC_V4_NR_BACKPOINTERS_MAX) { + array_insert_item(bps, nr, i, bp); + SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr); + set_alloc_v4_u64s(a); + return 0; + } + + /* Overflow: use backpointer btree */ + bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k)); + ret = PTR_ERR_OR_ZERO(bp_k); + if (ret) + return ret; + + ca = bch_dev_bkey_exists(c, a->k.p.inode); + + bkey_backpointer_init(&bp_k->k_i); + bp_k->k.p = backpointer_pos(c, a->k.p, bp.bucket_offset); + bp_k->v = bp; + + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p, + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&bp_iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "existing btree backpointer key found when inserting "); + bch2_backpointer_to_text(&buf, &bp); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "found "); + bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); + + prt_printf(&buf, "for "); + bch2_bkey_val_to_text(&buf, c, orig_k); + + bch2_trans_inconsistent(trans, "%s", buf.buf); + + printbuf_exit(&buf); + ret = -EIO; + goto err; + } + + ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); +err: + bch2_trans_iter_exit(trans, &bp_iter); + return ret; +} + +int bch2_get_next_backpointer(struct btree_trans *trans, + unsigned dev, u64 bucket, int gen, + u64 *bp_offset, + struct bch_backpointer *dst) +{ + struct bch_fs *c = trans->c; + struct bpos alloc_pos = POS(dev, bucket); + struct bpos bp_pos = + backpointer_pos(c, alloc_pos, + max_t(u64, *bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX); + struct bpos bp_end_pos = + backpointer_pos(c, bpos_nosnap_successor(alloc_pos), 0); + struct btree_iter alloc_iter, bp_iter = { NULL }; + struct bkey_s_c k; + struct bkey_s_c_alloc_v4 a; + size_t i; + int ret; + + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, + alloc_pos, + BTREE_ITER_CACHED); + k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(k); + if (ret) + goto done; + + if (k.k->type != KEY_TYPE_alloc_v4) + goto done; + + a = bkey_s_c_to_alloc_v4(k); + if (gen >= 0 && a.v->gen != gen) + goto done; + + for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) { + if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset) + continue; + + *dst = alloc_v4_backpointers_c(a.v)[i]; + *bp_offset = dst->bucket_offset; + goto out; + } + + for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers, + bp_pos, 0, k, ret) { + if (bpos_cmp(k.k->p, bp_end_pos) >= 0) + break; + + if (k.k->type != KEY_TYPE_backpointer) + continue; + + *dst = *bkey_s_c_to_backpointer(k).v; + *bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX; + goto out; + } +done: + *bp_offset = U64_MAX; +out: + bch2_trans_iter_exit(trans, &bp_iter); + bch2_trans_iter_exit(trans, &alloc_iter); + return ret; +} + +struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_backpointer bp, + bool in_fsck) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, + min(bp.level, c->btree_roots[bp.btree_id].level), + 0); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } + + if (bp.level == c->btree_roots[bp.btree_id].level + 1) + k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key); + + /* Check if key matches backpointer: */ + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket_pos; + struct bch_backpointer bp2; + + bch2_pointer_to_bucket_and_backpointer(c, bp.btree_id, bp.level, + k, p, &bucket_pos, &bp2); + if (!memcmp(&bp, &bp2, sizeof(bp))) + return k; + } + + if (!in_fsck) { + prt_printf(&buf, "backpointer doesn't match extent it points to:\n "); + bch2_backpointer_to_text(&buf, &bp); + prt_printf(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, k); + bch2_trans_inconsistent(trans, "%s", buf.buf); + + bch2_trans_iter_exit(trans, iter); + } + + printbuf_exit(&buf); + return bkey_s_c_null; +} + +struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_backpointer bp) +{ + return __bch2_backpointer_get_key(trans, iter, bp, false); +} + +struct btree *bch2_backpointer_get_node(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_backpointer bp) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct btree *b; + struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + + BUG_ON(!bp.level); + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, + bp.level - 1, + 0); + b = bch2_btree_iter_peek_node(iter); + if (IS_ERR(b)) { + bch2_trans_iter_exit(trans, iter); + return b; + } + + /* Check if key matches backpointer: */ + k = bkey_i_to_s_c(&b->key); + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket_pos; + struct bch_backpointer bp2; + + bch2_pointer_to_bucket_and_backpointer(c, bp.btree_id, bp.level, + k, p, &bucket_pos, &bp2); + if (!memcmp(&bp, &bp2, sizeof(bp))) + return b; + } + + if (btree_node_will_make_reachable(b)) + goto out; + + prt_printf(&buf, "backpointer doesn't match btree node it points to:\n "); + bch2_backpointer_to_text(&buf, &bp); + prt_printf(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, k); + bch2_trans_inconsistent(trans, "%s", buf.buf); +out: + bch2_trans_iter_exit(trans, iter); + printbuf_exit(&buf); + return NULL; +} + +static int bch2_check_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter) +{ + struct bch_fs *c = trans->c; + struct btree_iter alloc_iter = { NULL }; + struct bch_dev *ca; + struct bkey_s_c k, alloc_k; + struct printbuf buf = PRINTBUF; + int ret = 0; + + k = bch2_btree_iter_peek(bp_iter); + ret = bkey_err(k); + if (ret) + return ret; + if (!k.k) + return 0; + + if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, + "backpointer for mising device:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, bp_iter, 0); + goto out; + } + + ca = bch_dev_bkey_exists(c, k.k->p.inode); + + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, + POS(k.k->p.inode, sector_to_bucket(ca, + k.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT)), 0); + + alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(alloc_k); + if (ret) + goto out; + + if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, + "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", + alloc_iter.pos.inode, alloc_iter.pos.offset, + (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + ret = bch2_btree_delete_at(trans, bp_iter, 0); + goto out; + } +out: +fsck_err: + bch2_trans_iter_exit(trans, &alloc_iter); + printbuf_exit(&buf); + return ret; +} + +/* verify that every backpointer has a corresponding alloc key */ +int bch2_check_backpointers(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0); + + do { + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + bch2_check_backpointer(&trans, &iter)); + if (ret) + break; + } while (bch2_btree_iter_advance(&iter)); + + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + return ret; +} + +/* + * Verify that @bp exists: + */ +static int backpointer_check(struct btree_trans *trans, + struct bpos bucket_pos, + struct bch_backpointer bp, + struct bkey_s_c orig_k) +{ + struct bch_fs *c = trans->c; + struct btree_iter alloc_iter, bp_iter = { NULL }; + struct printbuf buf = PRINTBUF; + struct bkey_s_c alloc_k, bp_k; + int ret; + + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0); + alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(alloc_k); + if (ret) + goto err; + + if (alloc_k.k->type == KEY_TYPE_alloc_v4) { + struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k); + const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v); + unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); + + for (i = 0; i < nr; i++) { + int cmp = backpointer_cmp(bps[i], bp) ?: + memcmp(&bps[i], &bp, sizeof(bp)); + if (!cmp) + goto out; + if (cmp >= 0) + break; + } + } else { + goto missing; + } + + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, + backpointer_pos(c, bucket_pos, bp.bucket_offset), + 0); + bp_k = bch2_btree_iter_peek_slot(&bp_iter); + ret = bkey_err(bp_k); + if (ret) + goto err; + + if (bp_k.k->type != KEY_TYPE_backpointer || + memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) + goto missing; +out: +err: +fsck_err: + bch2_trans_iter_exit(trans, &bp_iter); + bch2_trans_iter_exit(trans, &alloc_iter); + printbuf_exit(&buf); + return ret; +missing: + prt_printf(&buf, "missing backpointer for btree=%s l=%u ", + bch2_btree_ids[bp.btree_id], bp.level); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_printf(&buf, "\nin alloc key "); + bch2_bkey_val_to_text(&buf, c, alloc_k); + + if (c->sb.version < bcachefs_metadata_version_backpointers || + fsck_err(c, "%s", buf.buf)) { + struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k); + + ret = PTR_ERR_OR_ZERO(a) ?: + bch2_bucket_backpointer_add(trans, a, bp, orig_k) ?: + bch2_trans_update(trans, &alloc_iter, &a->k_i, 0); + } + + goto out; +} + +static int check_extent_to_backpointers(struct btree_trans *trans, + struct btree_iter *iter) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bkey_s_c k; + int ret; + + k = bch2_btree_iter_peek_all_levels(iter); + ret = bkey_err(k); + if (ret) + return ret; + if (!k.k) + return 0; + + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket_pos; + struct bch_backpointer bp; + + if (p.ptr.cached) + continue; + + bch2_pointer_to_bucket_and_backpointer(c, iter->btree_id, iter->path->level, + k, p, &bucket_pos, &bp); + + ret = backpointer_check(trans, bucket_pos, bp, k); + if (ret) + return ret; + } + + return 0; +} + +static int check_btree_root_to_backpointers(struct btree_trans *trans, + enum btree_id btree_id) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct btree *b; + struct bkey_s_c k; + struct bkey_ptrs_c ptrs; + struct extent_ptr_decoded p; + const union bch_extent_entry *entry; + int ret; + + bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, + c->btree_roots[btree_id].level, 0); + b = bch2_btree_iter_peek_node(&iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto err; + + BUG_ON(b != btree_node_root(c, b)); + + k = bkey_i_to_s_c(&b->key); + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket_pos; + struct bch_backpointer bp; + + if (p.ptr.cached) + continue; + + bch2_pointer_to_bucket_and_backpointer(c, iter.btree_id, iter.path->level + 1, + k, p, &bucket_pos, &bp); + + ret = backpointer_check(trans, bucket_pos, bp, k); + if (ret) + goto err; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_check_extents_to_backpointers(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + enum btree_id btree_id; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { + bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0, + 0, + BTREE_ITER_ALL_LEVELS| + BTREE_ITER_PREFETCH); + + do { + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_extent_to_backpointers(&trans, &iter)); + if (ret) + break; + } while (!bch2_btree_iter_advance(&iter)); + + bch2_trans_iter_exit(&trans, &iter); + + if (ret) + break; + + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_btree_root_to_backpointers(&trans, btree_id)); + if (ret) + break; + } + bch2_trans_exit(&trans); + return ret; +} + +static int check_one_backpointer(struct btree_trans *trans, + struct bpos alloc_pos, + u64 *bp_offset) +{ + struct btree_iter iter; + struct bch_backpointer bp; + struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + int ret; + + ret = bch2_get_next_backpointer(trans, alloc_pos.inode, + alloc_pos.offset, -1, + bp_offset, &bp); + if (ret || *bp_offset == U64_MAX) + return ret; + + k = __bch2_backpointer_get_key(trans, &iter, bp, true); + ret = bkey_err(k); + if (ret) + return ret; + + if (fsck_err_on(!k.k, trans->c, + "backpointer offset %llu points to missing extent\n%s", + *bp_offset, + (bch2_backpointer_to_text(&buf, &bp), buf.buf))) { + ret = bch2_backpointer_del_by_offset(trans, alloc_pos, *bp_offset); + if (ret == -ENOENT) + bch_err(trans->c, "backpointer at %llu not found", *bp_offset); + } + + bch2_trans_iter_exit(trans, &iter); +fsck_err: + printbuf_exit(&buf); + return ret; +} + +int bch2_check_backpointers_to_extents(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ret) { + u64 bp_offset = 0; + + while (!(ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_one_backpointer(&trans, iter.pos, &bp_offset))) && + bp_offset < U64_MAX) + bp_offset++; + + if (ret) + break; + } + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + return ret < 0 ? ret : 0; +} diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h new file mode 100644 index 000000000000..6b44fdc92cee --- /dev/null +++ b/fs/bcachefs/backpointers.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H +#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H + +#include "super.h" + +int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k, + int, struct printbuf *); +void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); +void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +void bch2_backpointer_swab(struct bkey_s); + +#define bch2_bkey_ops_backpointer (struct bkey_ops) { \ + .key_invalid = bch2_backpointer_invalid, \ + .val_to_text = bch2_backpointer_k_to_text, \ + .swab = bch2_backpointer_swab, \ +} + +void bch2_pointer_to_bucket_and_backpointer(struct bch_fs *, enum btree_id, unsigned, + struct bkey_s_c, struct extent_ptr_decoded, + struct bpos *, struct bch_backpointer *); + +int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *, + struct bch_backpointer, struct bkey_s_c); +int bch2_bucket_backpointer_add(struct btree_trans *, struct bkey_i_alloc_v4 *, + struct bch_backpointer, struct bkey_s_c); +int bch2_get_next_backpointer(struct btree_trans *, unsigned, u64, int, + u64 *, struct bch_backpointer *); +struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, + struct btree_iter *, struct bch_backpointer); +struct btree *bch2_backpointer_get_node(struct btree_trans *, + struct btree_iter *, struct bch_backpointer); + +int bch2_check_backpointers(struct bch_fs *); +int bch2_check_extents_to_backpointers(struct bch_fs *); +int bch2_check_backpointers_to_extents(struct bch_fs *); + +#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 2eced20667fa..1f0484aa6501 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -509,6 +509,7 @@ enum { BCH_FS_TOPOLOGY_REPAIR_DONE, BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ BCH_FS_CHECK_LRUS_DONE, + BCH_FS_CHECK_BACKPOINTERS_DONE, BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, BCH_FS_FSCK_DONE, BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5af9f2e7ea86..f6075dd368d7 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -365,7 +365,8 @@ static inline void bkey_init(struct bkey *k) x(alloc_v3, 24) \ x(set, 25) \ x(lru, 26) \ - x(alloc_v4, 27) + x(alloc_v4, 27) \ + x(backpointer, 28) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -886,6 +887,12 @@ struct bch_alloc { x(stripe, 32) \ x(stripe_redundancy, 8) +enum { +#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, + BCH_ALLOC_FIELDS_V1() +#undef x +}; + struct bch_alloc_v2 { struct bch_val v; __u8 nr_fields; @@ -914,6 +921,9 @@ struct bch_alloc_v3 { __u8 data[]; } __attribute__((packed, aligned(8))); +LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) +LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) + struct bch_alloc_v4 { struct bch_val v; __u64 journal_seq; @@ -927,22 +937,27 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; - struct bpos backpointers[0]; } __attribute__((packed, aligned(8))); -LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) -LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) +#define BCH_ALLOC_V4_U64s_V0 6 +#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64)) BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) -enum { -#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, - BCH_ALLOC_FIELDS_V1() -#undef x -}; +#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40 + +struct bch_backpointer { + struct bch_val v; + __u8 btree_id; + __u8 level; + __u8 data_type; + __u64 bucket_offset:40; + __u32 bucket_len; + struct bpos pos; +} __attribute__((packed, aligned(8))); /* Quotas: */ @@ -1406,7 +1421,8 @@ struct bch_sb_field_journal_seq_blacklist { x(inode_v2, 18) \ x(freespace, 19) \ x(alloc_v4, 20) \ - x(new_data_types, 21) + x(new_data_types, 21) \ + x(backpointers, 22) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 229d51578086..fd352a672d62 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "backpointers.h" #include "bkey_methods.h" #include "btree_types.h" #include "alloc_background.h" @@ -191,6 +192,9 @@ static unsigned bch2_key_types_allowed[] = { [BKEY_TYPE_need_discard] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_set), + [BKEY_TYPE_backpointers] = + (1U << KEY_TYPE_deleted)| + (1U << KEY_TYPE_backpointer), [BKEY_TYPE_btree] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_btree_ptr)| diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index d8f92cc96c4f..b97bfd954c4a 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -638,6 +638,11 @@ static inline bool btree_type_has_snapshots(enum btree_id id) return (1 << id) & BTREE_ID_HAS_SNAPSHOTS; } +static inline bool btree_type_has_ptrs(enum btree_id id) +{ + return (1 << id) & BTREE_ID_HAS_PTRS; +} + static inline bool btree_node_type_needs_gc(enum btree_node_type type) { return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e2944fc4cfe2..46c2655701e9 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -7,6 +7,7 @@ #include "bcachefs.h" #include "alloc_background.h" +#include "backpointers.h" #include "bset.h" #include "btree_gc.h" #include "btree_update.h" @@ -655,16 +656,6 @@ err: return ret; } -static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) -{ - EBUG_ON(sectors < 0); - - return crc_is_compressed(p.crc) - ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, - p.crc.uncompressed_size) - : sectors; -} - static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, const struct bch_extent_ptr *ptr, @@ -1368,21 +1359,43 @@ need_mark: /* trans_mark: */ static int bch2_trans_mark_pointer(struct btree_trans *trans, - struct bkey_s_c k, struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type data_type) + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, struct extent_ptr_decoded p, + unsigned flags) { + bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); struct btree_iter iter; struct bkey_i_alloc_v4 *a; + struct bpos bucket_pos; + struct bch_backpointer bp; + s64 sectors; int ret; - a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr)); + bch2_pointer_to_bucket_and_backpointer(trans->c, btree_id, level, k, p, &bucket_pos, &bp); + sectors = bp.bucket_len; + if (!insert) + sectors = -sectors; + + a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos); if (IS_ERR(a)) return PTR_ERR(a); - ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, + ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type, a->v.gen, &a->v.data_type, - &a->v.dirty_sectors, &a->v.cached_sectors) ?: - bch2_trans_update(trans, &iter, &a->k_i, 0); + &a->v.dirty_sectors, &a->v.cached_sectors); + if (ret) + goto err; + + if (!p.ptr.cached) { + ret = insert + ? bch2_bucket_backpointer_add(trans, a, bp, k) + : bch2_bucket_backpointer_del(trans, a, bp, k); + if (ret) + goto err; + } + + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); +err: bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1476,8 +1489,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans, if (flags & BTREE_TRIGGER_OVERWRITE) disk_sectors = -disk_sectors; - ret = bch2_trans_mark_pointer(trans, k, p, - disk_sectors, data_type); + ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags); if (ret < 0) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 3469327d6c9d..b5e4bceb9a22 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -75,6 +75,15 @@ static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c, return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); } +static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c, + const struct bch_extent_ptr *ptr, + u32 *bucket_offset) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + + return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset)); +} + static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca, const struct bch_extent_ptr *ptr) { @@ -90,6 +99,16 @@ static inline enum bch_data_type ptr_data_type(const struct bkey *k, return ptr->cached ? BCH_DATA_cached : BCH_DATA_user; } +static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) +{ + EBUG_ON(sectors < 0); + + return crc_is_compressed(p.crc) + ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, + p.crc.uncompressed_size) + : sectors; +} + static inline int gen_cmp(u8 a, u8 b) { return (s8) (a - b); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 480abf13afcb..b8ec841c25d7 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "backpointers.h" #include "bkey_buf.h" #include "alloc_background.h" #include "btree_gc.h" @@ -1075,8 +1076,8 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->opts.nochanges) { - if (c->sb.version < bcachefs_metadata_version_new_data_types) { - bch_info(c, "version prior to new_data_types, upgrade and fsck required"); + if (c->sb.version < bcachefs_metadata_version_backpointers) { + bch_info(c, "version prior to backpointers, upgrade and fsck required"); c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; @@ -1254,6 +1255,28 @@ use_clean: bch_verbose(c, "done checking lrus"); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); + bch_info(c, "checking backpointers to alloc keys"); + err = "error checking backpointers to alloc keys"; + ret = bch2_check_backpointers(c); + if (ret) + goto err; + bch_verbose(c, "done checking backpointers to alloc keys"); + + bch_info(c, "checking backpointers to extents"); + err = "error checking backpointers to extents"; + ret = bch2_check_backpointers_to_extents(c); + if (ret) + goto err; + bch_verbose(c, "done checking backpointers to extents"); + + bch_info(c, "checking extents to backpointers"); + err = "error checking extents to backpointers"; + ret = bch2_check_extents_to_backpointers(c); + if (ret) + goto err; + bch_verbose(c, "done checking extents to backpointers"); + set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); + bch_info(c, "checking alloc to lru refs"); err = "error checking alloc to lru refs"; ret = bch2_check_alloc_to_lru_refs(c); @@ -1265,6 +1288,7 @@ use_clean: set_bit(BCH_FS_MAY_GO_RW, &c->flags); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); + set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6aba429a15ed..6752f5de4c1d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1432,6 +1432,8 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) BTREE_TRIGGER_NORUN, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_freespace, start, end, BTREE_TRIGGER_NORUN, NULL) ?: + bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end, + BTREE_TRIGGER_NORUN, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_alloc, start, end, BTREE_TRIGGER_NORUN, NULL); if (ret) |