summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-03-29 11:18:52 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-05-13 00:19:58 -0400
commit984325c28aa0da3526c8c6d3a75ae15c9df80572 (patch)
tree5cc4de39df87cab8093241eafab595518b8d60c7
parent4e18b5567482b0de7bcd56b4007ac4b9688f4169 (diff)
bcachefs: BTREE_ID_snapshot_tree
This adds a new btree which gets us a persistent per-snapshot-tree identifier. - BTREE_ID_snapshot_trees - KEY_TYPE_snapshot_tree - bch_snapshot now has a field that points to a snapshot_tree This is going to be used to designate one snapshot ID/subvolume out of a given tree of snapshots as the "main" subvolume, so that we can do quota accounting in that subvolume and not the rest. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/bcachefs.h1
-rw-r--r--fs/bcachefs/bcachefs_format.h26
-rw-r--r--fs/bcachefs/bkey_methods.c3
-rw-r--r--fs/bcachefs/errcode.h1
-rw-r--r--fs/bcachefs/fsck.c3
-rw-r--r--fs/bcachefs/recovery.c23
-rw-r--r--fs/bcachefs/subvolume.c590
-rw-r--r--fs/bcachefs/subvolume.h20
-rw-r--r--fs/bcachefs/subvolume_types.h1
9 files changed, 588 insertions, 80 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 1e7c810d3569..e8ec7b847aea 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -445,6 +445,7 @@ enum gc_phase {
GC_PHASE_BTREE_need_discard,
GC_PHASE_BTREE_backpointers,
GC_PHASE_BTREE_bucket_gens,
+ GC_PHASE_BTREE_snapshot_trees,
GC_PHASE_PENDING_DELETE,
};
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 7d1c0b1e3c54..4866c271afcb 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -360,7 +360,8 @@ static inline void bkey_init(struct bkey *k)
x(alloc_v4, 27) \
x(backpointer, 28) \
x(inode_v3, 29) \
- x(bucket_gens, 30)
+ x(bucket_gens, 30) \
+ x(snapshot_tree, 31)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -1119,7 +1120,7 @@ struct bch_snapshot {
__le32 parent;
__le32 children[2];
__le32 subvol;
- __le32 pad;
+ __le32 tree;
};
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
@@ -1127,6 +1128,19 @@ LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
/* True if a subvolume points to this snapshot node: */
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
+/*
+ * Snapshot trees:
+ *
+ * The snapshot_trees btree gives us a persistent identifier for each tree of
+ * bch_snapshot nodes, and allows us to record and easily find the root/master
+ * subvolume that other snapshots were created from:
+ */
+struct bch_snapshot_tree {
+ struct bch_val v;
+ __le32 master_subvol; /* ID of the subvolume recorded as this tree's root/master subvolume */
+ __le32 root_snapshot; /* ID of the bch_snapshot node at the root of this tree */
+};
+
/* LRU btree: */
struct bch_lru {
@@ -1555,7 +1569,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(bucket_gens, 25) \
x(lru_v2, 26) \
x(fragmentation_lru, 27) \
- x(no_bps_in_alloc_keys, 28)
+ x(no_bps_in_alloc_keys, 28) \
+ x(snapshot_trees, 29)
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1565,6 +1580,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_max
};
+static const unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_snapshot_trees; /* bch2_fs_recovery() forces version_upgrade + fsck when the superblock version is below this */
+
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
#define BCH_SB_SECTOR 8
@@ -2091,7 +2108,8 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
x(freespace, 11) \
x(need_discard, 12) \
x(backpointers, 13) \
- x(bucket_gens, 14)
+ x(bucket_gens, 14) \
+ x(snapshot_trees, 15)
enum btree_id {
#define x(kwd, val) BTREE_ID_##kwd = val,
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 47f0ab023d64..79f3fbe925d5 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -204,6 +204,9 @@ static unsigned bch2_key_types_allowed[] = {
[BKEY_TYPE_bucket_gens] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_bucket_gens),
+ [BKEY_TYPE_snapshot_trees] =
+ (1U << KEY_TYPE_deleted)|
+ (1U << KEY_TYPE_snapshot_tree),
[BKEY_TYPE_btree] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_btree_ptr)|
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index c73a5e78e260..c8ac08e5548b 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -93,6 +93,7 @@
x(ENOSPC, ENOSPC_sb_members) \
x(ENOSPC, ENOSPC_sb_crypt) \
x(ENOSPC, ENOSPC_btree_slot) \
+ x(ENOSPC, ENOSPC_snapshot_tree) \
x(0, open_buckets_empty) \
x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 4b28fc4f77c6..eb3609aa4593 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2427,7 +2427,8 @@ int bch2_fsck_full(struct bch_fs *c)
{
int ret;
again:
- ret = bch2_fs_check_snapshots(c) ?:
+ /* Chain every pass with ?: so the first failure is stored in ret and ends the sequence: */
+ ret = bch2_fs_check_snapshot_trees(c) ?:
+ bch2_fs_check_snapshots(c) ?:
bch2_fs_check_subvols(c) ?:
bch2_delete_dead_snapshots(c) ?:
check_inodes(c, true) ?:
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 6214691fa441..af76c029fb6a 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1025,16 +1025,25 @@ fsck_err:
static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
{
- struct bkey_i_snapshot root_snapshot;
- struct bkey_i_subvolume root_volume;
+ struct bkey_i_snapshot_tree root_tree;
+ struct bkey_i_snapshot root_snapshot;
+ struct bkey_i_subvolume root_volume;
int ret;
+ bkey_snapshot_tree_init(&root_tree.k_i);
+ root_tree.k.p.offset = 1;
+ root_tree.v.master_subvol = cpu_to_le32(1);
+ root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees,
+ &root_tree.k_i,
+ NULL, NULL, 0);
+ /* Stop here on failure: the next insert reuses ret and would hide this error. */
+ if (ret)
+ return ret;
+
bkey_snapshot_init(&root_snapshot.k_i);
root_snapshot.k.p.offset = U32_MAX;
root_snapshot.v.flags = 0;
root_snapshot.v.parent = 0;
root_snapshot.v.subvol = BCACHEFS_ROOT_SUBVOL;
- root_snapshot.v.pad = 0;
+ root_snapshot.v.tree = cpu_to_le32(1);
SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
ret = bch2_btree_insert(c, BTREE_ID_snapshots,
@@ -1135,8 +1144,12 @@ int bch2_fs_recovery(struct bch_fs *c)
}
if (!c->opts.nochanges) {
- if (c->sb.version < bcachefs_metadata_version_no_bps_in_alloc_keys) {
- bch_info(c, "version prior to no_bps_in_alloc_keys, upgrade and fsck required");
+ if (c->sb.version < bcachefs_metadata_required_upgrade_below) {
+ bch_info(c, "version %s (%u) prior to %s (%u), upgrade and fsck required",
+ bch2_metadata_versions[c->sb.version],
+ c->sb.version,
+ bch2_metadata_versions[bcachefs_metadata_required_upgrade_below],
+ bcachefs_metadata_required_upgrade_below);
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index 8d87f90a0ac6..b14da196e7fd 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -10,6 +10,71 @@
/* Snapshot tree: */
+/*
+ * Print the value of a snapshot_tree key to @out: the master subvolume ID
+ * followed by the root snapshot ID, both converted from little-endian.
+ */
+void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
+				struct bkey_s_c k)
+{
+	struct bkey_s_c_snapshot_tree tree = bkey_s_c_to_snapshot_tree(k);
+	u32 master_subvol = le32_to_cpu(tree.v->master_subvol);
+	u32 root_snapshot = le32_to_cpu(tree.v->root_snapshot);
+
+	prt_printf(out, "subvol %u root snapshot %u",
+		   master_subvol, root_snapshot);
+}
+
+/*
+ * Validate the position of a snapshot_tree key: it must lie in
+ * [POS(0, 1), POS(0, U32_MAX)].
+ *
+ * Returns 0 if the position is acceptable; otherwise writes "bad pos" to @err
+ * and returns -BCH_ERR_invalid_bkey.
+ */
+int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k,
+			       unsigned flags, struct printbuf *err)
+{
+	bool pos_in_range = !bkey_gt(k.k->p, POS(0, U32_MAX)) &&
+			    !bkey_lt(k.k->p, POS(0, 1));
+
+	if (pos_in_range)
+		return 0;
+
+	prt_printf(err, "bad pos");
+	return -BCH_ERR_invalid_bkey;
+}
+
+/*
+ * Read the snapshot_tree value stored at POS(0, @id) into @s.
+ *
+ * Returns whatever bch2_bkey_get_val_typed() returns; callers in this file
+ * test the result against ENOENT with bch2_err_matches().
+ */
+static int snapshot_tree_lookup(struct btree_trans *trans, u32 id,
+				struct bch_snapshot_tree *s)
+{
+	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees,
+					  POS(0, id), BTREE_ITER_WITH_UPDATES,
+					  snapshot_tree, s);
+
+	return ret;
+}
+
+/*
+ * Find an empty slot in the snapshot_trees btree (up to POS(0, U32_MAX)) and
+ * allocate a new snapshot_tree key there.
+ *
+ * Returns the new key, or an ERR_PTR(); running out of btree slots is reported
+ * as -BCH_ERR_ENOSPC_snapshot_tree.
+ */
+static struct bkey_i_snapshot_tree *
+__snapshot_tree_create(struct btree_trans *trans)
+{
+	struct btree_iter slot;
+	struct bkey_i_snapshot_tree *new_tree;
+	int err;
+
+	err = bch2_bkey_get_empty_slot(trans, &slot,
+			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
+	if (err) {
+		/* Translate the generic "no slot" error into the snapshot tree one: */
+		if (err == -BCH_ERR_ENOSPC_btree_slot)
+			err = -BCH_ERR_ENOSPC_snapshot_tree;
+		return ERR_PTR(err);
+	}
+
+	new_tree = bch2_bkey_alloc(trans, &slot, 0, snapshot_tree);
+	bch2_trans_iter_exit(trans, &slot);
+
+	/* Either a valid key or an error pointer; hand it back unchanged: */
+	return new_tree;
+}
+
+/*
+ * Allocate a new snapshot_tree key, point it at @root_id and @subvol_id, and
+ * report the offset it was allocated at through @tree_id.
+ *
+ * Returns 0 on success or the error from __snapshot_tree_create(); @tree_id is
+ * only written on success.
+ */
+static int snapshot_tree_create(struct btree_trans *trans,
+				u32 root_id, u32 subvol_id, u32 *tree_id)
+{
+	struct bkey_i_snapshot_tree *tree = __snapshot_tree_create(trans);
+	int ret = PTR_ERR_OR_ZERO(tree);
+
+	if (!ret) {
+		tree->v.master_subvol	= cpu_to_le32(subvol_id);
+		tree->v.root_snapshot	= cpu_to_le32(root_id);
+		*tree_id		= tree->k.p.offset;
+	}
+
+	return ret;
+}
+
+/* Snapshot nodes: */
+
void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
@@ -90,11 +155,13 @@ int bch2_mark_snapshot(struct btree_trans *trans,
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
+ t->tree = le32_to_cpu(s.v->tree);
} else {
t->parent = 0;
t->children[0] = 0;
t->children[1] = 0;
t->subvol = 0;
+ t->tree = 0;
}
return 0;
@@ -116,7 +183,7 @@ static int snapshot_live(struct btree_trans *trans, u32 id)
return 0;
ret = snapshot_lookup(trans, id, &v);
- if (ret == -ENOENT)
+ if (bch2_err_matches(ret, ENOENT))
bch_err(trans->c, "snapshot node %u not found", id);
if (ret)
return ret;
@@ -157,6 +224,274 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
}
/* fsck: */
+
+/* Return entry @child of the children[] array of the in-memory snapshot node @id. */
+static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
+{
+	const struct snapshot_t *node = snapshot_t(c, id);
+
+	return node->children[child];
+}
+
+/* Return children[0] of snapshot node @id. */
+static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
+{
+	u32 left = bch2_snapshot_child(c, id, 0);
+
+	return left;
+}
+
+/* Return children[1] of snapshot node @id. */
+static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
+{
+	u32 right = bch2_snapshot_child(c, id, 1);
+
+	return right;
+}
+
+/*
+ * Step to the next node when walking a snapshot tree: descend to the left
+ * child if there is one; otherwise climb towards the root until some ancestor
+ * has a right child other than the node we climbed up from.
+ *
+ * Returns the next node's ID, or 0 once the walk reaches a node with no parent.
+ */
+static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
+{
+	u32 next = bch2_snapshot_left_child(c, id);
+
+	if (next)
+		return next;
+
+	for (;;) {
+		u32 up = bch2_snapshot_parent(c, id);
+
+		if (!up)
+			return 0;
+
+		next = bch2_snapshot_right_child(c, up);
+		if (next && next != id)
+			return next;
+
+		id = up;
+	}
+}
+
+/*
+ * Walk the snapshot nodes reachable from @snapshot_root with
+ * bch2_snapshot_tree_next() and return the smallest nonzero subvol value seen,
+ * or 0 if no visited node has a subvolume.
+ */
+static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
+{
+	u32 lowest = 0;
+	u32 node;
+
+	for (node = snapshot_root; node; node = bch2_snapshot_tree_next(c, node)) {
+		u32 cur = snapshot_t(c, node)->subvol;
+
+		if (!cur)
+			continue;
+
+		if (!lowest || cur < lowest)
+			lowest = cur;
+	}
+
+	return lowest;
+}
+
+/*
+ * Choose the master subvolume for the snapshot tree rooted at @snapshot_root
+ * and store its ID in *@subvol_id.
+ *
+ * First scans the subvolumes btree for a subvolume key whose snapshot passes
+ * bch2_snapshot_is_ancestor() against @snapshot_root and which does not have
+ * BCH_SUBVOLUME_SNAP set. If the scan ends without a match, falls back to
+ * bch2_snapshot_tree_oldest_subvol() and clears BCH_SUBVOLUME_SNAP on that
+ * subvolume.
+ *
+ * Returns 0 on success, or a nonzero error from the scan or from fetching the
+ * fallback subvolume for update.
+ */
+static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
+ u32 snapshot_root, u32 *subvol_id)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_subvolume s;
+ int ret;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
+ 0, k, ret) {
+ if (k.k->type != KEY_TYPE_subvolume)
+ continue;
+
+ s = bkey_s_c_to_subvolume(k);
+ if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
+ continue;
+ if (!BCH_SUBVOLUME_SNAP(s.v)) { /* first non-snapshot subvolume in this tree wins */
+ *subvol_id = s.k->p.offset;
+ goto found;
+ }
+ }
+ ret = ret ?: -ENOENT; /* loop finished without a match: keep any iteration error, else ENOENT */
+found:
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, ENOENT)) {
+ struct bkey_i_subvolume *s; /* NOTE(review): shadows the outer bkey_s_c_subvolume s */
+
+ *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
+
+ /* NOTE(review): iter is re-initialised here and not passed to bch2_trans_iter_exit() afterwards — confirm intended */
+ s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, *subvol_id),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ return ret;
+
+ SET_BCH_SUBVOLUME_SNAP(&s->v, false); /* promote the fallback subvolume to non-snapshot */
+ }
+
+ return ret;
+}
+
+/*
+ * fsck check for a single key in the snapshot_trees btree.
+ *
+ * Keys that are not KEY_TYPE_snapshot_tree are ignored. The key is deleted if
+ * its root_snapshot cannot be looked up, is not its own root according to
+ * bch2_snapshot_root(), or has a tree field that does not match this key's
+ * offset. Otherwise master_subvol is checked: if the subvolume is missing, is
+ * not in this tree, or is flagged as a snapshot, a new master subvolume is
+ * chosen with bch2_snapshot_tree_master_subvol() and written back.
+ *
+ * Returns 0 or a nonzero error code.
+ * NOTE(review): fsck_err_on() is defined elsewhere; presumably it may assign
+ * ret and jump to the fsck_err label — confirm.
+ */
+static int check_snapshot_tree(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c_snapshot_tree st;
+ struct bch_snapshot s;
+ struct bch_subvolume subvol;
+ struct printbuf buf = PRINTBUF;
+ u32 root_id;
+ int ret;
+
+ if (k.k->type != KEY_TYPE_snapshot_tree)
+ return 0;
+
+ st = bkey_s_c_to_snapshot_tree(k);
+ root_id = le32_to_cpu(st.v->root_snapshot);
+
+ ret = snapshot_lookup(trans, root_id, &s);
+ if (ret && !bch2_err_matches(ret, ENOENT)) /* ENOENT is reported as an fsck error below */
+ goto err;
+
+ /* ret is tested first, so s.tree is only read when the lookup succeeded */
+ if (fsck_err_on(ret ||
+ root_id != bch2_snapshot_root(c, root_id) ||
+ st.k->p.offset != le32_to_cpu(s.tree),
+ c,
+ "snapshot tree points to missing/incorrect snapshot:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter, 0);
+ goto err;
+ }
+
+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
+ false, 0, &subvol);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
+
+ /* NOTE(review): if the first fsck_err_on() returns false while ret is ENOENT, the later checks read subvol without it having been filled in — confirm */
+ if (fsck_err_on(ret, c,
+ "snapshot tree points to missing subvolume:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
+ fsck_err_on(!bch2_snapshot_is_ancestor(c,
+ le32_to_cpu(subvol.snapshot),
+ root_id), c,
+ "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
+ fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c,
+ "snapshot tree points to snapshot subvolume:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+ struct bkey_i_snapshot_tree *u;
+ u32 subvol_id;
+
+ ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+ if (ret)
+ goto err;
+
+ u = bch2_bkey_make_mut_typed(trans, iter, k, 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ u->v.master_subvol = cpu_to_le32(subvol_id);
+ st = snapshot_tree_i_to_s_c(u);
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf); /* both labels share this cleanup */
+ return ret;
+}
+
+/*
+ * For each snapshot_tree, make sure it points to the root of a snapshot tree
+ * and that snapshot entry points back to it, or delete it.
+ *
+ * And, make sure it points to a subvolume within that snapshot tree, or correct
+ * it to point to the oldest subvolume within that snapshot tree.
+ *
+ * Runs check_snapshot_tree() on every key of the snapshot_trees btree.
+ * Returns 0 on success or a nonzero error code; failures are logged in the
+ * same "<function>: error <name>" form as the other check passes in this file.
+ */
+int bch2_fs_check_snapshot_trees(struct bch_fs *c)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	ret = bch2_trans_run(c,
+		for_each_btree_key_commit(&trans, iter,
+			BTREE_ID_snapshot_trees, POS_MIN,
+			BTREE_ITER_PREFETCH, k,
+			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+		check_snapshot_tree(&trans, &iter, k)));
+
+	/* Log the error by name rather than as a raw number: */
+	if (ret)
+		bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
+	return ret;
+}
+
+/*
+ * Check a snapshot node's tree pointer: look up snapshot tree @tree_id and
+ * test @snap_id against that tree's root with bch2_snapshot_is_ancestor().
+ *
+ * Returns the result of that test, 0 if the tree entry does not exist, or a
+ * nonzero error from the lookup.
+ */
+static int snapshot_tree_ptr_good(struct btree_trans *trans,
+				  u32 snap_id, u32 tree_id)
+{
+	struct bch_snapshot_tree tree;
+	bool missing;
+	int ret;
+
+	ret = snapshot_tree_lookup(trans, tree_id, &tree);
+	missing = bch2_err_matches(ret, ENOENT);
+
+	/* A missing tree entry means "pointer not good", not a hard failure: */
+	if (missing || ret)
+		return missing ? 0 : ret;
+
+	return bch2_snapshot_is_ancestor(trans->c, snap_id,
+					 le32_to_cpu(tree.root_snapshot));
+}
+
+/*
+ * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
+ * its snapshot_tree pointer is correct (allocate new one if necessary), then
+ * update this node's pointer to root node's pointer.
+ *
+ * @iter:	iterator positioned at the snapshot key being repaired
+ * @s:		that snapshot key; on success it is replaced with the rewritten
+ *		key so the caller keeps checking the updated value
+ *
+ * Returns 0 on success or a nonzero error code. Every exit after root_iter
+ * has been set up goes through the err label so the iterator is released.
+ */
+static int snapshot_tree_ptr_repair(struct btree_trans *trans,
+				    struct btree_iter *iter,
+				    struct bkey_s_c_snapshot *s)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter root_iter;
+	struct bch_snapshot_tree s_t;
+	struct bkey_s_c_snapshot root;
+	struct bkey_i_snapshot *u;
+	u32 root_id = bch2_snapshot_root(c, s->k->p.offset), tree_id;
+	int ret;
+
+	root = bch2_bkey_get_iter_typed(trans, &root_iter,
+			BTREE_ID_snapshots, POS(0, root_id),
+			BTREE_ITER_WITH_UPDATES, snapshot);
+	ret = bkey_err(root);
+	if (ret)
+		goto err;
+
+	tree_id = le32_to_cpu(root.v->tree);
+
+	ret = snapshot_tree_lookup(trans, tree_id, &s_t);
+	/* root_iter is live: leave through err instead of returning directly */
+	if (ret && !bch2_err_matches(ret, ENOENT))
+		goto err;
+
+	/* Root's tree entry is missing or belongs to another root: make a new one */
+	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
+		u = bch2_bkey_make_mut_typed(trans, &root_iter, root.s_c, 0, snapshot);
+		ret = PTR_ERR_OR_ZERO(u) ?:
+			snapshot_tree_create(trans, root_id,
+				bch2_snapshot_tree_oldest_subvol(c, root_id),
+				&tree_id);
+		if (ret)
+			goto err;
+
+		u->v.tree = cpu_to_le32(tree_id);
+		/* Snapshot keys live at POS(0, id), so the ID is p.offset: */
+		if (s->k->p.offset == root_id)
+			*s = snapshot_i_to_s_c(u);
+	}
+
+	/* A non-root node takes the root's (possibly new) tree ID: */
+	if (s->k->p.offset != root_id) {
+		u = bch2_bkey_make_mut_typed(trans, iter, s->s_c, 0, snapshot);
+		ret = PTR_ERR_OR_ZERO(u);
+		if (ret)
+			goto err;
+
+		u->v.tree = cpu_to_le32(tree_id);
+		*s = snapshot_i_to_s_c(u);
+	}
+err:
+	bch2_trans_iter_exit(trans, &root_iter);
+	return ret;
+}
+
static int check_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
@@ -177,7 +512,7 @@ static int check_snapshot(struct btree_trans *trans,
id = le32_to_cpu(s.v->parent);
if (id) {
ret = snapshot_lookup(trans, id, &v);
- if (ret == -ENOENT)
+ if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot with nonexistent parent:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
if (ret)
@@ -196,7 +531,7 @@ static int check_snapshot(struct btree_trans *trans,
id = le32_to_cpu(s.v->children[i]);
ret = snapshot_lookup(trans, id, &v);
- if (ret == -ENOENT)
+ if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot node %llu has nonexistent child %u",
s.k->p.offset, id);
if (ret)
@@ -216,7 +551,7 @@ static int check_snapshot(struct btree_trans *trans,
if (should_have_subvol) {
id = le32_to_cpu(s.v->subvol);
ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
- if (ret == -ENOENT)
+ if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
if (ret)
@@ -242,9 +577,23 @@ static int check_snapshot(struct btree_trans *trans,
ret = bch2_trans_update(trans, iter, &u->k_i, 0);
if (ret)
goto err;
+
+ s = snapshot_i_to_s_c(u);
}
}
+ ret = snapshot_tree_ptr_good(trans, s.k->p.offset, le32_to_cpu(s.v->tree));
+ if (ret < 0)
+ goto err;
+
+ if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+ ret = snapshot_tree_ptr_repair(trans, iter, &s);
+ if (ret)
+ goto err;
+ }
+ ret = 0;
+
if (BCH_SNAPSHOT_DELETED(s.v))
set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
err:
@@ -255,23 +604,18 @@ fsck_err:
int bch2_fs_check_snapshots(struct bch_fs *c)
{
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(&trans, iter,
BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_snapshot(&trans, &iter, k));
-
+ check_snapshot(&trans, &iter, k)));
if (ret)
- bch_err(c, "error %i checking snapshots", ret);
-
- bch2_trans_exit(&trans);
+ bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
return ret;
}
@@ -279,10 +623,11 @@ static int check_subvol(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
+ struct bch_fs *c = trans->c;
struct bkey_s_c_subvolume subvol;
struct bch_snapshot snapshot;
unsigned snapid;
- int ret;
+ int ret = 0;
if (k.k->type != KEY_TYPE_subvolume)
return 0;
@@ -291,8 +636,8 @@ static int check_subvol(struct btree_trans *trans,
snapid = le32_to_cpu(subvol.v->snapshot);
ret = snapshot_lookup(trans, snapid, &snapshot);
- if (ret == -ENOENT)
- bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u",
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
k.k->p.offset, snapid);
if (ret)
return ret;
@@ -300,30 +645,55 @@ static int check_subvol(struct btree_trans *trans,
if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(trans->c, "error deleting subvolume %llu: %s",
+ bch_err(c, "error deleting subvolume %llu: %s",
iter->pos.offset, bch2_err_str(ret));
if (ret)
return ret;
}
- return 0;
+ if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
+ u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
+ u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree;
+ struct bch_snapshot_tree st;
+
+ ret = snapshot_tree_lookup(trans, snapshot_tree, &st);
+
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "%s: snapshot tree %u not found", __func__, snapshot_tree);
+
+ if (ret)
+ return ret;
+
+ if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
+ "subvolume %llu is not set as snapshot but is not master subvolume",
+ k.k->p.offset)) {
+ struct bkey_i_subvolume *s =
+ bch2_bkey_make_mut_typed(trans, iter, subvol.s_c, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ return ret;
+
+ SET_BCH_SUBVOLUME_SNAP(&s->v, true);
+ }
+ }
+
+fsck_err:
+ return ret;
}
int bch2_fs_check_subvols(struct bch_fs *c)
{
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(&trans, iter,
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
- check_subvol(&trans, &iter, k));
-
- bch2_trans_exit(&trans);
+ check_subvol(&trans, &iter, k)));
+ if (ret)
+ bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
return ret;
}
@@ -335,20 +705,15 @@ void bch2_fs_snapshots_exit(struct bch_fs *c)
int bch2_fs_snapshots_start(struct bch_fs *c)
{
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
- for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
+ ret = bch2_trans_run(c,
+ for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
- bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
- bch2_snapshot_set_equiv(&trans, k));
-
- bch2_trans_exit(&trans);
-
+ bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
+ bch2_snapshot_set_equiv(&trans, k)));
if (ret)
bch_err(c, "error starting snapshots: %s", bch2_err_str(ret));
return ret;
@@ -368,7 +733,8 @@ static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
0, snapshot);
ret = PTR_ERR_OR_ZERO(s);
if (unlikely(ret)) {
- bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing snapshot %u", id);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
+ trans->c, "missing snapshot %u", id);
return ret;
}
@@ -388,6 +754,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
+ struct btree_iter tree_iter = (struct btree_iter) { NULL };
struct bkey_s_c_snapshot s;
u32 parent_id;
unsigned i;
@@ -396,7 +763,8 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
BTREE_ITER_INTENT, snapshot);
ret = bkey_err(s);
- bch2_fs_inconsistent_on(ret == -ENOENT, c, "missing snapshot %u", id);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "missing snapshot %u", id);
if (ret)
goto err;
@@ -412,7 +780,8 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
0, snapshot);
ret = PTR_ERR_OR_ZERO(parent);
if (unlikely(ret)) {
- bch2_fs_inconsistent_on(ret == -ENOENT, c, "missing snapshot %u", parent_id);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "missing snapshot %u", parent_id);
goto err;
}
@@ -430,25 +799,49 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
le32_to_cpu(parent->v.children[1]))
swap(parent->v.children[0],
parent->v.children[1]);
+ } else {
+ /*
+ * We're deleting the root of a snapshot tree: update the
+ * snapshot_tree entry to point to the new root, or delete it if
+ * this is the last snapshot ID in this tree:
+ */
+ struct bkey_i_snapshot_tree *s_t;
+
+ BUG_ON(s.v->children[1]);
+
+ s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
+ BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
+ 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(s_t);
+ if (ret)
+ goto err;
+
+ if (s.v->children[0]) {
+ /* children[] is already __le32: copy it as-is, converting again would byte-swap it on big-endian */
+ s_t->v.root_snapshot = s.v->children[0];
+ } else {
+ s_t->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&s_t->k, 0);
+ }
}
ret = bch2_btree_delete_at(trans, &iter, 0);
err:
+ bch2_trans_iter_exit(trans, &tree_iter);
bch2_trans_iter_exit(trans, &p_iter);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
-int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
- u32 *new_snapids,
- u32 *snapshot_subvols,
- unsigned nr_snapids)
+static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
+ u32 *new_snapids,
+ u32 *snapshot_subvols,
+ unsigned nr_snapids)
{
- struct btree_iter iter, parent_iter = { NULL };
+ struct btree_iter iter;
struct bkey_i_snapshot *n;
struct bkey_s_c k;
unsigned i;
- int ret = 0;
+ int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
POS_MIN, BTREE_ITER_INTENT);
@@ -476,7 +869,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
n->v.flags = 0;
n->v.parent = cpu_to_le32(parent);
n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
- n->v.pad = 0;
+ n->v.tree = cpu_to_le32(tree);
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
@@ -486,38 +879,92 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
new_snapids[i] = iter.pos.offset;
}
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
- if (parent) {
- n = bch2_bkey_get_mut_typed(trans, &parent_iter,
- BTREE_ID_snapshots, POS(0, parent),
- 0, snapshot);
- ret = PTR_ERR_OR_ZERO(n);
- if (unlikely(ret)) {
- if (ret == -ENOENT)
- bch_err(trans->c, "snapshot %u not found", parent);
- goto err;
- }
+/*
+ * Create new snapshot IDs as children of an existing snapshot ID:
+ */
+static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
+ u32 *new_snapids,
+ u32 *snapshot_subvols,
+ unsigned nr_snapids)
+{
+ struct btree_iter iter;
+ struct bkey_i_snapshot *n_parent;
+ int ret = 0;
- if (n->v.children[0] || n->v.children[1]) {
- bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
- ret = -EINVAL;
- goto err;
- }
+ n_parent = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshots, POS(0, parent),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(n_parent);
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(trans->c, "snapshot %u not found", parent);
+ return ret;
+ }
- n->v.children[0] = cpu_to_le32(new_snapids[0]);
- n->v.children[1] = cpu_to_le32(new_snapids[1]);
- n->v.subvol = 0;
- SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
- ret = bch2_trans_update(trans, &parent_iter, &n->k_i, 0);
- if (ret)
- goto err;
+ if (n_parent->v.children[0] || n_parent->v.children[1]) {
+ bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
+ ret = -EINVAL;
+ goto err;
}
+
+ ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
+ new_snapids, snapshot_subvols, nr_snapids);
+ if (ret)
+ goto err;
+
+ n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
+ n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
+ n_parent->v.subvol = 0;
+ SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
err:
- bch2_trans_iter_exit(trans, &parent_iter);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
+/*
+ * Create a snapshot node that is the root of a new tree.
+ *
+ * Allocates a snapshot_tree key, creates the snapshot IDs with no parent and
+ * that tree's offset as their tree, then points the tree at the first new
+ * snapshot ID and the first subvolume. Returns 0 or a nonzero error code.
+ */
+static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
+					  u32 *new_snapids,
+					  u32 *snapshot_subvols,
+					  unsigned nr_snapids)
+{
+	struct bkey_i_snapshot_tree *tree = __snapshot_tree_create(trans);
+	int ret = PTR_ERR_OR_ZERO(tree);
+
+	if (ret)
+		return ret;
+
+	ret = create_snapids(trans, 0, tree->k.p.offset,
+			     new_snapids, snapshot_subvols, nr_snapids);
+	if (ret)
+		return ret;
+
+	tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
+	tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
+	return 0;
+}
+
+/*
+ * Create new snapshot nodes: with @parent == 0 a single node that roots a new
+ * snapshot tree, otherwise two children of @parent. Any other combination of
+ * @parent and @nr_snapids trips a BUG_ON().
+ */
+int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
+			      u32 *new_snapids,
+			      u32 *snapshot_subvols,
+			      unsigned nr_snapids)
+{
+	BUG_ON((parent == 0) != (nr_snapids == 1));
+	BUG_ON((parent != 0) != (nr_snapids == 2));
+
+	if (!parent)
+		return bch2_snapshot_node_create_tree(trans,
+				new_snapids, snapshot_subvols, nr_snapids);
+
+	return bch2_snapshot_node_create_children(trans, parent,
+				new_snapids, snapshot_subvols, nr_snapids);
+}
+
static int snapshot_delete_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -731,7 +1178,8 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
{
int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
iter_flags, subvolume, s);
- bch2_fs_inconsistent_on(ret == -ENOENT && inconsistent_if_not_found,
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
+ inconsistent_if_not_found,
trans->c, "missing subvolume %u", subvol);
return ret;
}
@@ -785,7 +1233,8 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
BTREE_ITER_CACHED|BTREE_ITER_INTENT,
subvolume);
ret = bkey_err(subvol);
- bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing subvolume %u", subvolid);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+ "missing subvolume %u", subvolid);
if (ret)
return ret;
@@ -894,7 +1343,8 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
BTREE_ITER_CACHED, subvolume);
ret = PTR_ERR_OR_ZERO(n);
if (unlikely(ret)) {
- bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing subvolume %u", subvolid);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+ "missing subvolume %u", subvolid);
return ret;
}
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index 1f6f7862e48f..dcd9f5f95535 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -5,6 +5,16 @@
#include "darray.h"
#include "subvolume_types.h"
+void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c,
+ unsigned, struct printbuf *);
+
+#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \
+ .key_invalid = bch2_snapshot_tree_invalid, \
+ .val_to_text = bch2_snapshot_tree_to_text, \
+ .min_val_size = 8, /* NOTE(review): presumably the two __le32 fields of struct bch_snapshot_tree — confirm */ \
+})
+
void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c,
unsigned, struct printbuf *);
@@ -28,6 +38,15 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
return snapshot_t(c, id)->parent;
}
+/*
+ * Follow parent pointers from snapshot @id until reaching a node whose parent
+ * is 0, and return that node's ID (which is @id itself if it has no parent).
+ */
+static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
+{
+	for (;;) {
+		u32 up = bch2_snapshot_parent(c, id);
+
+		if (!up)
+			return id;
+
+		id = up;
+	}
+}
+
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
return snapshot_t(c, id)->equiv;
@@ -107,6 +126,7 @@ static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 i
return ret;
}
+int bch2_fs_check_snapshot_trees(struct bch_fs *);
int bch2_fs_check_snapshots(struct bch_fs *);
int bch2_fs_check_subvols(struct bch_fs *);
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index aa49c45a35ab..c6c1cbad9781 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -10,6 +10,7 @@ struct snapshot_t {
u32 parent;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
+ u32 tree;
u32 equiv;
};