summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-08-02 20:27:38 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2024-02-16 01:28:01 -0500
commitdabe015010d9f660c05bd9de063186aac6267629 (patch)
tree88bdd1161de0de71340a8a7a1c6e2677d684170b
parent063b1c4d939adf5d5e4f95b17db7d6818c87bf0c (diff)
bcachefs: bcachefs_metadata_version_inode_depth WIPbcachefs_bi_depth
This adds a new inode field, bi_depth, for directory inodes: this allows us to make the check_directory_structure pass much more efficient. Currently, to ensure the filesystem is fully connect and has no loops, for every directory we follow backpointers until we find the root. But by adding a depth counter, it sufficies to only check the parent of each directory, and check that the parent's bi_depth is smaller. (fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename causes bi_depth off, but the chain to the root is still strictly decreasing, then the algorithm still works and there's no need for fsck to fixup the bi_depth fields). We've already checked backpointers, so we know that every directory (excluding the root)has a valid parent: if bi_depth is always decreasing, every chain must terminate, and terminate at the root directory. bi_depth will not necessarily be correct when fsck runs, due to directory renames - we can't change bi_depth on every child directory when renaming a directory. That's ok; fsck will silently fix the bi_depth field as needed, and future fsck runs will be faster. TODO: - all the fsck stuff - subvolumes make things weird: with snapshots, a given inode may have multiple paths up to the root (how does this even work in the current code?) - we may want to check subvolume connectivity separately, needs more thought Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/fs-common.c10
-rw-r--r--fs/bcachefs/inode_format.h3
3 files changed, 14 insertions, 2 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 8aa5241b5517..93fed4afc2a5 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -871,7 +871,8 @@ struct bch_sb_field_downgrade {
x(rebalance_work, BCH_VERSION(1, 3)) \
x(member_seq, BCH_VERSION(1, 4)) \
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
- x(btree_subvolume_children, BCH_VERSION(1, 6))
+ x(btree_subvolume_children, BCH_VERSION(1, 6)) \
+ x(inode_depth, BCH_VERSION(1, 7))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
index 624e6f963240..74e390c6cc48 100644
--- a/fs/bcachefs/fs-common.c
+++ b/fs/bcachefs/fs-common.c
@@ -171,6 +171,9 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}
+ if (S_ISDIR(mode))
+ new_inode->bi_depth = dir_u->bi_depth + 1;
+
inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
@@ -511,6 +514,13 @@ int bch2_rename_trans(struct btree_trans *trans,
dst_dir_u->bi_nlink++;
}
+ if (S_ISDIR(src_inode_u->bi_mode))
+ src_inode_u->bi_depth = max(src_inode_u->bi_depth, dst_dir_u->bi_depth + 1);
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ S_ISDIR(dst_inode_u->bi_mode))
+ dst_inode_u->bi_depth = max(dst_inode_u->bi_depth, src_dir_u->bi_depth + 1);
+
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
dst_dir_u->bi_nlink--;
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
index 83d107331edf..96803d91c48e 100644
--- a/fs/bcachefs/inode_format.h
+++ b/fs/bcachefs/inode_format.h
@@ -101,7 +101,8 @@ struct bch_inode_generation {
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32) \
- x(bi_nocow, 8)
+ x(bi_nocow, 8) \
+ x(bi_depth, 32)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \