diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-04-20 00:15:44 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2021-04-27 17:05:16 -0400 |
commit | fa2efabb73ee5d34155ea4d965398dc834fb5ac8 (patch) | |
tree | 9e7cca0aff13a0f511f9edc59717f5682ebb43f5 | |
parent | aaddf6625b52432cb31199eb14605aa64a55114f (diff) |
updating fsck for snapshots
-rw-r--r-- | fs/bcachefs/dirent.c | 38 | ||||
-rw-r--r-- | fs/bcachefs/dirent.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 738 |
3 files changed, 536 insertions, 245 deletions
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 8cbf8c9338f2..0a821d3497c0 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -175,24 +175,29 @@ static void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } -int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, - struct bkey_s_c_dirent d, subvol_inum *target) +int __bch2_dirent_read_target(struct btree_trans *trans, + struct bkey_s_c_dirent d, + u32 *subvol, + u32 *snapshot, + u64 *inum) { int ret = 0; + *subvol = 0; + *snapshot = 0; + if (likely(d.v->d_type != DT_SUBVOL)) { - *target = dir; - target->inum = le64_to_cpu(d.v->d_inum); + *inum = le64_to_cpu(d.v->d_inum); } else { struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_subvolume s; int ret; - *target = (subvol_inum) { .subvol = le64_to_cpu(d.v->d_inum) }; - + *subvol = le64_to_cpu(d.v->d_inum); iter = bch2_trans_get_iter(trans, BTREE_ID_subvolumes, - POS(0, target->subvol), 0); + POS(0, *subvol), + BTREE_ITER_CACHED); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); @@ -201,13 +206,14 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, if (k.k->type != KEY_TYPE_subvolume) { bch2_fs_inconsistent(trans->c, "pointer to missing subvolume %u", - target->subvol); + *subvol); ret = -EIO; goto put_iter; } s = bkey_s_c_to_subvolume(k); - target->inum = le64_to_cpu(s.v->inode); + *snapshot = le32_to_cpu(s.v->snapshot); + *inum = le64_to_cpu(s.v->inode); put_iter: bch2_trans_iter_put(trans, iter); } @@ -215,6 +221,20 @@ put_iter: return ret; } +static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, + struct bkey_s_c_dirent d, subvol_inum *target) +{ + u32 subvol, snapshot; + int ret = 0; + + *target = dir; + ret = __bch2_dirent_read_target(trans, d, &subvol, &snapshot, &target->inum); + + if (subvol) + target->subvol = subvol; + return ret; +} + int bch2_dirent_rename(struct btree_trans *trans, subvol_inum src_dir, struct bch_hash_info *src_hash, subvol_inum dst_dir, struct bch_hash_info *dst_hash, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 4a7e3a2e512c..4d8c9e24d55c 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -37,8 +37,9 @@ int bch2_dirent_delete_at(struct btree_trans *, const struct bch_hash_info *, struct btree_iter *); -int bch2_dirent_read_target(struct btree_trans *, subvol_inum, - struct bkey_s_c_dirent, subvol_inum *); +int __bch2_dirent_read_target(struct btree_trans *, + struct bkey_s_c_dirent, + u32 *, u32 *, u64 *); static inline unsigned vfs_d_type(unsigned type) { diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8fdf62545fa5..bfc27233d9c5 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -72,8 +72,8 @@ static int snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, return lockrestart_do(trans, __snapshot_lookup_subvol(trans, snapshot, subvol)); } -static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol, - u64 *inum) +static int __subvol_lookup(struct btree_trans *trans, u32 subvol, + u32 *snapshot, u64 *inum) { struct btree_iter *iter; struct bkey_s_c k; @@ -92,6 +92,7 @@ static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol, goto err; } + *snapshot = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot); *inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode); err: bch2_trans_iter_free(trans, iter); @@ -99,9 +100,10 @@ err: } -static int subvol_lookup_root(struct btree_trans *trans, u32 subvol, u64 *inum) +static int subvol_lookup(struct btree_trans *trans, u32 subvol, + u32 *snapshot, u64 *inum) { - return lockrestart_do(trans, __subvol_lookup_root(trans, subvol, inum)); + return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum)); } static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, @@ -136,6 +138,35 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot)); } +static int __lookup_dirent(struct btree_trans *trans, + struct bch_hash_info hash_info, + subvol_inum dir, struct qstr *name, + u64 *target, unsigned *type) +{ + struct btree_iter *iter; + struct bkey_s_c_dirent d; + + iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, + &hash_info, dir, name, 0); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(iter)); + *target = le64_to_cpu(d.v->d_inum); + *type = d.v->d_type; + bch2_trans_iter_put(trans, iter); + return 0; +} + +static int lookup_dirent(struct btree_trans *trans, + struct bch_hash_info hash_info, + subvol_inum dir, struct qstr *name, + u64 *target, unsigned *type) +{ + return lockrestart_do(trans, + __lookup_dirent(trans, hash_info, dir, name, target, type)); +} + static int __write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 snapshot) @@ -196,32 +227,49 @@ static int remove_dirent(struct btree_trans *trans, struct bpos pos) } /* Get lost+found, create if it doesn't exist: */ -static int lookup_lostfound(struct btree_trans *trans, - u32 subvol, +static int lookup_lostfound(struct btree_trans *trans, u32 subvol, struct bch_inode_unpacked *lostfound) { struct bch_fs *c = trans->c; struct bch_inode_unpacked root; struct bch_hash_info root_hash_info; struct qstr lostfound_str = QSTR("lost+found"); - u64 inum; + subvol_inum root_inum = { .subvol = subvol }; + u64 inum = 0; + unsigned d_type = 0; u32 snapshot; int ret; - ret = subvol_lookup_root(trans, subvol, &inum); + ret = subvol_lookup(trans, subvol, &snapshot, &root_inum.inum); + if (ret) + return ret; - ret = lookup_inode(trans, inum, &root, &snapshot); - if (ret && ret != -ENOENT) + ret = lookup_inode(trans, root_inum.inum, &root, &snapshot); + if (ret) { + bch_err(c, "error fetching subvol root: %i", ret); return ret; + } root_hash_info = bch2_hash_info_init(c, &root); - inum = bch2_dirent_lookup(c, root.bi_inum, &root_hash_info, - &lostfound_str); - if (!inum) { + + ret = lookup_dirent(trans, root_hash_info, root_inum, + &lostfound_str, &inum, &d_type); + if (ret == -ENOENT) { bch_notice(c, "creating lost+found"); goto create_lostfound; } + if (ret) { + bch_err(c, "error looking up lost+found: %i", ret); + return ret; + } + + if (d_type != DT_DIR) { + bch_err(c, "error looking up lost+found: not a directory"); + return ret; + + } + ret = lookup_inode(trans, inum, lostfound, &snapshot); if (ret && ret != -ENOENT) { /* @@ -239,11 +287,9 @@ create_lostfound: ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, - bch2_create_trans(trans, - BCACHEFS_ROOT_INO, &root, - lostfound, - &lostfound_str, - 0, 0, S_IFDIR|0700, 0, NULL, NULL)); + bch2_create_trans(trans, root_inum, &root, + lostfound, &lostfound_str, + 0, 0, S_IFDIR|0700, 0, NULL, NULL, 0)); if (ret) bch_err(c, "error creating lost+found: %i", ret); } @@ -253,7 +299,7 @@ create_lostfound: static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, - u32 snapshot) + u32 inode_snapshot) { struct bch_hash_info dir_hash; struct bch_inode_unpacked lostfound; @@ -263,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans, u32 subvol; int ret; - ret = snapshot_lookup_subvol(trans, snapshot, &subvol); + ret = snapshot_lookup_subvol(trans, inode_snapshot, &subvol); if (ret) return ret; @@ -285,10 +331,15 @@ static int reattach_inode(struct btree_trans *trans, name = (struct qstr) QSTR(name_buf); ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, - bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash, - mode_to_type(inode->bi_mode), - &name, inode->bi_inum, &dir_offset, - BCH_HASH_SET_MUST_CREATE)); + bch2_dirent_create(trans, + (subvol_inum) { + .subvol = subvol, + .inum = lostfound.bi_inum, + }, + &dir_hash, + mode_to_type(inode->bi_mode), + &name, inode->bi_inum, &dir_offset, + BCH_HASH_SET_MUST_CREATE)); if (ret) { bch_err(trans->c, "error %i reattaching inode %llu", ret, inode->bi_inum); @@ -298,7 +349,7 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; - return write_inode(trans, inode, U32_MAX); + return write_inode(trans, inode, inode_snapshot); } static int remove_backpointer(struct btree_trans *trans, @@ -326,38 +377,124 @@ out: } struct inode_walker { - bool first_this_inode; - bool have_inode; - u64 cur_inum; - u32 snapshot; - struct bch_inode_unpacked inode; + bool first_this_inode; + u64 cur_inum; + u32 cur_snapshot; + + size_t nr; + size_t size; + struct inode_walker_entry { + struct bch_inode_unpacked inode; + u32 snapshot; + bool visited; + u64 count; + } *d; }; +static void inode_walker_exit(struct inode_walker *w) +{ + kfree(w->d); + w->d = NULL; +} + static struct inode_walker inode_walker_init(void) { - return (struct inode_walker) { - .cur_inum = -1, - .have_inode = false, + return (struct inode_walker) { 0, }; +} + +static int add_inode(struct inode_walker *w, struct bkey_s_c_inode inode) +{ + struct bch_inode_unpacked u; + + if (w->nr == w->size) { + size_t new_size = max_t(size_t, 8UL, w->size * 2); + void *d = krealloc(w->d, new_size * sizeof(w->d[0]), + GFP_KERNEL); + if (!d) { + return -ENOMEM; + } + + w->d = d; + w->size = new_size; + } + + BUG_ON(bch2_inode_unpack(inode, &u)); + + w->d[w->nr++] = (struct inode_walker_entry) { + .inode = u, + .snapshot = inode.k->p.snapshot, }; + + return 0; +} + +static int __walk_inode(struct btree_trans *trans, + struct inode_walker *w, u64 inum) +{ + struct btree_iter *iter; + struct bkey_s_c k; + int ret; + + if (inum == w->cur_inum) { + w->first_this_inode = false; + return 0; + } + + w->nr = 0; + + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + if (k.k->p.offset != inum) + break; + + if (k.k->type == KEY_TYPE_inode) + add_inode(w, bkey_s_c_to_inode(k)); + } + bch2_trans_iter_put(trans, iter); + + if (!ret) { + w->cur_inum = inum; + w->cur_snapshot = 0; + w->first_this_inode = true; + } + + return ret; } static int walk_inode(struct btree_trans *trans, struct inode_walker *w, u64 inum) { - if (inum != w->cur_inum) { - int ret = lookup_inode(trans, inum, &w->inode, &w->snapshot); + return lockrestart_do(trans, __walk_inode(trans, w, inum)); +} - if (ret && ret != -ENOENT) - return ret; +static struct inode_walker_entry *get_next_inode(struct bch_fs *c, + struct inode_walker *w, u32 snapshot) +{ + struct inode_walker_entry *i; - w->have_inode = !ret; - w->cur_inum = inum; - w->first_this_inode = true; - } else { - w->first_this_inode = false; + for (i = w->d; i < w->d + w->nr; i++) { + if (i->visited) + continue; + + if (i->snapshot < snapshot && + bch2_snapshot_is_ancestor(c, i->snapshot, snapshot)) { + i->visited = true; + return i; + } + + if (snapshot <= i->snapshot && + snapshot == w->cur_snapshot) + break; + + if (snapshot <= i->snapshot && + bch2_snapshot_is_ancestor(c, snapshot, i->snapshot)) { + w->cur_snapshot = snapshot; + i->visited = true; + return i; + } } - return 0; + return NULL; } static int hash_redo_key(struct btree_trans *trans, @@ -365,6 +502,9 @@ static int hash_redo_key(struct btree_trans *trans, struct bch_hash_info *hash_info, struct btree_iter *k_iter, struct bkey_s_c k) { + bch_err(trans->c, "hash_redo_key() not implemented yet"); + return -EINVAL; +#if 0 struct bkey_i *delete; struct bkey_i *tmp; @@ -383,6 +523,7 @@ static int hash_redo_key(struct btree_trans *trans, bch2_trans_update(trans, k_iter, delete, 0); return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); +#endif } static int fsck_hash_delete_at(struct btree_trans *trans, @@ -474,26 +615,33 @@ fsck_err: static int check_inode(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c_inode inode) + struct bch_inode_unpacked *prev, + struct bch_inode_unpacked u) { struct bch_fs *c = trans->c; - struct bch_inode_unpacked u; bool do_update = false; int ret = 0; - ret = bch2_inode_unpack(inode, &u); - - if (bch2_fs_inconsistent_on(ret, c, - "error unpacking inode %llu in fsck", - inode.k->p.inode)) - return ret; + if (fsck_err_on(prev && + (prev->bi_hash_seed != u.bi_hash_seed || + mode_to_type(prev->bi_mode) != mode_to_type(u.bi_mode)), c, + "inodes in different snapshots don't match")) { + BUG(); + } if (u.bi_flags & BCH_INODE_UNLINKED && (!c->sb.clean || fsck_err(c, "filesystem marked clean, but inode %llu unlinked", u.bi_inum))) { + /* + * XXX: check if this inode isn't deleted in newer snapshots, if + * so we can't delete it and should probably copy the inode + * from the newer snapshot instead + */ bch_verbose(c, "deleting inode %llu", u.bi_inum); - + bch_err(c, "repair not implemented yet"); + return -EINVAL; +#if 0 bch2_trans_unlock(trans); bch2_fs_lazy_rw(c); @@ -501,6 +649,7 @@ static int check_inode(struct btree_trans *trans, if (ret) bch_err(c, "error in fsck: error %i while deleting inode", ret); return ret; +#endif } if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && @@ -583,24 +732,35 @@ static int check_inodes(struct bch_fs *c, bool full) struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_inode inode; + struct bch_inode_unpacked prev, u; int ret; + memset(&prev, 0, sizeof(prev)); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, 0, k, ret) { + for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (k.k->type != KEY_TYPE_inode) continue; inode = bkey_s_c_to_inode(k); - if (full || - (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| - BCH_INODE_I_SECTORS_DIRTY| - BCH_INODE_UNLINKED))) { - ret = check_inode(&trans, iter, inode); - if (ret) - break; - } + if (!full && + !(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| + BCH_INODE_I_SECTORS_DIRTY| + BCH_INODE_UNLINKED))) + continue; + + BUG_ON(bch2_inode_unpack(inode, &u)); + + ret = check_inode(&trans, iter, + full && prev.bi_inum == u.bi_inum + ? &prev : NULL, u); + if (ret) + break; + + prev = u; } bch2_trans_iter_put(&trans, iter); @@ -609,6 +769,29 @@ static int check_inodes(struct bch_fs *c, bool full) return bch2_trans_exit(&trans) ?: ret; } +noinline_for_stack +static int check_subvols(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; + int ret; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + + for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, + 0, k, ret) { + } + bch2_trans_iter_put(&trans, iter); + + bch2_trans_exit(&trans); + return ret; +} + +/* + * Checking for overlapping extents needs to be reimplemented + */ +#if 0 static int fix_overlapping_extent(struct btree_trans *trans, struct bkey_s_c k, struct bpos cut_at) { @@ -643,16 +826,18 @@ static int fix_overlapping_extent(struct btree_trans *trans, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); } +#endif static int inode_backpointer_exists(struct btree_trans *trans, - struct bch_inode_unpacked *inode) + struct bch_inode_unpacked *inode, + u32 snapshot) { struct btree_iter *iter; struct bkey_s_c k; int ret; iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, - POS(inode->bi_dir, inode->bi_dir_offset), 0); + SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot), 0); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) @@ -673,6 +858,31 @@ static bool inode_backpointer_matches(struct bkey_s_c_dirent d, d.k->p.offset == inode->bi_dir_offset; } +static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) +{ + struct bch_fs *c = trans->c; + struct inode_walker_entry *i; + int ret = 0; + + for (i = w->d; i < w->d + w->nr; i++) { + if (i->inode.bi_sectors == i->count) + continue; + + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, + "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", + w->cur_inum, i->snapshot, + i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE) + continue; + + i->inode.bi_sectors = i->count; + ret = write_inode(trans, &i->inode, i->snapshot); + if (ret) + break; + } +fsck_err: + return ret; +} + /* * Walk extents: verify that extents have a corresponding S_ISREG inode, and * that i_size an i_sectors are consistent @@ -684,12 +894,13 @@ static int check_extents(struct bch_fs *c) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_buf prev; - u64 i_sectors = 0; int ret = 0; +#if 0 + struct bkey_buf prev; bch2_bkey_buf_init(&prev); prev.k->k = KEY(0, 0, 0); +#endif bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch_verbose(c, "checking extents"); @@ -700,20 +911,14 @@ static int check_extents(struct bch_fs *c) retry: while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { - if (w.have_inode && - w.cur_inum != k.k->p.inode && - !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && - fsck_err_on(w.inode.bi_sectors != i_sectors, c, - "inode %llu has incorrect i_sectors: got %llu, should be %llu", - w.inode.bi_inum, - w.inode.bi_sectors, i_sectors)) { - w.inode.bi_sectors = i_sectors; - - ret = write_inode(&trans, &w.inode, w.snapshot); + struct inode_walker_entry *i; + + if (w.cur_inum != k.k->p.inode) { + ret = check_i_sectors(&trans, &w); if (ret) break; } - +#if 0 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { char buf1[200]; char buf2[200]; @@ -724,55 +929,179 @@ retry: if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR; } - +#endif ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) break; - if (w.first_this_inode) - i_sectors = 0; - - if (fsck_err_on(!w.have_inode, c, + if (fsck_err_on(!w.nr, c, "extent type %u for missing inode %llu", k.k->type, k.k->p.inode) || - fsck_err_on(w.have_inode && - !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, + fsck_err_on(!S_ISREG(w.d[0].inode.bi_mode) && + !S_ISLNK(w.d[0].inode.bi_mode), c, "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, w.inode.bi_mode)) { + k.k->type, k.k->p.inode, i->inode.bi_mode)) { bch2_fs_lazy_rw(c); - return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, + ret = bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, POS(k.k->p.inode, 0), POS(k.k->p.inode, U64_MAX), - NULL) ?: -EINTR; + NULL); + continue; } - if (fsck_err_on(w.have_inode && - !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != KEY_TYPE_reservation && - k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, - "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { - bch2_fs_lazy_rw(c); - return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, - POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c))), - POS(k.k->p.inode, U64_MAX), - NULL) ?: -EINTR; - } + while ((i = get_next_inode(c, &w, k.k->p.snapshot))) { + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + k.k->type != KEY_TYPE_reservation && + k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, + "extent type %u offset %llu past end of inode %llu, i_size %llu", + k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) { + bch2_fs_lazy_rw(c); + ret = bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, + POS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c))), + POS(k.k->p.inode, U64_MAX), + NULL); + continue; + } - if (bkey_extent_is_allocation(k.k)) - i_sectors += k.k->size; + if (bkey_extent_is_allocation(k.k)) + i->count += k.k->size; + } +#if 0 bch2_bkey_buf_reassemble(&prev, c, k); - +#endif bch2_btree_iter_advance(iter); } fsck_err: if (ret == -EINTR) goto retry; bch2_trans_iter_put(&trans, iter); +#if 0 bch2_bkey_buf_exit(&prev, c); +#endif + inode_walker_exit(&w); return bch2_trans_exit(&trans) ?: ret; } +static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) +{ + struct bch_fs *c = trans->c; + struct inode_walker_entry *i; + int ret = 0; + + for (i = w->d; i < w->d + w->nr; i++) { + if (i->inode.bi_nlink == i->count) + continue; + + if (fsck_err_on(i->inode.bi_nlink != i->count, c, + "directory %llu:%u with wrong i_nlink: got %u, should be %llu", + w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) { + i->inode.bi_nlink = i->count; + ret = write_inode(trans, &i->inode, i->snapshot); + if (ret) + break; + } + } +fsck_err: + return ret; +} + +static int check_dirent_target(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c_dirent d, + struct bch_inode_unpacked *target, + u32 target_snapshot) +{ + struct bch_fs *c = trans->c; + bool backpointer_exists = true; + char buf[200]; + int ret = 0; + + if (!target->bi_dir && + !target->bi_dir_offset) { + target->bi_dir = d.k->p.inode; + target->bi_dir_offset = d.k->p.offset; + + ret = write_inode(trans, target, target_snapshot); + if (ret) + goto err; + } + + if (!inode_backpointer_matches(d, target)) { + ret = inode_backpointer_exists(trans, target, d.k->p.snapshot); + if (ret < 0) + goto err; + + backpointer_exists = ret; + ret = 0; + + if (fsck_err_on(S_ISDIR(target->bi_mode) && + backpointer_exists, c, + "directory %llu with multiple links", + target->bi_inum)) { + ret = remove_dirent(trans, d.k->p); + if (ret) + goto err; + return 0; + } + + if (fsck_err_on(backpointer_exists && + !target->bi_nlink, c, + "inode %llu has multiple links but i_nlink 0", + target->bi_inum)) { + target->bi_nlink++; + target->bi_flags &= ~BCH_INODE_UNLINKED; + + ret = write_inode(trans, target, target_snapshot); + if (ret) + goto err; + } + + if (fsck_err_on(!backpointer_exists, c, + "inode %llu has wrong backpointer:\n" + "got %llu:%llu\n" + "should be %llu:%llu", + target->bi_inum, + target->bi_dir, + target->bi_dir_offset, + d.k->p.inode, + d.k->p.offset)) { + target->bi_dir = d.k->p.inode; + target->bi_dir_offset = d.k->p.offset; + + ret = write_inode(trans, target, target_snapshot); + if (ret) + goto err; + } + } + + if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target->bi_mode), c, + "incorrect d_type: should be %u:\n%s", + mode_to_type(target->bi_mode), + (bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) { + struct bkey_i_dirent *n; + + n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); + if (!n) { + ret = -ENOMEM; + goto err; + } + + bkey_reassemble(&n->k_i, d.s_c); + n->v.d_type = mode_to_type(target->bi_mode); + + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + (bch2_trans_update(trans, iter, &n->k_i, 0), 0)); + kfree(n); + if (ret) + goto err; + } +err: +fsck_err: + return ret; +} + /* * Walk dirents: verify that they all have a corresponding S_ISDIR inode, * validate d_type @@ -780,13 +1109,13 @@ fsck_err: noinline_for_stack static int check_dirents(struct bch_fs *c) { - struct inode_walker w = inode_walker_init(); + struct inode_walker dir = inode_walker_init(); + struct inode_walker target = inode_walker_init(); struct bch_hash_info hash_info; struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; char buf[200]; - unsigned nr_subdirs = 0; int ret = 0; bch_verbose(c, "checking dirents"); @@ -799,38 +1128,28 @@ retry: while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { struct bkey_s_c_dirent d; - struct bch_inode_unpacked target; u32 target_snapshot; u32 target_subvol; - bool have_target; - bool backpointer_exists = true; - u64 d_inum; - - if (w.have_inode && - w.cur_inum != k.k->p.inode && - fsck_err_on(w.inode.bi_nlink != nr_subdirs, c, - "directory %llu with wrong i_nlink: got %u, should be %u", - w.inode.bi_inum, w.inode.bi_nlink, nr_subdirs)) { - w.inode.bi_nlink = nr_subdirs; - ret = write_inode(&trans, &w.inode, w.snapshot); + u64 target_inum; + struct inode_walker_entry *i; + + if (dir.cur_inum != k.k->p.inode) { + ret = check_subdir_count(&trans, &dir); if (ret) break; } - ret = walk_inode(&trans, &w, k.k->p.inode); + ret = walk_inode(&trans, &dir, k.k->p.inode); if (ret) break; - if (w.first_this_inode) - nr_subdirs = 0; - - if (fsck_err_on(!w.have_inode, c, + if (fsck_err_on(!dir.nr, c, "dirent in nonexisting directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)) || - fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, + fsck_err_on(!S_ISDIR(dir.d[0].inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", - mode_to_type(w.inode.bi_mode), + mode_to_type(dir.d[0].inode.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { ret = lockrestart_do(&trans, @@ -840,11 +1159,11 @@ retry: goto next; } - if (!w.have_inode) + if (!dir.nr) goto next; - if (w.first_this_inode) - hash_info = bch2_hash_info_init(c, &w.inode); + if (dir.first_this_inode) + hash_info = bch2_hash_info_init(c, &dir.d[0].inode); ret = hash_check_key(&trans, bch2_dirent_hash_desc, &hash_info, iter, k); @@ -860,132 +1179,79 @@ retry: d = bkey_s_c_to_dirent(k); - ret = lockrestart_do(&trans, bch2_dirent_read_target(&trans, d, &d_inum)); - if (ret && ret != -ENOENT) - break; + while ((i = get_next_inode(c, &dir, k.k->p.snapshot))) + i->count += d.v->d_type == DT_DIR; + + ret = lockrestart_do(&trans, + __bch2_dirent_read_target(&trans, d, + &target_subvol, + &target_snapshot, + &target_inum)); + if (fsck_err_on(ret == -ENOENT, c, + "dirent points to missing subvolume %llu", + le64_to_cpu(d.v->d_inum))) { + BUG(); + } - ret = lookup_inode(&trans, d_inum, &target, &target_snapshot); if (ret && ret != -ENOENT) break; - have_target = !ret; - ret = 0; + if (target_subvol) { + struct bch_inode_unpacked subvol_root; - if (fsck_err_on(!have_target, c, - "dirent points to missing inode:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, - k), buf))) { - ret = remove_dirent(&trans, d.k->p); - if (ret) - goto err; - goto next; - } - - if (!have_target) - goto next; - - if (!target.bi_dir && - !target.bi_dir_offset) { - target.bi_dir = k.k->p.inode; - target.bi_dir_offset = k.k->p.offset; + ret = lookup_inode(&trans, target_inum, + &subvol_root, &target_snapshot); + if (fsck_err_on(ret == -ENOENT, c, + "subvolume %u points to missing subvolume root %llu", + target_subvol, + target_inum)) { + BUG(); + } - ret = write_inode(&trans, &target, target_snapshot); if (ret) - goto err; - } - - if (!inode_backpointer_matches(d, &target)) { - ret = inode_backpointer_exists(&trans, &target); - if (ret < 0) - goto err; + return ret; - backpointer_exists = ret; - ret = 0; + if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, + "subvol root %llu has wrong bi_subvol field: got %u, should be %u", + target_inum, + subvol_root.bi_subvol, target_subvol)) { - if (fsck_err_on(S_ISDIR(target.bi_mode) && - backpointer_exists, c, - "directory %llu with multiple links", - target.bi_inum)) { - ret = remove_dirent(&trans, d.k->p); + subvol_root.bi_subvol = target_subvol; + ret = write_inode(&trans, &subvol_root, target_snapshot); if (ret) goto err; - continue; } - if (fsck_err_on(backpointer_exists && - !target.bi_nlink, c, - "inode %llu has multiple links but i_nlink 0", - d_inum)) { - target.bi_nlink++; - target.bi_flags &= ~BCH_INODE_UNLINKED; + ret = check_dirent_target(&trans, iter, d, &subvol_root, + target_snapshot); + if (ret) + goto err; + } else { + bool have_target = false; + + target.cur_inum = 0; + ret = walk_inode(&trans, &target, target_inum); + if (ret) + break; - ret = write_inode(&trans, &target, target_snapshot); + while ((i = get_next_inode(c, &target, target_inum))) { + ret = check_dirent_target(&trans, iter, d, + &i->inode, i->snapshot); if (ret) goto err; + + have_target = true; } - if (fsck_err_on(!backpointer_exists, c, - "inode %llu has wrong backpointer:\n" - "got %llu:%llu\n" - "should be %llu:%llu", - d_inum, - target.bi_dir, - target.bi_dir_offset, - k.k->p.inode, - k.k->p.offset)) { - target.bi_dir = k.k->p.inode; - target.bi_dir_offset = k.k->p.offset; - - ret = write_inode(&trans, &target, target_snapshot); + if (fsck_err_on(!have_target, c, + "dirent points to missing inode:\n%s", + (bch2_bkey_val_to_text(&PBUF(buf), c, + k), buf))) { + ret = remove_dirent(&trans, d.k->p); if (ret) goto err; } } - - target_subvol = d.v->d_type == DT_SUBVOL - ? le64_to_cpu(d.v->d_inum) : 0; - - if (fsck_err_on(target.bi_subvol != target_subvol, c, - "subvol root %llu has wrong subvol field:\n" - "got %u\n" - "should be %u", - target.bi_inum, - target.bi_subvol, - target_subvol)) { - target.bi_subvol = target_subvol; - - ret = write_inode(&trans, &target, target_snapshot); - if (ret) - goto err; - } - - if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target.bi_mode), c, - "incorrect d_type: should be %u:\n%s", - mode_to_type(target.bi_mode), - (bch2_bkey_val_to_text(&PBUF(buf), c, - k), buf))) { - struct bkey_i_dirent *n; - - n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); - if (!n) { - ret = -ENOMEM; - goto err; - } - - bkey_reassemble(&n->k_i, d.s_c); - n->v.d_type = mode_to_type(target.bi_mode); - - ret = __bch2_trans_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); - kfree(n); - if (ret) - goto err; - - } - - nr_subdirs += d.v->d_type == DT_DIR; next: bch2_btree_iter_advance(iter); } @@ -1024,7 +1290,7 @@ retry: if (ret) break; - if (fsck_err_on(!w.have_inode, c, + if (fsck_err_on(!w.nr, c, "xattr for missing inode %llu", k.k->p.inode)) { ret = bch2_btree_delete_at(&trans, iter, 0); @@ -1033,14 +1299,17 @@ retry: continue; } - if (w.first_this_inode && w.have_inode) - hash_info = bch2_hash_info_init(c, &w.inode); + if (!w.nr) + goto next; + + if (w.first_this_inode) + hash_info = bch2_hash_info_init(c, &w.d[0].inode); ret = hash_check_key(&trans, bch2_xattr_hash_desc, &hash_info, iter, k); if (ret) break; - +next: bch2_btree_iter_advance(iter); } fsck_err: @@ -1131,7 +1400,7 @@ static int check_path(struct btree_trans *trans, while (inode->bi_inum != BCACHEFS_ROOT_INO) { ret = lockrestart_do(trans, - inode_backpointer_exists(trans, inode)); + inode_backpointer_exists(trans, inode, snapshot)); if (ret < 0) break; @@ -1481,6 +1750,7 @@ int bch2_fsck_full(struct bch_fs *c) return bch2_fs_snapshots_check(c) ?: check_inodes(c, true) ?: + check_subvols(c) ?: check_extents(c) ?: check_dirents(c) ?: check_xattrs(c) ?: |