diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-07-12 22:06:50 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-08-12 18:09:15 -0400 |
commit | 044eff90f2370a6705c380dc19fecb0f562cfff7 (patch) | |
tree | 1befd2db4de8982e5a79c80019ac56925edb6679 | |
parent | a7d3cbf08ccfa33fcf7fa2aa5deaf95fa79a4ce2 (diff) |
bcachefs: add BCH_INODE_UNLINKED
This flag will be used for better tmpfile/unlink handling.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/error.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 310 | ||||
-rw-r--r-- | fs/bcachefs/fsck.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/inode.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 4 |
7 files changed, 211 insertions, 126 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b6e7b983bc5b..e300738d6c61 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -722,9 +722,7 @@ enum { __BCH_INODE_I_SIZE_DIRTY= 5, __BCH_INODE_I_SECTORS_DIRTY= 6, - - /* not implemented yet: */ - __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */ + __BCH_INODE_UNLINKED = 7, /* bits 20+ reserved for packed fields below: */ }; @@ -736,7 +734,7 @@ enum { #define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) #define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) #define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS) +#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); @@ -1222,6 +1220,7 @@ enum bch_sb_features { BCH_FEATURE_LZ4 = 0, BCH_FEATURE_GZIP = 1, BCH_FEATURE_ZSTD = 2, + BCH_FEATURE_ATOMIC_NLINK = 3, }; /* options: */ diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index f65ef132461e..588e763f0440 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -147,12 +147,18 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define need_fsck_err_on(cond, c, ...) \ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define need_fsck_err(c, ...) \ + __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) + #define mustfix_fsck_err(c, ...) \ __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) #define mustfix_fsck_err_on(cond, c, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) +#define fsck_err(c, ...) \ + __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) + #define fsck_err_on(cond, c, ...) 
\ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f333d79fef59..8d09dcbf4840 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1096,7 +1096,9 @@ static void bch2_evict_inode(struct inode *vinode) bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, BCH_QUOTA_WARN); bch2_inode_rm(c, inode->v.i_ino); - atomic_long_dec(&c->nr_inodes); + + WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0, + "nr_inodes < 0"); } } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index edf714f7b98d..3bbb0a2e8ac5 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -250,6 +250,8 @@ static int check_extents(struct bch_fs *c) u64 i_sectors; int ret = 0; + bch_verbose(c, "checking extents"); + for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(BCACHEFS_ROOT_INO, 0), 0, k) { ret = walk_inode(c, &w, k.k->p.inode); @@ -338,6 +340,8 @@ static int check_dirents(struct bch_fs *c) char buf[200]; int ret = 0; + bch_verbose(c, "checking dirents"); + hash_check_init(bch2_dirent_hash_desc, &h, c); for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, @@ -472,6 +476,8 @@ static int check_xattrs(struct bch_fs *c) struct bkey_s_c k; int ret = 0; + bch_verbose(c, "checking xattrs"); + hash_check_init(bch2_xattr_hash_desc, &h, c); for_each_btree_key(&iter, c, BTREE_ID_XATTRS, @@ -509,6 +515,8 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) struct bkey_inode_buf packed; int ret; + bch_verbose(c, "checking root directory"); + ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode); if (ret && ret != -ENOENT) return ret; @@ -546,6 +554,8 @@ static int check_lostfound(struct bch_fs *c, u64 inum; int ret; + bch_verbose(c, "checking lost+found"); + inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info, &lostfound); if (!inum) { @@ -672,6 +682,8 @@ static int check_directory_structure(struct bch_fs *c, u64 d_inum; int ret = 0; + bch_verbose(c, "checking directory structure"); + /* 
DFS: */ restart_dfs: had_unreachable = false; @@ -872,15 +884,89 @@ s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum) return bch2_btree_iter_unlock(&iter) ?: sectors; } -static int bch2_gc_do_inode(struct bch_fs *c, - struct bch_inode_unpacked *lostfound_inode, - struct btree_iter *iter, - struct bkey_s_c_inode inode, struct nlink link) +static int check_inode_nlink(struct bch_fs *c, + struct bch_inode_unpacked *lostfound_inode, + struct bch_inode_unpacked *u, + struct nlink *link, + bool *do_update) { - struct bch_inode_unpacked u; + u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED + ? 0 + : u->bi_nlink + nlink_bias(u->bi_mode); + u32 real_i_nlink = + link->count * nlink_bias(u->bi_mode) + + link->dir_count; int ret = 0; - u32 i_nlink, real_i_nlink; + + /* + * These should have been caught/fixed by earlier passes, we don't + * repair them here: + */ + if (S_ISDIR(u->bi_mode) && link->count > 1) { + need_fsck_err(c, "directory %llu with multiple hardlinks: %u", + u->bi_inum, link->count); + return 0; + } + + if (S_ISDIR(u->bi_mode) && !link->count) { + need_fsck_err(c, "unreachable directory found (inum %llu)", + u->bi_inum); + return 0; + } + + if (!S_ISDIR(u->bi_mode) && link->dir_count) { + need_fsck_err(c, "non directory with subdirectories", + u->bi_inum); + return 0; + } + + if (i_nlink < link->count) { + if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)", + u->bi_inum, i_nlink, link->count, + mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE) + return 0; + goto set_i_nlink; + } + + if (i_nlink != real_i_nlink && + c->sb.clean) { + if (fsck_err(c, "filesystem marked clean, " + "but inode %llu has wrong i_nlink " + "(type %u i_nlink %u, should be %u)", + u->bi_inum, mode_to_type(u->bi_mode), + i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) + return 0; + goto set_i_nlink; + } + + if (real_i_nlink && i_nlink != real_i_nlink) + bch_verbose(c, "setting inode %llu nlink from %u to %u", + u->bi_inum, i_nlink, real_i_nlink); +set_i_nlink: + if (i_nlink != 
real_i_nlink) { + if (real_i_nlink) { + u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode); + u->bi_flags &= ~BCH_INODE_UNLINKED; + } else { + u->bi_nlink = 0; + u->bi_flags |= BCH_INODE_UNLINKED; + } + + *do_update = true; + } +fsck_err: + return ret; +} + +static int check_inode(struct bch_fs *c, + struct bch_inode_unpacked *lostfound_inode, + struct btree_iter *iter, + struct bkey_s_c_inode inode, + struct nlink *link) +{ + struct bch_inode_unpacked u; bool do_update = false; + int ret = 0; ret = bch2_inode_unpack(inode, &u); if (bch2_fs_inconsistent_on(ret, c, @@ -888,48 +974,17 @@ static int bch2_gc_do_inode(struct bch_fs *c, inode.k->p.inode)) return ret; - i_nlink = u.bi_nlink + nlink_bias(u.bi_mode); - - fsck_err_on(i_nlink < link.count, c, - "inode %llu i_link too small (%u < %u, type %i)", - inode.k->p.inode, i_nlink, - link.count, mode_to_type(u.bi_mode)); - - /* These should have been caught/fixed by earlier passes: */ - if (S_ISDIR(u.bi_mode)) { - need_fsck_err_on(link.count > 1, c, - "directory %llu with multiple hardlinks: %u", - inode.k->p.inode, link.count); - - real_i_nlink = link.count * 2 + link.dir_count; - } else { - need_fsck_err_on(link.dir_count, c, - "found dirents for non directory %llu", - inode.k->p.inode); - - real_i_nlink = link.count + link.dir_count; + if (link) { + ret = check_inode_nlink(c, lostfound_inode, &u, link, + &do_update); + if (ret) + return ret; } - if (!link.count) { - fsck_err_on(c->sb.clean, c, - "filesystem marked clean, " - "but found orphaned inode %llu", - inode.k->p.inode); - - if (fsck_err_on(S_ISDIR(u.bi_mode) && - bch2_empty_dir(c, inode.k->p.inode), c, - "non empty directory with link count 0, " - "inode nlink %u, dir links found %u", - i_nlink, link.dir_count)) { - ret = reattach_inode(c, lostfound_inode, - inode.k->p.inode); - if (ret) - return ret; - } - - bch_verbose(c, "deleting inode %llu", inode.k->p.inode); + if (u.bi_flags & BCH_INODE_UNLINKED) { + bch_verbose(c, "deleting inode %llu", u.bi_inum); 
- ret = bch2_inode_rm(c, inode.k->p.inode); + ret = bch2_inode_rm(c, u.bi_inum); if (ret) bch_err(c, "error in fs gc: error %i " "while deleting inode", ret); @@ -940,16 +995,16 @@ static int bch2_gc_do_inode(struct bch_fs *c, fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but inode %llu has i_size dirty", - inode.k->p.inode); + u.bi_inum); - bch_verbose(c, "truncating inode %llu", inode.k->p.inode); + bch_verbose(c, "truncating inode %llu", u.bi_inum); /* * XXX: need to truncate partial blocks too here - or ideally * just switch units to bytes and that issue goes away */ - ret = bch2_inode_truncate(c, inode.k->p.inode, + ret = bch2_inode_truncate(c, u.bi_inum, round_up(u.bi_size, PAGE_SIZE) >> 9, NULL, NULL); if (ret) { @@ -974,12 +1029,12 @@ static int bch2_gc_do_inode(struct bch_fs *c, fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but inode %llu has i_sectors dirty", - inode.k->p.inode); + u.bi_inum); bch_verbose(c, "recounting sectors for inode %llu", - inode.k->p.inode); + u.bi_inum); - sectors = bch2_count_inode_sectors(c, inode.k->p.inode); + sectors = bch2_count_inode_sectors(c, u.bi_inum); if (sectors < 0) { bch_err(c, "error in fs gc: error %i " "recounting inode sectors", @@ -992,20 +1047,6 @@ static int bch2_gc_do_inode(struct bch_fs *c, do_update = true; } - if (i_nlink != real_i_nlink) { - fsck_err_on(c->sb.clean, c, - "filesystem marked clean, " - "but inode %llu has wrong i_nlink " - "(type %u i_nlink %u, should be %u)", - inode.k->p.inode, mode_to_type(u.bi_mode), - i_nlink, real_i_nlink); - - bch_verbose(c, "setting inode %llu nlinks from %u to %u", - inode.k->p.inode, i_nlink, real_i_nlink); - u.bi_nlink = real_i_nlink - nlink_bias(u.bi_mode); - do_update = true; - } - if (do_update) { struct bkey_inode_buf p; @@ -1024,9 +1065,9 @@ fsck_err: noinline_for_stack static int bch2_gc_walk_inodes(struct bch_fs *c, - struct bch_inode_unpacked *lostfound_inode, - nlink_table *links, - u64 range_start, u64 range_end) + struct 
bch_inode_unpacked *lostfound_inode, + nlink_table *links, + u64 range_start, u64 range_end) { struct btree_iter iter; struct bkey_s_c k; @@ -1065,10 +1106,9 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); */ bch2_btree_iter_unlock(&iter); - ret = bch2_gc_do_inode(c, lostfound_inode, &iter, - bkey_s_c_to_inode(k), *link); - if (ret == -EINTR) - continue; + ret = check_inode(c, lostfound_inode, &iter, + bkey_s_c_to_inode(k), link); + BUG_ON(ret == -EINTR); if (ret) break; @@ -1103,6 +1143,8 @@ static int check_inode_nlinks(struct bch_fs *c, u64 this_iter_range_start, next_iter_range_start = 0; int ret = 0; + bch_verbose(c, "checking inode nlinks"); + genradix_init(&links); do { @@ -1129,68 +1171,102 @@ static int check_inode_nlinks(struct bch_fs *c, return ret; } +noinline_for_stack +static int check_inodes_fast(struct bch_fs *c) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_s_c_inode inode; + unsigned long nr_inodes = 0; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { + if (k.k->type != BCH_INODE_FS) + continue; + + inode = bkey_s_c_to_inode(k); + + if (!(inode.v->bi_flags & BCH_INODE_UNLINKED)) + nr_inodes++; + + if (inode.v->bi_flags & + (BCH_INODE_I_SIZE_DIRTY| + BCH_INODE_I_SECTORS_DIRTY| + BCH_INODE_UNLINKED)) { + fsck_err_on(c->sb.clean, c, + "filesystem marked clean but found inode %llu with flags %x", + inode.k->p.inode, inode.v->bi_flags); + ret = check_inode(c, NULL, &iter, inode, NULL); + BUG_ON(ret == -EINTR); + if (ret) + break; + } + } + atomic_long_set(&c->nr_inodes, nr_inodes); +fsck_err: + return bch2_btree_iter_unlock(&iter) ?: ret; +} + /* * Checks for inconsistencies that shouldn't happen, unless we have a bug. 
* Doesn't fix them yet, mainly because they haven't yet been observed: */ -int bch2_fsck(struct bch_fs *c, bool full_fsck) +static int bch2_fsck_full(struct bch_fs *c) { struct bch_inode_unpacked root_inode, lostfound_inode; int ret; - if (full_fsck) { - bch_verbose(c, "checking extents"); - ret = check_extents(c); - if (ret) - return ret; + bch_verbose(c, "starting fsck:"); + ret = check_extents(c) ?: + check_dirents(c) ?: + check_xattrs(c) ?: + check_root(c, &root_inode) ?: + check_lostfound(c, &root_inode, &lostfound_inode) ?: + check_directory_structure(c, &lostfound_inode) ?: + check_inode_nlinks(c, &lostfound_inode); - bch_verbose(c, "checking dirents"); - ret = check_dirents(c); - if (ret) - return ret; + bch2_flush_fsck_errs(c); + bch_verbose(c, "fsck done"); - bch_verbose(c, "checking xattrs"); - ret = check_xattrs(c); - if (ret) - return ret; + return ret; +} - bch_verbose(c, "checking root directory"); - ret = check_root(c, &root_inode); - if (ret) - return ret; +static int bch2_fsck_inode_nlink(struct bch_fs *c) +{ + struct bch_inode_unpacked root_inode, lostfound_inode; + int ret; - bch_verbose(c, "checking lost+found"); - ret = check_lostfound(c, &root_inode, &lostfound_inode); - if (ret) - return ret; + bch_verbose(c, "checking inode link counts:"); + ret = check_root(c, &root_inode) ?: + check_lostfound(c, &root_inode, &lostfound_inode) ?: + check_inode_nlinks(c, &lostfound_inode); - bch_verbose(c, "checking directory structure"); - ret = check_directory_structure(c, &lostfound_inode); - if (ret) - return ret; + bch2_flush_fsck_errs(c); + bch_verbose(c, "done"); - bch_verbose(c, "checking inode nlinks"); - ret = check_inode_nlinks(c, &lostfound_inode); - if (ret) - return ret; - } else { - bch_verbose(c, "checking root directory"); - ret = check_root(c, &root_inode); - if (ret) - return ret; + return ret; +} - bch_verbose(c, "checking lost+found"); - ret = check_lostfound(c, &root_inode, &lostfound_inode); - if (ret) - return ret; +static int 
bch2_fsck_walk_inodes_only(struct bch_fs *c) +{ + int ret; - bch_verbose(c, "checking inode nlinks"); - ret = check_inode_nlinks(c, &lostfound_inode); - if (ret) - return ret; - } + bch_verbose(c, "walking inodes:"); + ret = check_inodes_fast(c); bch2_flush_fsck_errs(c); + bch_verbose(c, "done"); - return 0; + return ret; +} + +int bch2_fsck(struct bch_fs *c) +{ + if (!c->opts.nofsck) + return bch2_fsck_full(c); + + if (!c->sb.clean) + return bch2_fsck_inode_nlink(c); + + return bch2_fsck_walk_inodes_only(c); } diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index f9af1305dc2a..bc9caaf23797 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -2,6 +2,6 @@ #define _BCACHEFS_FSCK_H s64 bch2_count_inode_sectors(struct bch_fs *, u64); -int bch2_fsck(struct bch_fs *, bool); +int bch2_fsck(struct bch_fs *); #endif /* _BCACHEFS_FSCK_H */ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 81d040d46e41..79c10df849a4 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -203,6 +203,10 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) return "invalid data checksum type"; + if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && + unpacked.bi_nlink != 0) + return "flagged as unlinked but bi_nlink != 0"; + return NULL; } case BCH_INODE_BLOCKDEV: diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index f7c138d274e0..c9b879a6d136 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -250,12 +250,10 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.norecovery) goto out; - bch_verbose(c, "starting fsck:"); err = "error in fsck"; - ret = bch2_fsck(c, !c->opts.nofsck); + ret = bch2_fsck(c); if (ret) goto err; - bch_verbose(c, "fsck done"); if (enabled_qtypes(c)) { bch_verbose(c, "reading quotas:"); |