summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2018-07-12 22:06:50 -0400
committer Kent Overstreet <kent.overstreet@gmail.com> 2018-08-12 18:09:15 -0400
commit 044eff90f2370a6705c380dc19fecb0f562cfff7 (patch)
tree 1befd2db4de8982e5a79c80019ac56925edb6679
parent a7d3cbf08ccfa33fcf7fa2aa5deaf95fa79a4ce2 (diff)
bcachefs: add BCH_INODE_UNLINKED

will be used for better tmpfile/unlink

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- fs/bcachefs/bcachefs_format.h 7
-rw-r--r-- fs/bcachefs/error.h 6
-rw-r--r-- fs/bcachefs/fs.c 4
-rw-r--r-- fs/bcachefs/fsck.c 310
-rw-r--r-- fs/bcachefs/fsck.h 2
-rw-r--r-- fs/bcachefs/inode.c 4
-rw-r--r-- fs/bcachefs/recovery.c 4
7 files changed, 211 insertions, 126 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b6e7b983bc5b..e300738d6c61 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -722,9 +722,7 @@ enum {
__BCH_INODE_I_SIZE_DIRTY= 5,
__BCH_INODE_I_SECTORS_DIRTY= 6,
-
- /* not implemented yet: */
- __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
+ __BCH_INODE_UNLINKED = 7,
/* bits 20+ reserved for packed fields below: */
};
@@ -736,7 +734,7 @@ enum {
#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
-#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
+#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32);
@@ -1222,6 +1220,7 @@ enum bch_sb_features {
BCH_FEATURE_LZ4 = 0,
BCH_FEATURE_GZIP = 1,
BCH_FEATURE_ZSTD = 2,
+ BCH_FEATURE_ATOMIC_NLINK = 3,
};
/* options: */
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index f65ef132461e..588e763f0440 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -147,12 +147,18 @@ void bch2_flush_fsck_errs(struct bch_fs *);
#define need_fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
+#define need_fsck_err(c, ...) \
+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
+
#define mustfix_fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__)
#define mustfix_fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__)
+#define fsck_err(c, ...) \
+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
+
#define fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index f333d79fef59..8d09dcbf4840 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1096,7 +1096,9 @@ static void bch2_evict_inode(struct inode *vinode)
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
BCH_QUOTA_WARN);
bch2_inode_rm(c, inode->v.i_ino);
- atomic_long_dec(&c->nr_inodes);
+
+ WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0,
+ "nr_inodes < 0");
}
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index edf714f7b98d..3bbb0a2e8ac5 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -250,6 +250,8 @@ static int check_extents(struct bch_fs *c)
u64 i_sectors;
int ret = 0;
+ bch_verbose(c, "checking extents");
+
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(BCACHEFS_ROOT_INO, 0), 0, k) {
ret = walk_inode(c, &w, k.k->p.inode);
@@ -338,6 +340,8 @@ static int check_dirents(struct bch_fs *c)
char buf[200];
int ret = 0;
+ bch_verbose(c, "checking dirents");
+
hash_check_init(bch2_dirent_hash_desc, &h, c);
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
@@ -472,6 +476,8 @@ static int check_xattrs(struct bch_fs *c)
struct bkey_s_c k;
int ret = 0;
+ bch_verbose(c, "checking xattrs");
+
hash_check_init(bch2_xattr_hash_desc, &h, c);
for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
@@ -509,6 +515,8 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
struct bkey_inode_buf packed;
int ret;
+ bch_verbose(c, "checking root directory");
+
ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
if (ret && ret != -ENOENT)
return ret;
@@ -546,6 +554,8 @@ static int check_lostfound(struct bch_fs *c,
u64 inum;
int ret;
+ bch_verbose(c, "checking lost+found");
+
inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
&lostfound);
if (!inum) {
@@ -672,6 +682,8 @@ static int check_directory_structure(struct bch_fs *c,
u64 d_inum;
int ret = 0;
+ bch_verbose(c, "checking directory structure");
+
/* DFS: */
restart_dfs:
had_unreachable = false;
@@ -872,15 +884,89 @@ s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
return bch2_btree_iter_unlock(&iter) ?: sectors;
}
-static int bch2_gc_do_inode(struct bch_fs *c,
- struct bch_inode_unpacked *lostfound_inode,
- struct btree_iter *iter,
- struct bkey_s_c_inode inode, struct nlink link)
+static int check_inode_nlink(struct bch_fs *c,
+ struct bch_inode_unpacked *lostfound_inode,
+ struct bch_inode_unpacked *u,
+ struct nlink *link,
+ bool *do_update)
{
- struct bch_inode_unpacked u;
+ u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED
+ ? 0
+ : u->bi_nlink + nlink_bias(u->bi_mode);
+ u32 real_i_nlink =
+ link->count * nlink_bias(u->bi_mode) +
+ link->dir_count;
int ret = 0;
- u32 i_nlink, real_i_nlink;
+
+ /*
+ * These should have been caught/fixed by earlier passes, we don't
+ * repair them here:
+ */
+ if (S_ISDIR(u->bi_mode) && link->count > 1) {
+ need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
+ u->bi_inum, link->count);
+ return 0;
+ }
+
+ if (S_ISDIR(u->bi_mode) && !link->count) {
+ need_fsck_err(c, "unreachable directory found (inum %llu)",
+ u->bi_inum);
+ return 0;
+ }
+
+ if (!S_ISDIR(u->bi_mode) && link->dir_count) {
+ need_fsck_err(c, "non directory with subdirectories",
+ u->bi_inum);
+ return 0;
+ }
+
+ if (i_nlink < link->count) {
+ if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
+ u->bi_inum, i_nlink, link->count,
+ mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
+ return 0;
+ goto set_i_nlink;
+ }
+
+ if (i_nlink != real_i_nlink &&
+ c->sb.clean) {
+ if (fsck_err(c, "filesystem marked clean, "
+ "but inode %llu has wrong i_nlink "
+ "(type %u i_nlink %u, should be %u)",
+ u->bi_inum, mode_to_type(u->bi_mode),
+ i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
+ return 0;
+ goto set_i_nlink;
+ }
+
+ if (real_i_nlink && i_nlink != real_i_nlink)
+ bch_verbose(c, "setting inode %llu nlink from %u to %u",
+ u->bi_inum, i_nlink, real_i_nlink);
+set_i_nlink:
+ if (i_nlink != real_i_nlink) {
+ if (real_i_nlink) {
+ u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode);
+ u->bi_flags &= ~BCH_INODE_UNLINKED;
+ } else {
+ u->bi_nlink = 0;
+ u->bi_flags |= BCH_INODE_UNLINKED;
+ }
+
+ *do_update = true;
+ }
+fsck_err:
+ return ret;
+}
+
+static int check_inode(struct bch_fs *c,
+ struct bch_inode_unpacked *lostfound_inode,
+ struct btree_iter *iter,
+ struct bkey_s_c_inode inode,
+ struct nlink *link)
+{
+ struct bch_inode_unpacked u;
bool do_update = false;
+ int ret = 0;
ret = bch2_inode_unpack(inode, &u);
if (bch2_fs_inconsistent_on(ret, c,
@@ -888,48 +974,17 @@ static int bch2_gc_do_inode(struct bch_fs *c,
inode.k->p.inode))
return ret;
- i_nlink = u.bi_nlink + nlink_bias(u.bi_mode);
-
- fsck_err_on(i_nlink < link.count, c,
- "inode %llu i_link too small (%u < %u, type %i)",
- inode.k->p.inode, i_nlink,
- link.count, mode_to_type(u.bi_mode));
-
- /* These should have been caught/fixed by earlier passes: */
- if (S_ISDIR(u.bi_mode)) {
- need_fsck_err_on(link.count > 1, c,
- "directory %llu with multiple hardlinks: %u",
- inode.k->p.inode, link.count);
-
- real_i_nlink = link.count * 2 + link.dir_count;
- } else {
- need_fsck_err_on(link.dir_count, c,
- "found dirents for non directory %llu",
- inode.k->p.inode);
-
- real_i_nlink = link.count + link.dir_count;
+ if (link) {
+ ret = check_inode_nlink(c, lostfound_inode, &u, link,
+ &do_update);
+ if (ret)
+ return ret;
}
- if (!link.count) {
- fsck_err_on(c->sb.clean, c,
- "filesystem marked clean, "
- "but found orphaned inode %llu",
- inode.k->p.inode);
-
- if (fsck_err_on(S_ISDIR(u.bi_mode) &&
- bch2_empty_dir(c, inode.k->p.inode), c,
- "non empty directory with link count 0, "
- "inode nlink %u, dir links found %u",
- i_nlink, link.dir_count)) {
- ret = reattach_inode(c, lostfound_inode,
- inode.k->p.inode);
- if (ret)
- return ret;
- }
-
- bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
+ if (u.bi_flags & BCH_INODE_UNLINKED) {
+ bch_verbose(c, "deleting inode %llu", u.bi_inum);
- ret = bch2_inode_rm(c, inode.k->p.inode);
+ ret = bch2_inode_rm(c, u.bi_inum);
if (ret)
bch_err(c, "error in fs gc: error %i "
"while deleting inode", ret);
@@ -940,16 +995,16 @@ static int bch2_gc_do_inode(struct bch_fs *c,
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_size dirty",
- inode.k->p.inode);
+ u.bi_inum);
- bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
+ bch_verbose(c, "truncating inode %llu", u.bi_inum);
/*
* XXX: need to truncate partial blocks too here - or ideally
* just switch units to bytes and that issue goes away
*/
- ret = bch2_inode_truncate(c, inode.k->p.inode,
+ ret = bch2_inode_truncate(c, u.bi_inum,
round_up(u.bi_size, PAGE_SIZE) >> 9,
NULL, NULL);
if (ret) {
@@ -974,12 +1029,12 @@ static int bch2_gc_do_inode(struct bch_fs *c,
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_sectors dirty",
- inode.k->p.inode);
+ u.bi_inum);
bch_verbose(c, "recounting sectors for inode %llu",
- inode.k->p.inode);
+ u.bi_inum);
- sectors = bch2_count_inode_sectors(c, inode.k->p.inode);
+ sectors = bch2_count_inode_sectors(c, u.bi_inum);
if (sectors < 0) {
bch_err(c, "error in fs gc: error %i "
"recounting inode sectors",
@@ -992,20 +1047,6 @@ static int bch2_gc_do_inode(struct bch_fs *c,
do_update = true;
}
- if (i_nlink != real_i_nlink) {
- fsck_err_on(c->sb.clean, c,
- "filesystem marked clean, "
- "but inode %llu has wrong i_nlink "
- "(type %u i_nlink %u, should be %u)",
- inode.k->p.inode, mode_to_type(u.bi_mode),
- i_nlink, real_i_nlink);
-
- bch_verbose(c, "setting inode %llu nlinks from %u to %u",
- inode.k->p.inode, i_nlink, real_i_nlink);
- u.bi_nlink = real_i_nlink - nlink_bias(u.bi_mode);
- do_update = true;
- }
-
if (do_update) {
struct bkey_inode_buf p;
@@ -1024,9 +1065,9 @@ fsck_err:
noinline_for_stack
static int bch2_gc_walk_inodes(struct bch_fs *c,
- struct bch_inode_unpacked *lostfound_inode,
- nlink_table *links,
- u64 range_start, u64 range_end)
+ struct bch_inode_unpacked *lostfound_inode,
+ nlink_table *links,
+ u64 range_start, u64 range_end)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -1065,10 +1106,9 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
*/
bch2_btree_iter_unlock(&iter);
- ret = bch2_gc_do_inode(c, lostfound_inode, &iter,
- bkey_s_c_to_inode(k), *link);
- if (ret == -EINTR)
- continue;
+ ret = check_inode(c, lostfound_inode, &iter,
+ bkey_s_c_to_inode(k), link);
+ BUG_ON(ret == -EINTR);
if (ret)
break;
@@ -1103,6 +1143,8 @@ static int check_inode_nlinks(struct bch_fs *c,
u64 this_iter_range_start, next_iter_range_start = 0;
int ret = 0;
+ bch_verbose(c, "checking inode nlinks");
+
genradix_init(&links);
do {
@@ -1129,68 +1171,102 @@ static int check_inode_nlinks(struct bch_fs *c,
return ret;
}
+noinline_for_stack
+static int check_inodes_fast(struct bch_fs *c)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_inode inode;
+ unsigned long nr_inodes = 0;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
+ if (k.k->type != BCH_INODE_FS)
+ continue;
+
+ inode = bkey_s_c_to_inode(k);
+
+ if (!(inode.v->bi_flags & BCH_INODE_UNLINKED))
+ nr_inodes++;
+
+ if (inode.v->bi_flags &
+ (BCH_INODE_I_SIZE_DIRTY|
+ BCH_INODE_I_SECTORS_DIRTY|
+ BCH_INODE_UNLINKED)) {
+ fsck_err_on(c->sb.clean, c,
+ "filesystem marked clean but found inode %llu with flags %x",
+ inode.k->p.inode, inode.v->bi_flags);
+ ret = check_inode(c, NULL, &iter, inode, NULL);
+ BUG_ON(ret == -EINTR);
+ if (ret)
+ break;
+ }
+ }
+ atomic_long_set(&c->nr_inodes, nr_inodes);
+fsck_err:
+ return bch2_btree_iter_unlock(&iter) ?: ret;
+}
+
/*
* Checks for inconsistencies that shouldn't happen, unless we have a bug.
* Doesn't fix them yet, mainly because they haven't yet been observed:
*/
-int bch2_fsck(struct bch_fs *c, bool full_fsck)
+static int bch2_fsck_full(struct bch_fs *c)
{
struct bch_inode_unpacked root_inode, lostfound_inode;
int ret;
- if (full_fsck) {
- bch_verbose(c, "checking extents");
- ret = check_extents(c);
- if (ret)
- return ret;
+ bch_verbose(c, "starting fsck:");
+ ret = check_extents(c) ?:
+ check_dirents(c) ?:
+ check_xattrs(c) ?:
+ check_root(c, &root_inode) ?:
+ check_lostfound(c, &root_inode, &lostfound_inode) ?:
+ check_directory_structure(c, &lostfound_inode) ?:
+ check_inode_nlinks(c, &lostfound_inode);
- bch_verbose(c, "checking dirents");
- ret = check_dirents(c);
- if (ret)
- return ret;
+ bch2_flush_fsck_errs(c);
+ bch_verbose(c, "fsck done");
- bch_verbose(c, "checking xattrs");
- ret = check_xattrs(c);
- if (ret)
- return ret;
+ return ret;
+}
- bch_verbose(c, "checking root directory");
- ret = check_root(c, &root_inode);
- if (ret)
- return ret;
+static int bch2_fsck_inode_nlink(struct bch_fs *c)
+{
+ struct bch_inode_unpacked root_inode, lostfound_inode;
+ int ret;
- bch_verbose(c, "checking lost+found");
- ret = check_lostfound(c, &root_inode, &lostfound_inode);
- if (ret)
- return ret;
+ bch_verbose(c, "checking inode link counts:");
+ ret = check_root(c, &root_inode) ?:
+ check_lostfound(c, &root_inode, &lostfound_inode) ?:
+ check_inode_nlinks(c, &lostfound_inode);
- bch_verbose(c, "checking directory structure");
- ret = check_directory_structure(c, &lostfound_inode);
- if (ret)
- return ret;
+ bch2_flush_fsck_errs(c);
+ bch_verbose(c, "done");
- bch_verbose(c, "checking inode nlinks");
- ret = check_inode_nlinks(c, &lostfound_inode);
- if (ret)
- return ret;
- } else {
- bch_verbose(c, "checking root directory");
- ret = check_root(c, &root_inode);
- if (ret)
- return ret;
+ return ret;
+}
- bch_verbose(c, "checking lost+found");
- ret = check_lostfound(c, &root_inode, &lostfound_inode);
- if (ret)
- return ret;
+static int bch2_fsck_walk_inodes_only(struct bch_fs *c)
+{
+ int ret;
- bch_verbose(c, "checking inode nlinks");
- ret = check_inode_nlinks(c, &lostfound_inode);
- if (ret)
- return ret;
- }
+ bch_verbose(c, "walking inodes:");
+ ret = check_inodes_fast(c);
bch2_flush_fsck_errs(c);
+ bch_verbose(c, "done");
- return 0;
+ return ret;
+}
+
+int bch2_fsck(struct bch_fs *c)
+{
+ if (!c->opts.nofsck)
+ return bch2_fsck_full(c);
+
+ if (!c->sb.clean)
+ return bch2_fsck_inode_nlink(c);
+
+ return bch2_fsck_walk_inodes_only(c);
}
diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h
index f9af1305dc2a..bc9caaf23797 100644
--- a/fs/bcachefs/fsck.h
+++ b/fs/bcachefs/fsck.h
@@ -2,6 +2,6 @@
#define _BCACHEFS_FSCK_H
s64 bch2_count_inode_sectors(struct bch_fs *, u64);
-int bch2_fsck(struct bch_fs *, bool);
+int bch2_fsck(struct bch_fs *);
#endif /* _BCACHEFS_FSCK_H */
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 81d040d46e41..79c10df849a4 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -203,6 +203,10 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
return "invalid data checksum type";
+ if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
+ unpacked.bi_nlink != 0)
+ return "flagged as unlinked but bi_nlink != 0";
+
return NULL;
}
case BCH_INODE_BLOCKDEV:
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index f7c138d274e0..c9b879a6d136 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -250,12 +250,10 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.norecovery)
goto out;
- bch_verbose(c, "starting fsck:");
err = "error in fsck";
- ret = bch2_fsck(c, !c->opts.nofsck);
+ ret = bch2_fsck(c);
if (ret)
goto err;
- bch_verbose(c, "fsck done");
if (enabled_qtypes(c)) {
bch_verbose(c, "reading quotas:");