diff options
Diffstat (limited to 'libbcache/fs-gc.c')
-rw-r--r-- | libbcache/fs-gc.c | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c new file mode 100644 index 0000000..bd2a867 --- /dev/null +++ b/libbcache/fs-gc.c @@ -0,0 +1,475 @@ + +#include "bcache.h" +#include "btree_update.h" +#include "dirent.h" +#include "error.h" +#include "fs.h" +#include "fs-gc.h" +#include "inode.h" +#include "keylist.h" +#include "super.h" + +#include <linux/generic-radix-tree.h> + +struct nlink { + u32 count; + u32 dir_count; +}; + +DECLARE_GENRADIX_TYPE(nlinks, struct nlink); + +static void inc_link(struct cache_set *c, struct nlinks *links, + u64 range_start, u64 *range_end, + u64 inum, bool dir) +{ + struct nlink *link; + + if (inum < range_start || inum >= *range_end) + return; + + link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); + if (!link) { + bch_verbose(c, "allocation failed during fs gc - will need another pass"); + *range_end = inum; + return; + } + + if (dir) + link->dir_count++; + else + link->count++; +} + +/* + * XXX: should do a DFS (via filesystem heirarchy), and make sure all dirents + * are reachable + */ + +noinline_for_stack +static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links, + u64 range_start, u64 *range_end) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_s_c_dirent d; + u64 d_inum; + int ret; + + inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false); + + for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) { + switch (k.k->type) { + case BCH_DIRENT: + d = bkey_s_c_to_dirent(k); + d_inum = le64_to_cpu(d.v->d_inum); + + if (d.v->d_type == DT_DIR) + inc_link(c, links, range_start, range_end, + d.k->p.inode, true); + + inc_link(c, links, range_start, range_end, + d_inum, false); + + break; + } + + bch_btree_iter_cond_resched(&iter); + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); + + return ret; +} + +s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) +{ + struct btree_iter iter; + struct bkey_s_c k; + u64 sectors = 0; + + for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) { + if (k.k->p.inode != inum) + break; + + if (bkey_extent_is_allocation(k.k)) + sectors += k.k->size; + } + + return bch_btree_iter_unlock(&iter) ?: sectors; +} + +static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, + struct bkey_s_c_inode inode, struct nlink link) +{ + u16 i_mode = le16_to_cpu(inode.v->i_mode); + u32 i_flags = le32_to_cpu(inode.v->i_flags); + u32 i_nlink = le32_to_cpu(inode.v->i_nlink); + u64 i_size = le64_to_cpu(inode.v->i_size); + s64 i_sectors = 0; + int ret = 0; + u32 real_i_nlink; + + fsck_err_on(i_nlink < link.count, c, + "inode %llu i_link too small (%u < %u, type %i)", + inode.k->p.inode, i_nlink, + link.count, mode_to_type(i_mode)); + + if (S_ISDIR(i_mode)) { + unfixable_fsck_err_on(link.count > 1, c, + "directory %llu with multiple hardlinks: %u", + inode.k->p.inode, link.count); + + real_i_nlink = link.count * 2 + link.dir_count; + } else { + unfixable_fsck_err_on(link.dir_count, c, + "found dirents for non directory %llu", + inode.k->p.inode); + + real_i_nlink = link.count + link.dir_count; + } + + if (!link.count) { + fsck_err_on(c->sb.clean, c, + "filesystem marked clean, " + "but found orphaned inode %llu", + inode.k->p.inode); + + unfixable_fsck_err_on(S_ISDIR(i_mode) && + bch_empty_dir(c, inode.k->p.inode), c, + "non empty directory with link count 0, " + "inode nlink %u, dir links found %u", + i_nlink, link.dir_count); + + bch_verbose(c, "deleting inode %llu", inode.k->p.inode); + + ret = bch_inode_rm(c, inode.k->p.inode); + if (ret) + bch_err(c, "error in fs gc: error %i " + "while deleting inode", ret); + return ret; + } + + if (i_flags & BCH_INODE_I_SIZE_DIRTY) { + fsck_err_on(c->sb.clean, c, + "filesystem marked clean, " + "but inode %llu has i_size dirty", + inode.k->p.inode); + + bch_verbose(c, "truncating inode %llu", inode.k->p.inode); + + /* + * XXX: need to truncate partial blocks too here - or ideally + * just switch units to bytes and that issue goes away + */ + + ret = bch_inode_truncate(c, inode.k->p.inode, + round_up(i_size, PAGE_SIZE) >> 9, + NULL, NULL); + if (ret) { + bch_err(c, "error in fs gc: error %i " + "truncating inode", ret); + return ret; + } + + /* + * We truncated without our normal sector accounting hook, just + * make sure we recalculate it: + */ + i_flags |= BCH_INODE_I_SECTORS_DIRTY; + } + + if (i_flags & BCH_INODE_I_SECTORS_DIRTY) { + fsck_err_on(c->sb.clean, c, + "filesystem marked clean, " + "but inode %llu has i_sectors dirty", + inode.k->p.inode); + + bch_verbose(c, "recounting sectors for inode %llu", + inode.k->p.inode); + + i_sectors = bch_count_inode_sectors(c, inode.k->p.inode); + if (i_sectors < 0) { + bch_err(c, "error in fs gc: error %i " + "recounting inode sectors", + (int) i_sectors); + return i_sectors; + } + } + + if (i_nlink != real_i_nlink) { + fsck_err_on(c->sb.clean, c, + "filesystem marked clean, " + "but inode %llu has wrong i_nlink " + "(type %u i_nlink %u, should be %u)", + inode.k->p.inode, mode_to_type(i_mode), + i_nlink, real_i_nlink); + + bch_verbose(c, "setting inode %llu nlinks from %u to %u", + inode.k->p.inode, i_nlink, real_i_nlink); + } + + if (i_nlink != real_i_nlink|| + i_flags & BCH_INODE_I_SECTORS_DIRTY || + i_flags & BCH_INODE_I_SIZE_DIRTY) { + struct bkey_i_inode update; + + bkey_reassemble(&update.k_i, inode.s_c); + update.v.i_nlink = cpu_to_le32(real_i_nlink); + update.v.i_flags = cpu_to_le32(i_flags & + ~(BCH_INODE_I_SIZE_DIRTY| + BCH_INODE_I_SECTORS_DIRTY)); + + if (i_flags & BCH_INODE_I_SECTORS_DIRTY) + update.v.i_sectors = cpu_to_le64(i_sectors); + + ret = bch_btree_insert_at(c, NULL, NULL, NULL, + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(iter, &update.k_i)); + if (ret && ret != -EINTR) + bch_err(c, "error in fs gc: error %i " + "updating inode", ret); + } +fsck_err: + return ret; +} + +noinline_for_stack +static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links, + u64 range_start, u64 range_end) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct nlink *link, zero_links = { 0, 0 }; + struct genradix_iter nlinks_iter; + int ret = 0, ret2 = 0; + u64 nlinks_pos; + + bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0)); + genradix_iter_init(&nlinks_iter); + + while ((k = bch_btree_iter_peek(&iter)).k && + !btree_iter_err(k)) { +peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); + + if (!link && (!k.k || iter.pos.inode >= range_end)) + break; + + nlinks_pos = range_start + nlinks_iter.pos; + if (iter.pos.inode > nlinks_pos) { + unfixable_fsck_err_on(link && link->count, c, + "missing inode %llu (nlink %u)", + nlinks_pos, link->count); + genradix_iter_advance(&nlinks_iter, links); + goto peek_nlinks; + } + + if (iter.pos.inode < nlinks_pos || !link) + link = &zero_links; + + if (k.k && k.k->type == BCH_INODE_FS) { + /* + * Avoid potential deadlocks with iter for + * truncate/rm/etc.: + */ + bch_btree_iter_unlock(&iter); + + ret = bch_gc_do_inode(c, &iter, + bkey_s_c_to_inode(k), + *link); + if (ret == -EINTR) + continue; + if (ret) + break; + + if (link->count) + atomic_long_inc(&c->nr_inodes); + } else { + unfixable_fsck_err_on(link->count, c, + "missing inode %llu (nlink %u)", + nlinks_pos, link->count); + } + + if (nlinks_pos == iter.pos.inode) + genradix_iter_advance(&nlinks_iter, links); + + bch_btree_iter_advance_pos(&iter); + bch_btree_iter_cond_resched(&iter); + } +fsck_err: + ret2 = bch_btree_iter_unlock(&iter); + if (ret2) + bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2); + + return ret ?: ret2; +} + +int bch_gc_inode_nlinks(struct cache_set *c) +{ + struct nlinks links; + u64 this_iter_range_start, next_iter_range_start = 0; + int ret = 0; + + genradix_init(&links); + + do { + this_iter_range_start = next_iter_range_start; + next_iter_range_start = U64_MAX; + + ret = bch_gc_walk_dirents(c, &links, + this_iter_range_start, + &next_iter_range_start); + if (ret) + break; + + ret = bch_gc_walk_inodes(c, &links, + this_iter_range_start, + next_iter_range_start); + if (ret) + break; + + genradix_free(&links); + } while (next_iter_range_start != U64_MAX); + + genradix_free(&links); + + return ret; +} + +static void next_inode(struct cache_set *c, u64 inum, u64 *cur_inum, + struct bkey_i_inode *inode, + bool *first_this_inode, bool *have_inode, + u64 *i_size, u16 *i_mode) +{ + *first_this_inode = inum != *cur_inum; + *cur_inum = inum; + + if (*first_this_inode) { + *have_inode = !bch_inode_find_by_inum(c, inum, inode); + + if (*have_inode) { + *i_mode = le16_to_cpu(inode->v.i_mode); + *i_size = le64_to_cpu(inode->v.i_size); + } + } +} + +/* + * Checks for inconsistencies that shouldn't happen, unless we have a bug. + * Doesn't fix them yet, mainly because they haven't yet been observed: + */ +int bch_fsck(struct cache_set *c) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_inode inode; + bool first_this_inode, have_inode; + u64 cur_inum, i_sectors; + u64 i_size = 0; + u16 i_mode = 0; + int ret = 0; + + cur_inum = -1; + have_inode = false; + for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + POS(BCACHE_ROOT_INO, 0), k) { + if (k.k->type == KEY_TYPE_DISCARD) + continue; + + next_inode(c, k.k->p.inode, &cur_inum, &inode, + &first_this_inode, &have_inode, + &i_size, &i_mode); + + unfixable_fsck_err_on(!have_inode, c, + "extent type %u for missing inode %llu", + k.k->type, k.k->p.inode); + + unfixable_fsck_err_on(first_this_inode && have_inode && + le64_to_cpu(inode.v.i_sectors) != + (i_sectors = bch_count_inode_sectors(c, cur_inum)), + c, "i_sectors wrong: got %llu, should be %llu", + le64_to_cpu(inode.v.i_sectors), i_sectors); + + unfixable_fsck_err_on(have_inode && + !S_ISREG(i_mode) && !S_ISLNK(i_mode), c, + "extent type %u for non regular file, inode %llu mode %o", + k.k->type, k.k->p.inode, i_mode); + + unfixable_fsck_err_on(k.k->type != BCH_RESERVATION && + k.k->p.offset > round_up(i_size, PAGE_SIZE) >> 9, c, + "extent type %u offset %llu past end of inode %llu, i_size %llu", + k.k->type, k.k->p.offset, k.k->p.inode, i_size); + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + return ret; + + cur_inum = -1; + have_inode = false; + for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + POS(BCACHE_ROOT_INO, 0), k) { + struct bkey_s_c_dirent d; + struct bkey_i_inode target; + bool have_target; + u64 d_inum; + + next_inode(c, k.k->p.inode, &cur_inum, &inode, + &first_this_inode, &have_inode, + &i_size, &i_mode); + + unfixable_fsck_err_on(!have_inode, c, + "dirent in nonexisting directory %llu", + k.k->p.inode); + + unfixable_fsck_err_on(!S_ISDIR(i_mode), c, + "dirent in non directory inode %llu, type %u", + k.k->p.inode, mode_to_type(i_mode)); + + if (k.k->type != BCH_DIRENT) + continue; + + d = bkey_s_c_to_dirent(k); + d_inum = le64_to_cpu(d.v->d_inum); + + unfixable_fsck_err_on(d_inum == d.k->p.inode, c, + "dirent points to own directory"); + + have_target = !bch_inode_find_by_inum(c, d_inum, &target); + + unfixable_fsck_err_on(!have_target, c, + "dirent points to missing inode %llu, type %u filename %s", + d_inum, d.v->d_type, d.v->d_name); + + unfixable_fsck_err_on(have_target && + d.v->d_type != + mode_to_type(le16_to_cpu(target.v.i_mode)), c, + "incorrect d_type: got %u should be %u, filename %s", + d.v->d_type, + mode_to_type(le16_to_cpu(target.v.i_mode)), + d.v->d_name); + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + return ret; + + cur_inum = -1; + have_inode = false; + for_each_btree_key(&iter, c, BTREE_ID_XATTRS, + POS(BCACHE_ROOT_INO, 0), k) { + next_inode(c, k.k->p.inode, &cur_inum, &inode, + &first_this_inode, &have_inode, + &i_size, &i_mode); + + unfixable_fsck_err_on(!have_inode, c, + "xattr for missing inode %llu", + k.k->p.inode); + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + return ret; + + return 0; +fsck_err: + bch_btree_iter_unlock(&iter); + return ret; +} |