diff options
| Field | Value | Date |
|---|---|---|
| author | Kent Overstreet <kent.overstreet@gmail.com> | 2017-02-01 20:12:27 -0900 |
| committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-02-06 20:43:37 -0900 |
| commit | c8f34d5c7c93d19023a20e612455e91a5775f0e3 (patch) | |
| tree | ffac04d514fe47bcfef64130e44abf3616e99ab8 | |
| parent | cd85ccc92885dd9ff41f291ef07e1b6ea47a4fae (diff) | |

bcachefs: More repair code improvements

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | drivers/md/bcache/bcache.h | 5 |
| -rw-r--r-- | drivers/md/bcache/error.h | 63 |
| -rw-r--r-- | drivers/md/bcache/fs-gc.c | 255 |
| -rw-r--r-- | drivers/md/bcache/journal.c | 24 |

4 files changed, 202 insertions, 145 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 9a43a69af422..309d37286a6d 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -210,8 +210,9 @@ #define bch_meta_write_fault(name) \ dynamic_fault("bcache:meta:write:" name) -#define bch_fmt(_c, fmt) \ - "bcache (%s): " fmt "\n", ((_c)->name) +#ifndef bch_fmt +#define bch_fmt(_c, fmt) "bcache (%s): " fmt "\n", ((_c)->name) +#endif #define bch_info(c, fmt, ...) \ printk(KERN_INFO bch_fmt(c, fmt), ##__VA_ARGS__) diff --git a/drivers/md/bcache/error.h b/drivers/md/bcache/error.h index 9eb9335abaf7..33a28c4bd756 100644 --- a/drivers/md/bcache/error.h +++ b/drivers/md/bcache/error.h @@ -101,38 +101,51 @@ enum { BCH_FSCK_UNKNOWN_VERSION = 4, }; -#define unfixable_fsck_err(c, msg, ...) \ -do { \ - bch_err(c, msg " (repair unimplemented)", ##__VA_ARGS__); \ - ret = BCH_FSCK_REPAIR_UNIMPLEMENTED; \ - goto fsck_err; \ -} while (0) +/* These macros return true if error should be fixed: */ -#define unfixable_fsck_err_on(cond, c, ...) \ -do { \ - if (cond) \ - unfixable_fsck_err(c, __VA_ARGS__); \ -} while (0) +/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -#define fsck_err(c, msg, ...) \ -do { \ - if (!(c)->opts.fix_errors) { \ - bch_err(c, msg, ##__VA_ARGS__); \ +#ifndef __fsck_err +#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \ +({ \ + bool _fix = false; \ + \ + if (_can_fix && (c)->opts.fix_errors) { \ + bch_err(c, msg ", fixing", ##__VA_ARGS__); \ + set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ + _fix = true; \ + } else if (_can_ignore && \ + (c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \ + bch_err(c, msg " (ignoring)", ##__VA_ARGS__); \ + } else { \ + bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ goto fsck_err; \ } \ - set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ - bch_err(c, msg ", fixing", ##__VA_ARGS__); \ -} while (0) - -#define fsck_err_on(cond, c, ...) 
\ -({ \ - bool _ret = (cond); \ \ - if (_ret) \ - fsck_err(c, __VA_ARGS__); \ - _ret; \ + BUG_ON(!_fix && !_can_ignore); \ + _fix; \ }) +#endif + +#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \ + ((cond) ? __fsck_err(c, _can_fix, _can_ignore, \ + _nofix_msg, ##__VA_ARGS__) : false) + +#define unfixable_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__) + +#define need_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__) + +#define mustfix_fsck_err(c, ...) \ + __fsck_err(c, true, false, "not fixing", ##__VA_ARGS__) + +#define mustfix_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__) + +#define fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW diff --git a/drivers/md/bcache/fs-gc.c b/drivers/md/bcache/fs-gc.c index 8c3cb023e67f..1dec230fdf68 100644 --- a/drivers/md/bcache/fs-gc.c +++ b/drivers/md/bcache/fs-gc.c @@ -13,6 +13,65 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } +static int remove_dirent(struct cache_set *c, struct btree_iter *iter, + struct bkey_s_c_dirent dirent) +{ + struct qstr name; + struct bkey_i_inode dir_inode; + struct bch_hash_info dir_hash_info; + u64 dir_inum = dirent.k->p.inode; + int ret; + char *buf; + + name.len = bch_dirent_name_bytes(dirent); + buf = kmalloc(name.len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, dirent.v->d_name, name.len); + buf[name.len] = '\0'; + name.name = buf; + + /* Unlock iter so we don't deadlock, after copying name: */ + bch_btree_iter_unlock(iter); + + ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode); + if (ret) + goto err; + + dir_hash_info = bch_hash_info_init(&dir_inode.v); + + ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); +err: + kfree(buf); + return 
ret; +} + +static int reattach_inode(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + u64 inum) +{ + struct bch_hash_info lostfound_hash_info = + bch_hash_info_init(&lostfound_inode->v); + char name_buf[20]; + struct qstr name; + int ret; + + snprintf(name_buf, sizeof(name_buf), "%llu", inum); + name = (struct qstr) QSTR(name_buf); + + le32_add_cpu(&lostfound_inode->v.i_nlink, 1); + + ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i, + NULL, NULL, NULL, 0); + if (ret) + return ret; + + return bch_dirent_create(c, lostfound_inode->k.p.inode, + &lostfound_hash_info, + DT_DIR, &name, inum, NULL, 0); +} + struct inode_walker { bool first_this_inode; bool have_inode; @@ -122,12 +181,12 @@ static int check_dirents(struct cache_set *c) break; unfixable_fsck_err_on(!w.have_inode, c, - "dirent in nonexisting directory %llu", - k.k->p.inode); + "dirent in nonexisting directory %llu", + k.k->p.inode); unfixable_fsck_err_on(!S_ISDIR(w.i_mode), c, - "dirent in non directory inode %llu, type %u", - k.k->p.inode, mode_to_type(w.i_mode)); + "dirent in non directory inode %llu, type %u", + k.k->p.inode, mode_to_type(w.i_mode)); if (k.k->type != BCH_DIRENT) continue; @@ -135,8 +194,13 @@ static int check_dirents(struct cache_set *c) d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); - unfixable_fsck_err_on(d_inum == d.k->p.inode, c, - "dirent points to own directory"); + if (fsck_err_on(d_inum == d.k->p.inode, c, + "dirent points to own directory")) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } ret = bch_inode_find_by_inum(c, d_inum, &target); if (ret && ret != -ENOENT) @@ -145,18 +209,43 @@ static int check_dirents(struct cache_set *c) have_target = !ret; ret = 0; - unfixable_fsck_err_on(!have_target, c, - "dirent points to missing inode %llu, type %u filename %s", - d_inum, d.v->d_type, d.v->d_name); - - unfixable_fsck_err_on(have_target && - d.v->d_type != - mode_to_type(le16_to_cpu(target.v.i_mode)), c, - 
"incorrect d_type: got %u should be %u, filename %s", - d.v->d_type, - mode_to_type(le16_to_cpu(target.v.i_mode)), - d.v->d_name); + if (fsck_err_on(!have_target, c, + "dirent points to missing inode %llu, type %u filename %s", + d_inum, d.v->d_type, d.v->d_name)) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } + + if (fsck_err_on(have_target && + d.v->d_type != + mode_to_type(le16_to_cpu(target.v.i_mode)), c, + "incorrect d_type: got %u should be %u, filename %s", + d.v->d_type, + mode_to_type(le16_to_cpu(target.v.i_mode)), + d.v->d_name)) { + struct bkey_i_dirent *n; + + n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); + if (!n) { + ret = -ENOMEM; + goto err; + } + + bkey_reassemble(&n->k_i, d.s_c); + n->v.d_type = mode_to_type(le16_to_cpu(target.v.i_mode)); + + ret = bch_btree_insert_at(c, NULL, NULL, NULL, + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(&iter, &n->k_i)); + kfree(n); + if (ret) + goto err; + + } } +err: fsck_err: return bch_btree_iter_unlock(&iter) ?: ret; } @@ -329,69 +418,10 @@ static int path_down(struct pathbuf *p, u64 inum) return 0; } -static int detach_dir(struct cache_set *c, struct btree_iter *iter, - struct bkey_s_c_dirent dirent) -{ - struct qstr name; - struct bkey_i_inode dir_inode; - struct bch_hash_info dir_hash_info; - u64 dir_inum = dirent.k->p.inode; - int ret; - char *buf; - - name.len = bch_dirent_name_bytes(dirent); - buf = kmalloc(name.len + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - memcpy(buf, dirent.v->d_name, name.len); - buf[name.len] = '\0'; - name.name = buf; - - /* Unlock iter so we don't deadlock, after copying name: */ - bch_btree_iter_unlock(iter); - - ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode); - if (ret) - goto err; - - dir_hash_info = bch_hash_info_init(&dir_inode.v); - - ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); -err: - kfree(buf); - return ret; -} - -static int reattach_dir(struct cache_set *c, - struct bkey_i_inode *lostfound_inode, - u64 inum) -{ 
- struct bch_hash_info lostfound_hash_info = - bch_hash_info_init(&lostfound_inode->v); - char name_buf[20]; - struct qstr name; - int ret; - - snprintf(name_buf, sizeof(name_buf), "%llu", inum); - name = (struct qstr) QSTR(name_buf); - - le32_add_cpu(&lostfound_inode->v.i_nlink, 1); - - ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i, - NULL, NULL, NULL, 0); - if (ret) - return ret; - - return bch_dirent_create(c, lostfound_inode->k.p.inode, - &lostfound_hash_info, - DT_DIR, &name, inum, NULL, 0); -} - noinline_for_stack -static int check_directory_structure(struct cache_set *c) +static int check_directory_structure(struct cache_set *c, + struct bkey_i_inode *lostfound_inode) { - struct bkey_i_inode root_inode, lostfound_inode; struct inode_bitmap dirs_done = { NULL, 0 }; struct pathbuf path = { 0, 0, NULL }; struct pathbuf_entry *e; @@ -402,14 +432,6 @@ static int check_directory_structure(struct cache_set *c) u64 d_inum; int ret = 0; - ret = check_root(c, &root_inode); - if (ret) - return ret; - - ret = check_lostfound(c, &root_inode, &lostfound_inode); - if (ret) - return ret; - /* DFS: */ restart_dfs: ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO); @@ -446,7 +468,7 @@ next: if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, "directory with multiple hardlinks")) { - ret = detach_dir(c, &iter, dirent); + ret = remove_dirent(c, &iter, dirent); if (ret) goto err; continue; @@ -482,7 +504,7 @@ up: k.k->p.inode)) { bch_btree_iter_unlock(&iter); - ret = reattach_dir(c, &lostfound_inode, k.k->p.inode); + ret = reattach_inode(c, lostfound_inode, k.k->p.inode); if (ret) goto err; @@ -595,7 +617,9 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) return bch_btree_iter_unlock(&iter) ?: sectors; } -static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, +static int bch_gc_do_inode(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + struct btree_iter *iter, struct bkey_s_c_inode inode, struct nlink link) { 
u16 i_mode = le16_to_cpu(inode.v->i_mode); @@ -611,14 +635,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, inode.k->p.inode, i_nlink, link.count, mode_to_type(i_mode)); + /* These should have been caught/fixed by earlier passes: */ if (S_ISDIR(i_mode)) { - unfixable_fsck_err_on(link.count > 1, c, + need_fsck_err_on(link.count > 1, c, "directory %llu with multiple hardlinks: %u", inode.k->p.inode, link.count); real_i_nlink = link.count * 2 + link.dir_count; } else { - unfixable_fsck_err_on(link.dir_count, c, + need_fsck_err_on(link.dir_count, c, "found dirents for non directory %llu", inode.k->p.inode); @@ -631,11 +656,16 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, "but found orphaned inode %llu", inode.k->p.inode); - unfixable_fsck_err_on(S_ISDIR(i_mode) && - bch_empty_dir(c, inode.k->p.inode), c, - "non empty directory with link count 0, " - "inode nlink %u, dir links found %u", - i_nlink, link.dir_count); + if (fsck_err_on(S_ISDIR(i_mode) && + bch_empty_dir(c, inode.k->p.inode), c, + "non empty directory with link count 0, " + "inode nlink %u, dir links found %u", + i_nlink, link.dir_count)) { + ret = reattach_inode(c, lostfound_inode, + inode.k->p.inode); + if (ret) + return ret; + } bch_verbose(c, "deleting inode %llu", inode.k->p.inode); @@ -731,7 +761,9 @@ fsck_err: } noinline_for_stack -static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links, +static int bch_gc_walk_inodes(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + struct nlinks *links, u64 range_start, u64 range_end) { struct btree_iter iter; @@ -753,7 +785,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); nlinks_pos = range_start + nlinks_iter.pos; if (iter.pos.inode > nlinks_pos) { - unfixable_fsck_err_on(link && link->count, c, + /* Should have been caught by dirents pass: */ + need_fsck_err_on(link && link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); 
genradix_iter_advance(&nlinks_iter, links); @@ -770,9 +803,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); */ bch_btree_iter_unlock(&iter); - ret = bch_gc_do_inode(c, &iter, - bkey_s_c_to_inode(k), - *link); + ret = bch_gc_do_inode(c, lostfound_inode, &iter, + bkey_s_c_to_inode(k), *link); if (ret == -EINTR) continue; if (ret) @@ -781,7 +813,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (link->count) atomic_long_inc(&c->nr_inodes); } else { - unfixable_fsck_err_on(link->count, c, + /* Should have been caught by dirents pass: */ + need_fsck_err_on(link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); } @@ -801,7 +834,8 @@ fsck_err: } noinline_for_stack -static int check_inode_nlinks(struct cache_set *c) +static int check_inode_nlinks(struct cache_set *c, + struct bkey_i_inode *lostfound_inode) { struct nlinks links; u64 this_iter_range_start, next_iter_range_start = 0; @@ -819,7 +853,7 @@ static int check_inode_nlinks(struct cache_set *c) if (ret) break; - ret = bch_gc_walk_inodes(c, &links, + ret = bch_gc_walk_inodes(c, lostfound_inode, &links, this_iter_range_start, next_iter_range_start); if (ret) @@ -839,8 +873,17 @@ static int check_inode_nlinks(struct cache_set *c) */ int bch_fsck(struct cache_set *c, bool full_fsck) { + struct bkey_i_inode root_inode, lostfound_inode; int ret; + ret = check_root(c, &root_inode); + if (ret) + return ret; + + ret = check_lostfound(c, &root_inode, &lostfound_inode); + if (ret) + return ret; + if (!full_fsck) goto check_nlinks; @@ -856,11 +899,11 @@ int bch_fsck(struct cache_set *c, bool full_fsck) if (ret) return ret; - ret = check_directory_structure(c); + ret = check_directory_structure(c, &lostfound_inode); if (ret) return ret; check_nlinks: - ret = check_inode_nlinks(c); + ret = check_inode_nlinks(c, &lostfound_inode); if (ret) return ret; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ffc957368fc5..32306bc19e9b 100644 --- 
a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -478,14 +478,14 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, char buf[160]; int ret = 0; - if (fsck_err_on(!k->k.u64s, c, + if (mustfix_fsck_err_on(!k->k.u64s, c, "invalid %s in journal: k->u64s 0", type)) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(jset_keys_next(entry), next); return 0; } - if (fsck_err_on((void *) bkey_next(k) > + if (mustfix_fsck_err_on((void *) bkey_next(k) > (void *) jset_keys_next(entry), c, "invalid %s in journal: extends past end of journal entry", type)) { @@ -494,7 +494,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, return 0; } - if (fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c, + if (mustfix_fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c, "invalid %s in journal: bad format %u", type, k->k.format)) { le16_add_cpu(&entry->u64s, -k->k.u64s); @@ -510,7 +510,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, if (invalid) { bch_bkey_val_to_text(c, key_type, buf, sizeof(buf), bkey_i_to_s_c(k)); - fsck_err(c, "invalid %s in journal: %s", type, buf); + mustfix_fsck_err(c, "invalid %s in journal: %s", type, buf); le16_add_cpu(&entry->u64s, -k->k.u64s); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -543,7 +543,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto return BCH_FSCK_UNKNOWN_VERSION; } - if (fsck_err_on(bytes > bucket_sectors_left << 9 || + if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 || bytes > c->journal.entry_size_max, c, "journal entry too big (%zu bytes), sector %lluu", bytes, sector)) { @@ -556,7 +556,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto got = le64_to_cpu(j->csum); expect = __csum_set(j, le32_to_cpu(j->u64s), JSET_CSUM_TYPE(j)); - if (fsck_err_on(got != expect, c, + if (mustfix_fsck_err_on(got != expect, c, "journal checksum bad (got %llu 
expect %llu), sector %lluu", got, expect, sector)) { /* XXX: retry IO, when we start retrying checksum errors */ @@ -564,14 +564,14 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto return JOURNAL_ENTRY_BAD; } - if (fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c, - "invalid journal entry: last_seq > seq")) + if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), + c, "invalid journal entry: last_seq > seq")) j->last_seq = j->seq; for_each_jset_entry(entry, j) { struct bkey_i *k; - if (fsck_err_on(jset_keys_next(entry) > + if (mustfix_fsck_err_on(jset_keys_next(entry) > bkey_idx(j, le32_to_cpu(j->u64s)), c, "journal entry extents past end of jset")) { j->u64s = cpu_to_le64((u64 *) entry - j->_data); @@ -595,7 +595,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto case JOURNAL_ENTRY_BTREE_ROOT: k = entry->start; - if (fsck_err_on(!entry->u64s || + if (mustfix_fsck_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, "invalid btree root journal entry: wrong number of keys")) { journal_entry_null_range(entry, @@ -613,7 +613,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto break; case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED: - if (fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, + if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, jset_keys_next(entry)); @@ -621,7 +621,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto break; default: - fsck_err(c, "invalid journal entry type %llu", + mustfix_fsck_err(c, "invalid journal entry type %llu", JOURNAL_ENTRY_TYPE(entry)); journal_entry_null_range(entry, jset_keys_next(entry)); break; |