diff options
64 files changed, 978 insertions, 560 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision index 5fe33c98..f08feb4a 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -c9d875f9be1f853e747c9e00421c678b0adf73d2 +9b4ab159abcd84cf0c25ee851dda8c40baffecc8 @@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" [[package]] name = "bcachefs-tools" -version = "1.25.1" +version = "1.25.2" dependencies = [ "anyhow", "bch_bindgen", @@ -4,7 +4,7 @@ default-members = [".", "bch_bindgen"] [package] name = "bcachefs-tools" -version = "1.25.1" +version = "1.25.2" authors = ["Yuxuan Shui <yshuiv7@gmail.com>", "Kayla Firestack <dev@kaylafire.me>", "Kent Overstreet <kent.overstreet@linux.dev>" ] edition = "2021" rust-version = "1.77.0" @@ -1,4 +1,4 @@ -VERSION=1.25.1 +VERSION=1.25.2 PREFIX?=/usr/local LIBEXECDIR?=$(PREFIX)/libexec diff --git a/bch_bindgen/src/fs.rs b/bch_bindgen/src/fs.rs index 2997ed8f..00c5d965 100644 --- a/bch_bindgen/src/fs.rs +++ b/bch_bindgen/src/fs.rs @@ -9,13 +9,25 @@ pub struct Fs { } impl Fs { - pub fn open(devs: &[PathBuf], opts: c::bch_opts) -> Result<Fs, bch_errcode> { - let devs: Vec<_> = devs + pub fn open(devs: &[PathBuf], mut opts: c::bch_opts) -> Result<Fs, bch_errcode> { + let devs_cstrs : Vec<_> = devs .iter() - .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap().into_raw()) + .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap()) .collect(); - let ret = unsafe { c::bch2_fs_open(devs[..].as_ptr(), devs.len() as u32, opts) }; + let mut devs_array: Vec<_> = devs_cstrs + .iter() + .map(|i| i.as_ptr()) + .collect(); + + let ret = unsafe { + let mut devs: c::darray_const_str = std::mem::zeroed(); + + devs.data = devs_array[..].as_mut_ptr(); + devs.nr = devs_array.len(); + + c::bch2_fs_open(&mut devs, &mut opts) + }; errptr_to_result(ret).map(|fs| Fs { raw: fs }) } diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c index 96c25310..cb40dcea 100644 --- a/c_src/cmd_device.c +++ b/c_src/cmd_device.c @@ -515,7 +515,11 @@ static int cmd_device_resize(int argc, char *argv[]) } else { printf("Doing offline resize of %s\n", dev); - struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty()); + darray_const_str devs = {}; + darray_push(&devs, dev); + + struct bch_opts opts = bch2_opts_empty(); + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c))); @@ -612,7 +616,11 @@ static int cmd_device_resize_journal(int argc, char *argv[]) } else { printf("%s is offline - starting:\n", dev); - struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty()); + darray_const_str devs = {}; + darray_push(&devs, dev); + + struct bch_opts opts = bch2_opts_empty(); + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c))); diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c index 7d7ceaa5..c6b813e9 100644 --- a/c_src/cmd_dump.c +++ b/c_src/cmd_dump.c @@ -147,7 +147,9 @@ int cmd_dump(int argc, char *argv[]) if (!argc) die("Please supply device(s) to check"); - struct bch_fs *c = bch2_fs_open(argv, argc, opts); + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); + + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("error opening devices: %s", bch2_err_str(PTR_ERR(c))); @@ -177,5 +179,6 @@ int cmd_dump(int argc, char *argv[]) up_read(&c->state_lock); bch2_fs_stop(c); + darray_exit(&devs); return 0; } diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c index ac48454f..aa87feb0 100644 --- a/c_src/cmd_format.c +++ b/c_src/cmd_format.c @@ -123,7 +123,7 @@ static void build_fs(struct bch_fs *c, const char *src_path) int cmd_format(int argc, char *argv[]) { dev_opts_list devices = {}; - darray_str device_paths = {}; + darray_const_str device_paths = {}; struct format_opts opts = format_opts_default(); struct dev_opts dev_opts = dev_opts_default(); bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false; @@ -302,9 +302,8 @@ int cmd_format(int argc, char *argv[]) * Start the filesystem once, to allocate the journal and create * the root directory: */ - struct bch_fs *c = bch2_fs_open(device_paths.data, - device_paths.nr, - bch2_opts_empty()); + struct bch_opts open_opts = bch2_opts_empty(); + struct bch_fs *c = bch2_fs_open(&device_paths, &open_opts); if (IS_ERR(c)) die("error opening %s: %s", device_paths.data[0], bch2_err_str(PTR_ERR(c))); diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c index aa81fa11..13cec103 100644 --- a/c_src/cmd_fsck.c +++ b/c_src/cmd_fsck.c @@ -86,7 +86,7 @@ static int splice_fd_to_stdinout(int fd) stdin_closed = true; } - xclose(fd); + close(fd); return 0; } @@ -113,7 +113,7 @@ static void append_opt(struct printbuf *out, const char *opt) prt_str(out, opt); } -static bool should_use_kernel_fsck(darray_str devs) +static bool should_use_kernel_fsck(darray_const_str devs) { system("modprobe bcachefs"); @@ -131,7 +131,7 @@ static bool should_use_kernel_fsck(darray_str devs) opt_set(opts, nochanges, true); opt_set(opts, read_only, true); - struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts); + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) return false; @@ -265,7 +265,7 @@ int cmd_fsck(int argc, char *argv[]) exit(8); } - darray_str devs = get_or_split_cmdline_devs(argc, argv); + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); darray_for_each(devs, i) if (dev_mounted(*i)) { @@ -324,7 +324,7 @@ userland_fsck: if (ret) return ret; - struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts); + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) exit(8); diff --git a/c_src/cmd_key.c b/c_src/cmd_key.c index ac8a94a8..c1b72ff4 100644 --- a/c_src/cmd_key.c +++ b/c_src/cmd_key.c @@ -87,12 +87,13 @@ int cmd_unlock(int argc, char *argv[]) int cmd_set_passphrase(int argc, char *argv[]) { - struct bch_opts opts = bch2_opts_empty(); - struct bch_fs *c; - - if (argc < 2) + args_shift(1); + if (!argc) die("Please supply one or more devices"); + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); + + struct bch_opts opts = bch2_opts_empty(); opt_set(opts, nostart, true); /* @@ -100,7 +101,7 @@ int cmd_set_passphrase(int argc, char *argv[]) * to make sure we're opening and updating every component device: */ - c = bch2_fs_open(argv + 1, argc - 1, opts); + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c))); @@ -126,14 +127,16 @@ int cmd_set_passphrase(int argc, char *argv[]) int cmd_remove_passphrase(int argc, char *argv[]) { - struct bch_opts opts = bch2_opts_empty(); - struct bch_fs *c; - - if (argc < 2) + args_shift(1); + if (!argc) die("Please supply one or more devices"); + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); + + struct bch_opts opts = bch2_opts_empty(); opt_set(opts, nostart, true); - c = bch2_fs_open(argv + 1, argc - 1, opts); + + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c))); diff --git a/c_src/cmd_kill_btree_node.c b/c_src/cmd_kill_btree_node.c index c8f43150..81dbdd4b 100644 --- a/c_src/cmd_kill_btree_node.c +++ b/c_src/cmd_kill_btree_node.c @@ -74,7 +74,9 @@ int cmd_kill_btree_node(int argc, char *argv[]) if (!argc) die("Please supply device(s)"); - struct bch_fs *c = bch2_fs_open(argv, argc, opts); + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); + + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c index 3cdf4846..fe9e3bbd 100644 --- a/c_src/cmd_list_journal.c +++ b/c_src/cmd_list_journal.c @@ -319,9 +319,9 @@ int cmd_list_journal(int argc, char *argv[]) if (!argc) die("Please supply device(s) to open"); - darray_str devs = get_or_split_cmdline_devs(argc, argv); + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); - struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts); + struct bch_fs *c = bch2_fs_open(&devs, &opts); if (IS_ERR(c)) die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c index edd77377..91c42302 100644 --- a/c_src/cmd_migrate.c +++ b/c_src/cmd_migrate.c @@ -253,14 +253,15 @@ static int migrate_fs(const char *fs_path, free(sb); - char *path[1] = { dev->path }; + darray_const_str dev_paths = {}; + darray_push(&dev_paths, dev->path); struct bch_opts opts = bch2_opts_empty(); opt_set(opts, sb, sb_offset); opt_set(opts, nostart, true); opt_set(opts, noexcl, true); - struct bch_fs *c = bch2_fs_open(path, 1, opts); + struct bch_fs *c = bch2_fs_open(&dev_paths, &opts); if (IS_ERR(c)) die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c))); @@ -295,7 +296,7 @@ static int migrate_fs(const char *fs_path, opt_set(opts, nochanges, true); opt_set(opts, read_only, true); - c = bch2_fs_open(path, 1, opts); + c = bch2_fs_open(&dev_paths, &opts); if (IS_ERR(c)) die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c))); @@ -377,14 +378,14 @@ static void migrate_superblock_usage(void) int cmd_migrate_superblock(int argc, char *argv[]) { - char *dev = NULL; + darray_const_str devs = {}; u64 sb_offset = 0; int opt, ret; while ((opt = getopt(argc, argv, "d:o:h")) != -1) switch (opt) { case 'd': - dev = optarg; + darray_push(&devs, optarg); break; case 'o': ret = kstrtou64(optarg, 10, &sb_offset); @@ -396,13 +397,13 @@ int cmd_migrate_superblock(int argc, char *argv[]) exit(EXIT_SUCCESS); } - if (!dev) + if (!devs.nr) die("Please specify a device"); if (!sb_offset) die("Please specify offset of existing superblock"); - int fd = xopen(dev, O_RDWR); + int fd = xopen(devs.data[0], O_RDWR); struct bch_sb *sb = __bch2_super_read(fd, sb_offset); unsigned sb_size = 1U << sb->layout.sb_max_size_bits; @@ -435,7 +436,7 @@ int cmd_migrate_superblock(int argc, char *argv[]) opt_set(opts, nostart, true); opt_set(opts, sb, sb_offset); - struct bch_fs *c = bch2_fs_open(&dev, 1, opts); + struct bch_fs *c = bch2_fs_open(&devs, &opts); ret = PTR_ERR_OR_ZERO(c) ?: bch2_buckets_nouse_alloc(c); if (ret) @@ -461,7 +462,7 @@ int cmd_migrate_superblock(int argc, char *argv[]) * inconsequential: */ - c = bch2_fs_open(&dev, 1, opts); + c = bch2_fs_open(&devs, &opts); ret = PTR_ERR_OR_ZERO(c); if (ret) die("error opening filesystem: %s", bch2_err_str(ret)); diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c index e314903a..14201c8d 100644 --- a/c_src/cmd_option.c +++ b/c_src/cmd_option.c @@ -94,10 +94,12 @@ int cmd_set_option(int argc, char *argv[]) } if (!online) { + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); + struct bch_opts open_opts = bch2_opts_empty(); opt_set(open_opts, nostart, true); - struct bch_fs *c = bch2_fs_open(argv, argc, open_opts); + struct bch_fs *c = bch2_fs_open(&devs, &open_opts); if (IS_ERR(c)) { fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c))); exit(EXIT_FAILURE); diff --git a/c_src/posix_to_bcachefs.c b/c_src/posix_to_bcachefs.c index 3c2e72d8..68d7e992 100644 --- a/c_src/posix_to_bcachefs.c +++ b/c_src/posix_to_bcachefs.c @@ -176,7 +176,7 @@ static void write_data(struct bch_fs *c, die("write error: %s", bch2_err_str(op.error)); } -void copy_data(struct bch_fs *c, +static void copy_data(struct bch_fs *c, struct bch_inode_unpacked *dst_inode, int src_fd, u64 start, u64 end) { diff --git a/c_src/tools-util.c b/c_src/tools-util.c index ea12946c..03ad009f 100644 --- a/c_src/tools-util.c +++ b/c_src/tools-util.c @@ -547,7 +547,7 @@ char *dev_to_path(dev_t dev) return path; } -struct mntent *dev_to_mount(char *dev) +struct mntent *dev_to_mount(const char *dev) { struct mntent *mnt, *ret = NULL; FILE *f = setmntent("/proc/mounts", "r"); @@ -586,7 +586,7 @@ found: return ret; } -int dev_mounted(char *dev) +int dev_mounted(const char *dev) { struct mntent *mnt = dev_to_mount(dev); @@ -748,9 +748,9 @@ unsigned version_parse(char *buf) return BCH_VERSION(major, minor); } -darray_str get_or_split_cmdline_devs(int argc, char *argv[]) +darray_const_str get_or_split_cmdline_devs(int argc, char *argv[]) { - darray_str ret = {}; + darray_const_str ret = {}; if (argc == 1) { bch2_split_devs(argv[0], &ret); diff --git a/c_src/tools-util.h b/c_src/tools-util.h index 27652b62..1352050e 100644 --- a/c_src/tools-util.h +++ b/c_src/tools-util.h @@ -186,8 +186,8 @@ u32 crc32c(u32, const void *, size_t); char *dev_to_name(dev_t); char *dev_to_path(dev_t); -struct mntent *dev_to_mount(char *); -int dev_mounted(char *); +struct mntent *dev_to_mount(const char *); +int dev_mounted(const char *); char *fd_to_dev_model(int); #define args_shift(_nr) \ @@ -217,7 +217,7 @@ struct bbpos_range bbpos_range_parse(char *); unsigned version_parse(char *); -darray_str get_or_split_cmdline_devs(int argc, char *argv[]); +darray_const_str get_or_split_cmdline_devs(int argc, char *argv[]); char *pop_cmd(int *argc, char *argv[]); diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 37cbbf86..002e3853 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -484,7 +484,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, if (ret) return ERR_PTR(ret); - ret = bch2_trans_update(trans, &iter, &a->k_i, flags); + ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_); bch2_trans_iter_exit(trans, &iter); return unlikely(ret) ? ERR_PTR(ret) : a; } @@ -2393,14 +2393,16 @@ bkey_err: int bch2_fs_freespace_init(struct bch_fs *c) { - int ret = 0; - bool doing_init = false; + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) + return 0; + /* * We can crash during the device add path, so we need to check this on * every mount: */ + bool doing_init = false; for_each_member_device(c, ca) { if (ca->mi.freespace_initialized) continue; @@ -2410,7 +2412,7 @@ int bch2_fs_freespace_init(struct bch_fs *c) doing_init = true; } - ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); if (ret) { bch2_dev_put(ca); bch_err_fn(c, ret); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index ac1244e7..a0f92daa 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -1339,6 +1339,8 @@ alloc_done: open_bucket_for_each(c, &req->wp->ptrs, ob, i) req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free); + req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c)); + BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX); return 0; diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index 64e1f1ef..19220341 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -157,7 +157,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ unsigned i; open_bucket_for_each(c, &wp->ptrs, ob, i) - ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); + ob_push(c, ob->sectors_free < block_sectors(c) + ? &ptrs + : &keep, ob); wp->ptrs = keep; mutex_unlock(&wp->lock); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 675b8f8e..5fb8b0f4 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -295,6 +295,16 @@ do { \ bch2_print(_c, __VA_ARGS__); \ } while (0) +#define bch2_print_str_ratelimited(_c, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&_rs)) \ + bch2_print_str(_c, __VA_ARGS__); \ +} while (0) + #define bch_info(c, fmt, ...) \ bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_info_ratelimited(c, fmt, ...) \ @@ -834,6 +844,7 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; + u64 recovery_passes_required; unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 9db8f5f8..92ae3173 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -48,6 +48,27 @@ #define DROP_PREV_NODE 11 #define DID_FILL_FROM_SCAN 12 +/* + * Returns true if it's a btree we can easily reconstruct, or otherwise won't + * cause data loss if it's missing: + */ +static bool btree_id_important(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return false; + + switch (btree) { + case BTREE_ID_quotas: + case BTREE_ID_snapshot_trees: + case BTREE_ID_logged_ops: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return false; + default: + return true; + } +} + static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -350,20 +371,13 @@ again: prt_char(&buf, ' '); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); - if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), - trans, btree_node_read_error, - "Topology repair: unreadable btree node at\n%s", - buf.buf)) { + if (bch2_err_matches(ret, EIO)) { bch2_btree_node_evict(trans, cur_k.k); cur = NULL; ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); if (ret) break; - - ret = bch2_btree_lost_data(c, b->c.btree_id); - if (ret) - break; continue; } @@ -525,9 +539,6 @@ int bch2_check_topology(struct bch_fs *c) bch2_btree_id_to_text(&buf, i); if (r->error) { - ret = bch2_btree_lost_data(c, i); - if (ret) - break; reconstruct_root: bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); @@ -535,8 +546,10 @@ reconstruct_root: r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 5cd27a9c..aec397b4 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -515,19 +515,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, + bool print_pos, struct btree *b, struct bset *i, struct bkey_packed *k, - unsigned offset, int write) + unsigned offset, int rw) { - prt_printf(out, bch2_log_msg(c, "%s"), - write == READ - ? "error validating btree node " - : "corrupt btree node before write "); + if (print_pos) { + prt_str(out, rw == READ + ? "error validating btree node " + : "corrupt btree node before write "); + prt_printf(out, "at btree "); + bch2_btree_pos_to_text(out, c, b); + prt_newline(out); + } + if (ca) - prt_printf(out, "on %s ", ca->name); - prt_printf(out, "at btree "); - bch2_btree_pos_to_text(out, c, b); + prt_printf(out, "%s ", ca->name); - prt_printf(out, "\nnode offset %u/%u", + prt_printf(out, "node offset %u/%u", b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); @@ -538,75 +542,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } -__printf(10, 11) +__printf(11, 12) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, struct bkey_packed *k, - int write, - bool have_retry, + int rw, enum bch_sb_error_id err_type, + struct bch_io_failures *failed, + struct printbuf *err_msg, const char *fmt, ...) { - bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; + if (c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) + return -BCH_ERR_fsck_fix; + + bool have_retry = false; + int ret2; + + if (ca) { + bch2_mark_btree_validate_failure(failed, ca->dev_idx); + + struct extent_ptr_decoded pick; + have_retry = !bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + failed, &pick, -1); + } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) ret = -BCH_ERR_btree_node_read_err_fixable; if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; - if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) - bch2_sb_error_count(c, err_type); + bch2_sb_error_count(c, err_type); + + bool print_deferred = err_msg && + rw == READ && + !(test_bit(BCH_FS_fsck_running, &c->flags) && + c->opts.fix_errors == FSCK_FIX_ask); struct printbuf out = PRINTBUF; - if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) { - printbuf_indent_add_nextline(&out, 2); -#ifdef BCACHEFS_LOG_PREFIX - prt_printf(&out, bch2_log_msg(c, "")); -#endif - } + bch2_log_msg_start(c, &out); + + if (!print_deferred) + err_msg = &out; - btree_err_msg(&out, c, ca, b, i, k, b->written, write); + btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw); va_list args; va_start(args, fmt); - prt_vprintf(&out, fmt, args); + prt_vprintf(err_msg, fmt, args); va_end(args); - if (write == WRITE) { + if (print_deferred) { + prt_newline(err_msg); + + switch (ret) { + case -BCH_ERR_btree_node_read_err_fixable: + ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type); + if (ret2 != -BCH_ERR_fsck_fix && + ret2 != -BCH_ERR_fsck_ignore) { + ret = ret2; + goto fsck_err; + } + + if (!have_retry) + ret = -BCH_ERR_fsck_fix; + goto out; + case -BCH_ERR_btree_node_read_err_bad_node: + prt_str(&out, ", "); + ret = __bch2_topology_error(c, &out); + break; + } + + goto out; + } + + if (rw == WRITE) { prt_str(&out, ", "); ret = __bch2_inconsistent_error(c, &out) ? -BCH_ERR_fsck_errors_not_fixed : 0; - silent = false; + goto print; } switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - ret = !silent - ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf) - : -BCH_ERR_fsck_fix; - if (ret != -BCH_ERR_fsck_fix && - ret != -BCH_ERR_fsck_ignore) + ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf); + if (ret2 != -BCH_ERR_fsck_fix && + ret2 != -BCH_ERR_fsck_ignore) { + ret = ret2; goto fsck_err; - ret = -BCH_ERR_fsck_fix; + } + + if (!have_retry) + ret = -BCH_ERR_fsck_fix; goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); ret = __bch2_topology_error(c, &out); - if (ret) - silent = false; - break; - case -BCH_ERR_btree_node_read_err_incompatible: - ret = -BCH_ERR_fsck_errors_not_fixed; - silent = false; break; } - - if (!silent) - bch2_print_str(c, KERN_ERR, out.buf); +print: + bch2_print_str(c, KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); @@ -615,8 +654,9 @@ fsck_err: #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ ({ \ - int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \ + int _ret = __btree_err(type, c, ca, b, i, k, write, \ BCH_FSCK_ERR_##_err_type, \ + failed, err_msg, \ msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ @@ -624,7 +664,7 @@ fsck_err: goto fsck_err; \ } \ \ - *saw_error = true; \ + true; \ }) #define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) @@ -682,8 +722,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, - unsigned offset, unsigned sectors, - int write, bool have_retry, bool *saw_error) + unsigned offset, unsigned sectors, int write, + struct bch_io_failures *failed, + struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); @@ -896,7 +937,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b, static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, - bool have_retry, bool *saw_error) + struct bch_io_failures *failed, + struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; @@ -1009,7 +1051,9 @@ fsck_err: } int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - struct btree *b, bool have_retry, bool *saw_error) + struct btree *b, + struct bch_io_failures *failed, + struct printbuf *err_msg) { struct btree_node_entry *bne; struct sort_iter *iter; @@ -1022,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; - int ret = 0, retry_read = 0, write = READ; + int ret = 0, write = READ; u64 start_time = local_clock(); b->version_ondisk = U16_MAX; @@ -1156,15 +1200,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); - ret = validate_bset(c, ca, b, i, b->written, sectors, - READ, have_retry, saw_error); + ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg); if (ret) goto fsck_err; if (!b->written) btree_node_set_format(b, b->data->format); - ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error); + ret = validate_bset_keys(c, b, i, READ, failed, err_msg); if (ret) goto fsck_err; @@ -1292,20 +1335,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (!ptr_written) set_btree_node_need_rewrite(b); -out: +fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); - return retry_read; -fsck_err: - if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) { - retry_read = 1; - } else { - set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); - } - goto out; + return ret; } static void btree_node_read_work(struct work_struct *work) @@ -1317,15 +1351,25 @@ static void btree_node_read_work(struct work_struct *work) struct btree *b = rb->b; struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; + int ret = 0; + struct printbuf buf = PRINTBUF; - bool saw_error = false; - bool retry = false; - bool can_retry; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "btree node read error at btree "); + bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); goto start; while (1) { - retry = true; - bch_info(c, "retrying read"); + ret = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick, -1); + if (ret) { + set_btree_node_read_error(b); + break; + } + ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; rb->start_time = local_clock(); @@ -1343,60 +1387,59 @@ static void btree_node_read_work(struct work_struct *work) bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, rb->start_time, !bio->bi_status); start: - printbuf_reset(&buf); - bch2_btree_pos_to_text(&buf, c, b); - - if (ca && bio->bi_status) - bch_err_dev_ratelimited(ca, - "btree read error %s for %s", - bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = false; - bch2_mark_io_failure(&failed, &rb->pick, false); - - can_retry = bch2_bkey_pick_read_device(c, - bkey_i_to_s_c(&b->key), - &failed, &rb->pick, -1) > 0; - - if (!bio->bi_status && - !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) { - if (retry) - bch_info(c, "retry success"); - break; + if (bio->bi_status) { + bch2_mark_io_failure(&failed, &rb->pick, false); + continue; } - saw_error = true; + ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); + if (ret == -BCH_ERR_btree_node_read_err_want_retry || + ret == -BCH_ERR_btree_node_read_err_must_retry) + continue; - if (!can_retry) { + if (ret) set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); - break; - } + + break; } - async_object_list_del(c, btree_read_bio, rb->list_idx); - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], - rb->start_time); - bio_put(&rb->bio); + bch2_io_failures_to_text(&buf, c, &failed); - if ((saw_error || + if (btree_node_read_error(b)) + bch2_btree_lost_data(c, &buf, b->c.btree_id); + + /* + * only print retry success if we read from a replica with no errors + */ + if (btree_node_read_error(b)) + prt_printf(&buf, "ret %s", bch2_err_str(ret)); + else if (failed.nr) { + if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) + prt_printf(&buf, "retry success"); + else + prt_printf(&buf, "repair success"); + } + + if ((failed.nr || btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { - if (saw_error) { - printbuf_reset(&buf); - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", - __func__, buf.buf); - } - + prt_printf(&buf, " (rewriting node)"); bch2_btree_node_rewrite_async(c, b); } + prt_newline(&buf); + + if (failed.nr) + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); + async_object_list_del(c, btree_read_bio, rb->list_idx); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], + rb->start_time); + bio_put(&rb->bio); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); @@ -1480,12 +1523,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) struct btree *b = ra->b; struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; - bool have_retry = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; bool _saw_error = false, *saw_error = &_saw_error; + struct printbuf *err_msg = NULL; + struct bch_io_failures *failed = NULL; for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; @@ -1578,14 +1622,19 @@ fsck_err: if (best >= 0) { memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); - ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); + ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL); } else { ret = -1; } if (ret) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); + + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); @@ -1718,6 +1767,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); + bch2_btree_lost_data(c, &buf, b->c.btree_id); bch_err_ratelimited(c, "%s", buf.buf); if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && @@ -1725,7 +1776,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bch2_fatal_error(c); set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); @@ -2194,8 +2244,6 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - bool saw_error; - int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, @@ -2208,8 +2256,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, return ret; } - ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: - validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); + ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?: + validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL); if (ret) { bch2_inconsistent_error(c); dump_stack(); diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index afdb11a9..30a51805 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -134,7 +134,9 @@ void bch2_btree_build_aux_trees(struct btree *); void bch2_btree_init_next(struct btree_trans *, struct btree *); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *, - struct btree *, bool, bool *); + struct btree *, + struct bch_io_failures *, + struct printbuf *); void bch2_btree_node_read(struct btree_trans *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index c824f1b6..9d941619 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct struct bpos end) { if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && - !bkey_eq(iter->pos, POS_MAX)) { + !bkey_eq(iter->pos, POS_MAX) && + !((iter->flags & BTREE_ITER_is_extents) && + iter->pos.offset == U64_MAX)) { + /* * bkey_start_pos(), for extents, is not monotonically * increasing until after filtering for snapshots: @@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode); int ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { @@ -3123,6 +3126,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long struct btree_transaction_stats *s = btree_trans_stats(trans); if (new_bytes > s->max_mem) { + mutex_lock(&s->lock); #ifdef CONFIG_BCACHEFS_DEBUG darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr); s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size, @@ -3134,6 +3138,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long s->trans_kmalloc_trace.nr); #endif s->max_mem = new_bytes; + mutex_unlock(&s->lock); } if (trans->used_mempool) { diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c index 7d6c971d..ade3b5ad 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree_journal_iter.c @@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL); + new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL); if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c index 2bffd512..ce83cd03 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree_update.c @@ -511,8 +511,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, return 0; } -int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_i *k, enum btree_iter_update_trigger_flags flags) +int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *k, enum btree_iter_update_trigger_flags flags, + unsigned long ip) { kmsan_check_memory(k, bkey_bytes(&k->k)); @@ -548,7 +549,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter path_idx = iter->key_cache_path; } - return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_); + return bch2_trans_update_by_path(trans, path_idx, k, flags, ip); } int bch2_btree_insert_clone_trans(struct btree_trans *trans, diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index e674419c..62d24b08 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -102,8 +102,16 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter * int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, enum btree_id, struct bpos); -int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, - struct bkey_i *, enum btree_iter_update_trigger_flags); +int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *, + struct bkey_i *, enum btree_iter_update_trigger_flags, + unsigned long); + +static inline int __must_check +bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *k, enum btree_iter_update_trigger_flags flags) +{ + return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_); +} struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index fcca8b01..3d25c2be 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1396,7 +1396,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, printbuf_exit(&buf); } -static void +static int bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1418,7 +1418,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, insert = bkey_next(insert)) bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - if (bch2_btree_node_check_topology(trans, b)) { + int ret = bch2_btree_node_check_topology(trans, b); + if (ret) { struct printbuf buf = PRINTBUF; for (struct bkey_i *k = keys->keys; @@ -1428,11 +1429,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, prt_newline(&buf); } - panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s", + (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf); + dump_stack(); + return ret; } memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); keys->top_p -= insert->_data - keys->keys_p; + return 0; } static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) @@ -1566,11 +1571,11 @@ static void __btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, - struct btree_trans *trans, - btree_path_idx_t path_idx, - struct btree *b, - struct keylist *keys) +static int btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + btree_path_idx_t path_idx, + struct btree *b, + struct keylist *keys) { struct btree_path *path = trans->paths + path_idx; @@ -1580,8 +1585,12 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + if (ret) + return ret; } + + return 0; } static int btree_split(struct btree_update *as, struct btree_trans *trans, @@ -1614,8 +1623,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, __btree_split_node(as, trans, b, n, keys); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); - btree_split_insert_keys(as, trans, path, n2, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys) ?: + btree_split_insert_keys(as, trans, path, n2, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1661,7 +1672,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + if (ret) + goto err; } } else { trace_and_count(c, btree_node_compact, trans, b); @@ -1669,7 +1682,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1816,15 +1831,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - ret = bch2_btree_node_check_topology(trans, b); + + ret = bch2_btree_node_check_topology(trans, b) ?: + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); if (ret) { bch2_btree_node_unlock_write(trans, path, b); return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 7c267244..596edc7b 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -399,7 +399,7 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf bool print = __bch2_count_fsck_err(c, id, buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, buf, BCH_RECOVERY_PASS_check_allocations); if (insert) { @@ -599,6 +599,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + if (!bucket_valid(ca, bucket.offset)) { + if (insert) { + bch2_dev_bucket_missing(ca, bucket.offset); + ret = -BCH_ERR_trigger_pointer; + } + goto err; + } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); @@ -965,7 +972,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf); - bch2_run_explicit_recovery_pass_printbuf(c, &buf, + bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_allocations); if (print) @@ -1310,13 +1317,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - bucket_gens->nbuckets = min(bucket_gens->nbuckets, - old_bucket_gens->nbuckets); - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; + u64 copy = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); memcpy(bucket_gens->b, old_bucket_gens->b, - bucket_gens->nbuckets); + sizeof(bucket_gens->b[0]) * copy); } rcu_assign_pointer(ca->bucket_gens, bucket_gens); diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h index 88f0ca3f..50ec3dec 100644 --- a/libbcachefs/darray.h +++ b/libbcachefs/darray.h @@ -21,6 +21,7 @@ struct { \ typedef DARRAY(char) darray_char; typedef DARRAY(char *) darray_str; +typedef DARRAY(const char *) darray_const_str; typedef DARRAY(u8) darray_u8; typedef DARRAY(u16) darray_u16; diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index c24de2e3..8b8800ad 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -42,7 +42,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, struct btree_node *n_sorted = c->verify_data->data; struct bset *sorted, *inmemory = &b->data->keys; struct bio *bio; - bool failed = false, saw_error = false; + bool failed = false; struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_verify_replicas); @@ -66,7 +66,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); v->written = 0; - if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) + if (bch2_btree_node_read_done(c, ca, v, NULL, NULL)) return false; n_sorted = c->verify_data->data; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 8488a757..b012b9dd 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -13,8 +13,8 @@ #include <linux/dcache.h> -static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) +int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); @@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info * #endif } -static inline int bch2_maybe_casefold(struct btree_trans *trans, - const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) -{ - if (likely(!info->cf_encoding)) { - *out_cf = *str; - return 0; - } else { - return bch2_casefold(trans, info, str, out_cf); - } -} - static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name)) @@ -224,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); - prt_printf(out, "%.*s -> ", d_name.len, d_name.name); + prt_printf(out, "%.*s", d_name.len, d_name.name); + + if (d.v->d_casefold) { + struct qstr d_name = bch2_dirent_get_lookup_name(d); + prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name); + } + + prt_str(out, " ->"); if (d.v->d_type != DT_SUBVOL) - prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum)); + prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum)); else - prt_printf(out, "%u -> %u", + prt_printf(out, " %u -> %u", le32_to_cpu(d.v->d_parent_subvol), le32_to_cpu(d.v->d_child_subvol)); diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index 0880772b..9838a7ba 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -23,6 +23,21 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, + const struct qstr *, struct qstr *); + +static inline int bch2_maybe_casefold(struct btree_trans *trans, + const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + if (likely(!info->cf_encoding)) { + *out_cf = *str; + return 0; + } else { + return bch2_casefold(trans, info, str, out_cf); + } +} + struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 83ed5a63..13e28f9a 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -175,6 +175,7 @@ x(0, backpointer_to_overwritten_btree_node) \ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ + x(BCH_ERR_fsck, fsck_ask) \ x(BCH_ERR_fsck, fsck_fix) \ x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 91a65a20..d7bc70fd 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) __bch2_inconsistent_error(c, out); return -BCH_ERR_btree_need_topology_repair; } else { - return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?: + return bch2_run_explicit_recovery_pass_persistent(c, out, BCH_RECOVERY_PASS_check_topology) ?: -BCH_ERR_btree_node_read_validate_error; } } @@ -393,6 +393,48 @@ bool __bch2_count_fsck_err(struct bch_fs *c, return print && !repeat; } +int bch2_fsck_err_opt(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err) +{ + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + flags |= fsck_flags_extra[err]; + + if (test_bit(BCH_FS_fsck_running, &c->flags)) { + if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) + return -BCH_ERR_fsck_repair_unimplemented; + + switch (c->opts.fix_errors) { + case FSCK_FIX_exit: + return -BCH_ERR_fsck_errors_not_fixed; + case FSCK_FIX_yes: + if (flags & FSCK_CAN_FIX) + return -BCH_ERR_fsck_fix; + fallthrough; + case FSCK_FIX_no: + if (flags & FSCK_CAN_IGNORE) + return -BCH_ERR_fsck_ignore; + return -BCH_ERR_fsck_errors_not_fixed; + case FSCK_FIX_ask: + if (flags & FSCK_AUTOFIX) + return -BCH_ERR_fsck_fix; + return -BCH_ERR_fsck_ask; + default: + BUG(); + } + } else { + if ((flags & FSCK_AUTOFIX) && + (c->opts.errors == BCH_ON_ERROR_continue || + c->opts.errors == BCH_ON_ERROR_fix_safe)) + return -BCH_ERR_fsck_fix; + + if (c->opts.errors == BCH_ON_ERROR_continue && + (flags & FSCK_CAN_IGNORE)) + return -BCH_ERR_fsck_ignore; + return -BCH_ERR_fsck_errors_not_fixed; + } +} + int __bch2_fsck_err(struct bch_fs *c, struct btree_trans *trans, enum bch_fsck_flags flags, @@ -484,7 +526,9 @@ int __bch2_fsck_err(struct bch_fs *c, } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { - prt_str(out, ", shutting down"); + prt_str_indented(out, ", shutting down\n" + "error not marked as autofix and not in fsck\n" + "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; ret = -BCH_ERR_fsck_errors_not_fixed; diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 0b3ede1c..d89dd270 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -80,6 +80,10 @@ bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbu #define bch2_count_fsck_err(_c, _err, ...) \ __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__) +int bch2_fsck_err_opt(struct bch_fs *, + enum bch_fsck_flags, + enum bch_sb_error_id); + __printf(5, 6) __cold int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, enum bch_fsck_flags, diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 7058a343..18506061 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -45,6 +45,49 @@ static void bch2_extent_crc_pack(union bch_extent_crc *, struct bch_extent_crc_unpacked, enum bch_extent_entry_type); +void bch2_io_failures_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_io_failures *failed) +{ + static const char * const error_types[] = { + "io", "checksum", "ec reconstruct", NULL + }; + + for (struct bch_dev_io_failures *f = failed->devs; + f < failed->devs + failed->nr; + f++) { + unsigned errflags = + ((!!f->failed_io) << 0) | + ((!!f->failed_csum_nr) << 1) | + ((!!f->failed_ec) << 2); + + if (!errflags) + continue; + + bch2_printbuf_make_room(out, 1024); + rcu_read_lock(); + out->atomic++; + struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev); + if (ca) + prt_str(out, ca->name); + else + prt_printf(out, "(invalid device %u)", f->dev); + --out->atomic; + rcu_read_unlock(); + + prt_char(out, ' '); + + if (is_power_of_2(errflags)) { + prt_bitflags(out, error_types, errflags); + prt_str(out, " error"); + } else { + prt_str(out, "errors: "); + prt_bitflags(out, error_types, errflags); + } + prt_newline(out); + } +} + struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f, unsigned dev) { @@ -79,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed, f->failed_csum_nr++; } +void bch2_mark_btree_validate_failure(struct bch_io_failures *failed, + unsigned dev) +{ + struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev); + + if (!f) { + BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); + + f = &failed->devs[failed->nr++]; + memset(f, 0, sizeof(*f)); + f->dev = dev; + } + + f->failed_btree_validate = true; +} + static inline u64 dev_latency(struct bch_dev *ca) { return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX; @@ -179,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) { have_io_errors |= f->failed_io; + have_io_errors |= f->failed_btree_validate; have_io_errors |= f->failed_ec; } have_csum_errors |= !!f->failed_csum_nr; @@ -186,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (p.has_ec && (f->failed_io || f->failed_csum_nr)) p.do_ec_reconstruct = true; else if (f->failed_io || + f->failed_btree_validate || f->failed_csum_nr > c->opts.checksum_err_retry_nr) continue; } diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index e78a39e7..f25be08d 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -399,10 +399,13 @@ out: \ /* utility code common to all keys with pointers: */ +void bch2_io_failures_to_text(struct printbuf *, struct bch_fs *, + struct bch_io_failures *); struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *, unsigned); void bch2_mark_io_failure(struct bch_io_failures *, struct extent_ptr_decoded *, bool); +void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned); int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, struct bch_io_failures *, struct extent_ptr_decoded *, int); diff --git a/libbcachefs/extents_types.h b/libbcachefs/extents_types.h index e51529dc..b23ce4a3 100644 --- a/libbcachefs/extents_types.h +++ b/libbcachefs/extents_types.h @@ -34,6 +34,7 @@ struct bch_io_failures { u8 dev; unsigned failed_csum_nr:6, failed_io:1, + failed_btree_validate:1, failed_ec:1; } devs[BCH_REPLICAS_MAX + 1]; }; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 0422dcab..f1472e91 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -53,7 +53,7 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_subvolume *); /* Set VFS inode flags from bcachefs inode: */ -static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) +static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode) { static const __maybe_unused unsigned bch_flags_to_vfs[] = { [__BCH_INODE_sync] = S_SYNC, @@ -64,8 +64,10 @@ static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); - if (inode->ei_inode.bi_casefold) + if (bch2_inode_casefold(c, &inode->ei_inode)) inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; } void bch2_inode_update_after_write(struct btree_trans *trans, @@ -96,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, inode->ei_inode = *bi; - bch2_inode_flags_to_vfs(inode); + bch2_inode_flags_to_vfs(c, inode); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -647,13 +649,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dirent_iter = {}; subvol_inum inum = {}; struct printbuf buf = PRINTBUF; + struct qstr lookup_name; + int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); + if (ret) + return ERR_PTR(ret); + + struct btree_iter dirent_iter = {}; struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc, - dir_hash_info, dir, name, 0); - int ret = bkey_err(k); + dir_hash_info, dir, &lookup_name, 0); + ret = bkey_err(k); if (ret) return ERR_PTR(ret); @@ -841,6 +848,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, */ set_nlink(&inode->v, 0); } + + if (IS_CASEFOLDED(vdir)) + d_invalidate(dentry); err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -1251,10 +1261,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } +struct bch_fiemap_extent { + struct bkey_buf kbuf; + unsigned flags; +}; + static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, - struct bkey_s_c k, unsigned flags) + struct bch_fiemap_extent *fe) { + struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); + unsigned flags = fe->flags; + + BUG_ON(!k.k->size); + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1307,110 +1327,223 @@ static int bch2_fill_extent(struct bch_fs *c, } } -static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - u64 start, u64 len) +/* + * Scan a range of an inode for data in pagecache. + * + * Intended to be retryable, so don't modify the output params until success is + * imminent. + */ +static int +bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, + bool nonblock) { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf cur, prev; - bool have_extent = false; - int ret = 0; + loff_t dstart, dend; - ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); - if (ret) + dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock); + if (dstart < 0) + return dstart; + + if (dstart == *end) { + *start = dstart; + return 0; + } + + dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock); + if (dend < 0) + return dend; + + /* race */ + BUG_ON(dstart == dend); + + *start = dstart; + *end = dend; + return 0; +} + +/* + * Scan a range of pagecache that corresponds to a file mapping hole in the + * extent btree. If data is found, fake up an extent key so it looks like a + * delalloc extent to the rest of the fiemap processing code. + */ +static int +bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode, + u64 start, u64 end, struct bch_fiemap_extent *cur) +{ + struct bch_fs *c = trans->c; + struct bkey_i_extent *delextent; + struct bch_extent_ptr ptr = {}; + loff_t dstart = start << 9, dend = end << 9; + int ret; + + /* + * We hold btree locks here so we cannot block on folio locks without + * dropping trans locks first. Run a nonblocking scan for the common + * case of no folios over holes and fall back on failure. + * + * Note that dropping locks like this is technically racy against + * writeback inserting to the extent tree, but a non-sync fiemap scan is + * fundamentally racy with writeback anyways. Therefore, just report the + * range as delalloc regardless of whether we have to cycle trans locks. + */ + ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true); + if (ret == -EAGAIN) + ret = drop_locks_do(trans, + bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false)); + if (ret < 0) return ret; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); - if (start + len < start) - return -EINVAL; + /* + * Create a fake extent key in the buffer. We have to add a dummy extent + * pointer for the fill code to add an extent entry. It's explicitly + * zeroed to reflect delayed allocation (i.e. phys offset 0). + */ + bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); + delextent = bkey_extent_init(cur->kbuf.k); + delextent->k.p = POS(inode->ei_inum.inum, dend >> 9); + delextent->k.size = (dend - dstart) >> 9; + bch2_bkey_append_ptr(&delextent->k_i, ptr); - start >>= 9; + cur->flags = FIEMAP_EXTENT_DELALLOC; - bch2_bkey_buf_init(&cur); - bch2_bkey_buf_init(&prev); - trans = bch2_trans_get(c); + return 0; +} +static int bch2_next_fiemap_extent(struct btree_trans *trans, + struct bch_inode_info *inode, + u64 start, u64 end, + struct bch_fiemap_extent *cur) +{ + u32 snapshot; + int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); + if (ret) + return ret; + + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start), 0); + SPOS(inode->ei_inum.inum, start, snapshot), 0); - while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - enum btree_id data_btree = BTREE_ID_extents; + struct bkey_s_c k = + bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); + ret = bkey_err(k); + if (ret) + goto err; - bch2_trans_begin(trans); + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur); + if (ret) + goto err; - u32 snapshot; - ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); - if (ret) - continue; + struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); - bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + /* + * Does the pagecache or the btree take precedence? + * + * It _should_ be the pagecache, so that we correctly report delalloc + * extents when dirty in the pagecache (we're COW, after all). + * + * But we'd have to add per-sector writeback tracking to + * bch_folio_state, otherwise we report delalloc extents for clean + * cached data in the pagecache. + * + * We should do this, but even then fiemap won't report stable mappings: + * on bcachefs data moves around in the background (copygc, rebalance) + * and we don't provide a way for userspace to lock that out. + */ + if (k.k && + bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)), + pagecache_start)) { + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + bch2_cut_front(iter.pos, cur->kbuf.k); + bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k); + cur->flags = 0; + } else if (k.k) { + bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k); + } - k = bch2_btree_iter_peek_max(trans, &iter, end); - ret = bkey_err(k); + if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) { + unsigned sectors = cur->kbuf.k->k.size; + s64 offset_into_extent = 0; + enum btree_id data_btree = BTREE_ID_extents; + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) - continue; + goto err; - if (!k.k) - break; + struct bkey_i *k = cur->kbuf.k; + sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(trans, &iter); - continue; - } + bch2_cut_front(POS(k->k.p.inode, + bkey_start_offset(&k->k) + offset_into_extent), + k); + bch2_key_resize(&k->k, sectors); + k->k.p = iter.pos; + k->k.p.offset += k->k.size; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + u64 start, u64 len) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct btree_trans *trans; + struct bch_fiemap_extent cur, prev; + int ret = 0; + + ret = fiemap_prep(&ei->v, info, start, &len, 0); + if (ret) + return ret; + + if (start + len < start) + return -EINVAL; + + start >>= 9; + u64 end = (start + len) >> 9; - bch2_bkey_buf_reassemble(&cur, c, k); + bch2_bkey_buf_init(&cur.kbuf); + bch2_bkey_buf_init(&prev.kbuf); + bkey_init(&prev.kbuf.k->k); - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &cur); + trans = bch2_trans_get(c); + + while (start < end) { + ret = lockrestart_do(trans, + bch2_next_fiemap_extent(trans, ei, start, end, &cur)); if (ret) - continue; + goto err; - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); + BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start); + BUG_ON(cur.kbuf.k->k.p.offset > end); - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + if (bkey_start_offset(&cur.kbuf.k->k) == end) + break; - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); - bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter.pos; - cur.k->k.p.offset += cur.k->k.size; + start = cur.kbuf.k->k.p.offset; - if (have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(prev.k), 0); + ret = bch2_fill_extent(c, info, &prev); if (ret) - break; + goto err; } - bkey_copy(prev.k, cur.k); - have_extent = true; - - bch2_btree_iter_set_pos(trans, &iter, - POS(iter.pos.inode, iter.pos.offset + sectors)); + bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k); + prev.flags = cur.flags; } - bch2_trans_iter_exit(trans, &iter); - if (!ret && have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), - FIEMAP_EXTENT_LAST); + prev.flags |= FIEMAP_EXTENT_LAST; + ret = bch2_fill_extent(c, info, &prev); } - +err: bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret < 0 ? ret : 0; + bch2_bkey_buf_exit(&cur.kbuf, c); + bch2_bkey_buf_exit(&prev.kbuf, c); + + return bch2_err_class(ret < 0 ? ret : 0); } static const struct vm_operations_struct bch_vm_ops = { @@ -1487,13 +1620,14 @@ static int bch2_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; - if (inode->ei_inode.bi_casefold) + if (bch2_inode_casefold(c, &inode->ei_inode)) fa->flags |= FS_CASEFOLD_FL; fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; @@ -1526,7 +1660,7 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) return -EINVAL; - if (s->casefold != bi->bi_casefold) { + if (s->casefold != bch2_inode_casefold(c, bi)) { #ifdef CONFIG_UNICODE int ret = 0; /* Not supported on individual files. */ @@ -1547,9 +1681,8 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, bch2_check_set_feature(c, BCH_FEATURE_casefolding); - bi->bi_casefold = s->casefold; - bi->bi_fields_set &= ~BIT(Inode_opt_casefold); - bi->bi_fields_set |= s->casefold << Inode_opt_casefold; + bi->bi_casefold = s->casefold + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); #else printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); @@ -2330,7 +2463,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) struct inode *vinode; struct bch2_opts_parse *opts_parse = fc->fs_private; struct bch_opts opts = opts_parse->opts; - darray_str devs; + darray_const_str devs; darray_fs devs_to_fs = {}; int ret; @@ -2354,7 +2487,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (!IS_ERR(sb)) goto got_sb; - c = bch2_fs_open(devs.data, devs.nr, opts); + c = bch2_fs_open(&devs, &opts); ret = PTR_ERR_OR_ZERO(c); if (ret) goto err; @@ -2445,6 +2578,11 @@ got_sb: if (ret) goto err_put_super; +#ifdef CONFIG_UNICODE + sb->s_encoding = c->cf_encoding; +#endif + generic_set_sb_d_ops(sb); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); bch_err_msg(c, ret, "mounting: error getting root inode"); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 540db66e..7d3dd1a0 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -3022,7 +3022,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) { struct bch_ioctl_fsck_offline arg; struct fsck_thread *thr = NULL; - darray_str(devs) = {}; + darray_const_str devs = {}; long ret = 0; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -3080,7 +3080,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); - thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); + thr->c = bch2_fs_open(&devs, &thr->opts); if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 1b2fc902..bb31e953 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k) } } +static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi) +{ + /* inode apts are stored with a +1 bias: 0 means "unset, use fs opt" */ + return bi->bi_casefold + ? bi->bi_casefold - 1 + : c->opts.casefold; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c index f23c3bfe..73a0a42a 100644 --- a/libbcachefs/io_read.c +++ b/libbcachefs/io_read.c @@ -573,7 +573,6 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; - int orig_error = rbio->ret; struct btree_trans *trans = bch2_trans_get(c); @@ -614,10 +613,11 @@ static void bch2_rbio_retry(struct work_struct *work) if (ret) { rbio->ret = ret; rbio->bio.bi_status = BLK_STS_IOERR; - } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && - orig_error != -BCH_ERR_data_read_ptr_stale_race && - !failed.nr) { + } + + if (failed.nr || ret) { struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, @@ -625,9 +625,22 @@ static void bch2_rbio_retry(struct work_struct *work) read_pos.offset << 9)); if (rbio->data_update) prt_str(&buf, "(internal move) "); - prt_str(&buf, "successful retry"); - bch_err_ratelimited(c, "%s", buf.buf); + prt_str(&buf, "data read error, "); + if (!ret) + prt_str(&buf, "successful retry"); + else + prt_str(&buf, bch2_err_str(ret)); + prt_newline(&buf); + + if (!bkey_deleted(&sk.k->k)) { + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k)); + prt_newline(&buf); + } + + bch2_io_failures_to_text(&buf, c, &failed); + + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } @@ -662,27 +675,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, } } -static void bch2_read_io_err(struct work_struct *work) -{ - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bio *bio = &rbio->bio; - struct bch_fs *c = rbio->c; - struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; - struct printbuf buf = PRINTBUF; - - bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); - prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status)); - - if (ca) - bch_err_ratelimited(ca, "%s", buf.buf); - else - bch_err_ratelimited(c, "%s", buf.buf); - - printbuf_exit(&buf); - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status); -} - static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_read_bio *rbio) { @@ -746,31 +738,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) __bch2_rbio_narrow_crcs(trans, rbio)); } -static void bch2_read_csum_err(struct work_struct *work) -{ - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bch_fs *c = rbio->c; - struct bio *src = &rbio->bio; - struct bch_extent_crc_unpacked crc = rbio->pick.crc; - struct nonce nonce = extent_nonce(rbio->version, crc); - struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); - struct printbuf buf = PRINTBUF; - - bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); - prt_str(&buf, "data "); - bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); - - struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; - if (ca) - bch_err_ratelimited(ca, "%s", buf.buf); - else - bch_err_ratelimited(c, "%s", buf.buf); - - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR); - printbuf_exit(&buf); -} - static void bch2_read_decompress_err(struct work_struct *work) { struct bch_read_bio *rbio = @@ -931,7 +898,7 @@ out: memalloc_nofs_restore(nofs_flags); return; csum_err: - bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR); goto out; decompression_err: bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); @@ -957,7 +924,7 @@ static void bch2_read_endio(struct bio *bio) rbio->bio.bi_end_io = rbio->end_io; if (unlikely(bio->bi_status)) { - bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status); return; } @@ -1289,14 +1256,6 @@ retry_pick: if (likely(!rbio->pick.do_ec_reconstruct)) { if (unlikely(!rbio->have_ioref)) { - struct printbuf buf = PRINTBUF; - bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); - prt_printf(&buf, "no device to read from:\n "); - bch2_bkey_val_to_text(&buf, c, k); - - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_device_offline, BLK_STS_IOERR); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 66bfb95f..c3a32403 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -214,18 +214,20 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; if (nr_online < metadata_replicas_required(c)) { - struct printbuf buf = PRINTBUF; - buf.atomic++; - prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" - "rw journal devs:", nr_online, metadata_replicas_required(c)); - - rcu_read_lock(); - for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) - prt_printf(&buf, " %s", ca->name); - rcu_read_unlock(); - - bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); + if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) { + struct printbuf buf = PRINTBUF; + buf.atomic++; + prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" + "rw journal devs:", nr_online, metadata_replicas_required(c)); + + rcu_read_lock(); + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) + prt_printf(&buf, " %s", ca->name); + rcu_read_unlock(); + + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + } ret = -BCH_ERR_insufficient_journal_devices; goto out; } diff --git a/libbcachefs/move.c b/libbcachefs/move.c index f605bd01..532566ea 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -675,7 +675,7 @@ root_err: if (ret) break; - if (bkey_ge(bkey_start_pos(k.k), end)) + if (bkey_gt(bkey_start_pos(k.k), end)) break; if (ctxt->stats) @@ -750,7 +750,8 @@ next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(trans, &iter); + if (!bch2_btree_iter_advance(trans, &iter)) + break; } out: bch2_trans_iter_exit(trans, &reflink_iter); diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c index 46f3c8b1..52c58c6d 100644 --- a/libbcachefs/namei.c +++ b/libbcachefs/namei.c @@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold) + continue; + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index e36fcab9..6aae686f 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -33,7 +33,9 @@ #include <linux/sort.h> #include <linux/stat.h> -int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, + struct printbuf *msg, + enum btree_id btree) { u64 b = BIT_ULL(btree); int ret = 0; @@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (!(c->sb.btrees_lost_data & b)) { - struct printbuf buf = PRINTBUF; - bch2_btree_id_to_text(&buf, btree); - bch_err(c, "flagging btree %s lost data", buf.buf); - printbuf_exit(&buf); + prt_printf(msg, "flagging btree "); + bch2_btree_id_to_text(msg, btree); + prt_printf(msg, " lost data\n"); + ext->btrees_lost_data |= cpu_to_le64(b); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -77,26 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; + goto out; + case BTREE_ID_snapshots: + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: @@ -583,9 +589,6 @@ static int read_btree_roots(struct bch_fs *c) buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; - - ret = bch2_btree_lost_data(c, i); - BUG_ON(ret); } } @@ -734,6 +737,11 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.read_only = true; } + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_info(c, "filesystem is an unresized image file, mounting ro"); + c->opts.read_only = true; + } + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; @@ -949,8 +957,6 @@ use_clean: set_bit(BCH_FS_btree_running, &c->flags); ret = bch2_sb_set_upgrade_extra(c); - - ret = bch2_fs_resize_on_mount(c); if (ret) goto err; diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h index d858ba67..c023f52f 100644 --- a/libbcachefs/recovery.h +++ b/libbcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -int bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id); void bch2_reconstruct_alloc(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c index 87150dd3..e14aca00 100644 --- a/libbcachefs/recovery_passes.c +++ b/libbcachefs/recovery_passes.c @@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, if (pass < BCH_RECOVERY_PASS_set_may_go_rw && c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { if (print) - prt_printf(out, "need recovery pass %s (%u), but already rw", + prt_printf(out, "need recovery pass %s (%u), but already rw\n", bch2_recovery_passes[pass], pass); return -BCH_ERR_cannot_rewind_recovery; } if (print) - prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)", + prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -162,7 +162,7 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, } } -int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, +static int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, struct printbuf *out, enum bch_recovery_pass pass) { @@ -193,32 +193,30 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } -int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, - enum bch_recovery_pass pass) +int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + struct printbuf *out, + enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); - return bch2_run_explicit_recovery_pass(c, pass); + return bch2_run_explicit_recovery_pass_printbuf(c, out, pass); } int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + struct printbuf *out, enum bch_recovery_pass pass) { - enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); + if (c->sb.recovery_passes_required & BIT_ULL(pass)) + return 0; mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - - if (!test_bit_le64(s, ext->recovery_passes_required)) { - __set_bit_le64(s, ext->recovery_passes_required); - bch2_write_super(c); - } + int ret = __bch2_run_explicit_recovery_pass_persistent(c, out, pass); mutex_unlock(&c->sb_lock); - return bch2_run_explicit_recovery_pass(c, pass); + return ret; } static void bch2_clear_recovery_pass_required(struct bch_fs *c, diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h index e19a8aab..f33dd005 100644 --- a/libbcachefs/recovery_passes.h +++ b/libbcachefs/recovery_passes.h @@ -8,12 +8,12 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); -int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, - struct printbuf *, - enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + +int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); int bch2_run_recovery_passes(struct bch_fs *); diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c index 6a16767e..296c6c92 100644 --- a/libbcachefs/sb-downgrade.c +++ b/libbcachefs/sb-downgrade.c @@ -20,6 +20,10 @@ * x(version, recovery_passes, errors...) */ #define UPGRADE_TABLE() \ + x(snapshot_2, \ + RECOVERY_PASS_ALL_FSCK, \ + BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ + BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ x(backpointers, \ RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, \ diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h index 2e1d487a..98c89852 100644 --- a/libbcachefs/sb-errors_format.h +++ b/libbcachefs/sb-errors_format.h @@ -46,7 +46,7 @@ enum bch_fsck_flags { x(btree_node_unsupported_version, 34, 0) \ x(btree_node_bset_older_than_sb_min, 35, 0) \ x(btree_node_bset_newer_than_sb, 36, 0) \ - x(btree_node_data_missing, 37, 0) \ + x(btree_node_data_missing, 37, FSCK_AUTOFIX) \ x(btree_node_bset_after_end, 38, 0) \ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ x(btree_node_replicas_data_mismatch, 40, 0) \ @@ -205,9 +205,9 @@ enum bch_fsck_flags { x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ - x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \ x(subvol_to_missing_root, 188, 0) \ - x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index b2f9cb2d..e810d85c 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -20,7 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_allocations); if (print) @@ -35,9 +35,11 @@ void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } -void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) +void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) { - bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); + bch2_fs_inconsistent(ca->fs, + "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)", + bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets); } #define x(t, n, ...) [n] = #t, diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h index 0337e34d..c9cb8f76 100644 --- a/libbcachefs/sb-members.h +++ b/libbcachefs/sb-members.h @@ -258,20 +258,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); - if (ca && !bucket_valid(ca, bucket.offset)) { + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { bch2_dev_put(ca); ca = NULL; } return ca; } -void bch2_dev_bucket_missing(struct bch_fs *, struct bpos); +void bch2_dev_bucket_missing(struct bch_dev *, u64); static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) { - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket); - if (!ca) - bch2_dev_bucket_missing(c, bucket); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { + bch2_dev_bucket_missing(ca, bucket.offset); + bch2_dev_put(ca); + ca = NULL; + } return ca; } diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index 14ea09cc..94cf60f7 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -1743,10 +1743,6 @@ int bch2_snapshots_read(struct bch_fs *c) BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && test_bit(BCH_FS_may_go_rw, &c->flags)); - if (bch2_err_matches(ret, EIO) || - (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))) - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots); - return ret; } diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index a67ee4ad..0c1a0053 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u8 type; - struct unicode_map *cf_encoding; + struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of * the siphash_key (k0) is used as the key. @@ -44,11 +44,10 @@ struct bch_hash_info { static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { - /* XXX ick */ struct bch_hash_info info = { .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = bi->bi_casefold ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, #endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 39376c87..51ab2ee1 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -23,7 +23,7 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) prt_printf(&buf, "missing subvolume %u", subvolid); bool print = bch2_count_fsck_err(c, subvol_missing, &buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_inodes); if (print) bch2_print_str(c, KERN_ERR, buf.buf); @@ -62,8 +62,8 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, snapid); + return bch2_run_explicit_recovery_pass(c, + BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index d53cbc5f..8730d2e7 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -623,6 +623,9 @@ static void bch2_sb_update(struct bch_fs *c) struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); if (ext) { + c->sb.recovery_passes_required = + bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index f861583c..0f9c38f8 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -214,6 +214,7 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); static void bch2_dev_io_ref_stop(struct bch_dev *, int); static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); static int bch2_fs_init_rw(struct bch_fs *); +static int bch2_fs_resize_on_mount(struct bch_fs *); struct bch_fs *bch2_dev_to_fs(dev_t dev) { @@ -567,6 +568,10 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); +#ifdef CONFIG_UNICODE + utf8_unload(c->cf_encoding); +#endif + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); @@ -798,7 +803,7 @@ static int bch2_fs_init_rw(struct bch_fs *c) return 0; } -static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, +static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch_sb_handles *sbs) { struct bch_fs *c; @@ -812,7 +817,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, goto out; } - c->stdio = (void *)(unsigned long) opts.stdio; + c->stdio = (void *)(unsigned long) opts->stdio; __module_get(THIS_MODULE); @@ -898,25 +903,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, if (ret) goto err; -#ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; - } -#else - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif - /* Compat: */ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 && !BCH_SB_JOURNAL_FLUSH_DELAY(sb)) @@ -931,7 +917,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, if (ret) goto err; - bch2_opts_apply(&c->opts, opts); + bch2_opts_apply(&c->opts, *opts); c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc; if (c->opts.inodes_use_key_cache) @@ -1002,6 +988,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, if (ret) goto err; +#ifdef CONFIG_UNICODE + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; @@ -1070,6 +1079,40 @@ static void print_mount_opts(struct bch_fs *c) printbuf_exit(&p); } +static bool bch2_fs_may_start(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned flags = 0; + + switch (c->opts.degraded) { + case BCH_DEGRADED_very: + flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; + break; + case BCH_DEGRADED_yes: + flags |= BCH_FORCE_IF_DEGRADED; + break; + default: + mutex_lock(&c->sb_lock); + for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + + ca = bch2_dev_locked(c, i); + + if (!bch2_dev_is_online(ca) && + (ca->mi.state == BCH_MEMBER_STATE_rw || + ca->mi.state == BCH_MEMBER_STATE_ro)) { + mutex_unlock(&c->sb_lock); + return false; + } + } + mutex_unlock(&c->sb_lock); + break; + } + + return bch2_have_enough_devs(c, c->online_devs, flags, true); +} + int bch2_fs_start(struct bch_fs *c) { time64_t now = ktime_get_real_seconds(); @@ -1077,6 +1120,9 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); + if (!bch2_fs_may_start(c)) + return -BCH_ERR_insufficient_devices_to_start; + down_write(&c->state_lock); mutex_lock(&c->sb_lock); @@ -1106,6 +1152,12 @@ int bch2_fs_start(struct bch_fs *c) bch2_write_super(c); mutex_unlock(&c->sb_lock); + ret = bch2_fs_resize_on_mount(c); + if (ret) { + up_write(&c->state_lock); + goto err; + } + rcu_read_lock(); for_each_online_member_rcu(c, ca) if (ca->mi.state == BCH_MEMBER_STATE_rw) @@ -1593,40 +1645,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, } } -static bool bch2_fs_may_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned flags = 0; - - switch (c->opts.degraded) { - case BCH_DEGRADED_very: - flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; - break; - case BCH_DEGRADED_yes: - flags |= BCH_FORCE_IF_DEGRADED; - break; - default: - mutex_lock(&c->sb_lock); - for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) { - if (!bch2_member_exists(c->disk_sb.sb, i)) - continue; - - ca = bch2_dev_locked(c, i); - - if (!bch2_dev_is_online(ca) && - (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); - return false; - } - } - mutex_unlock(&c->sb_lock); - break; - } - - return bch2_have_enough_devs(c, c->online_devs, flags, true); -} - static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) { bch2_dev_io_ref_stop(ca, WRITE); @@ -1876,6 +1894,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_unlock; } unsigned dev_idx = ret; + ret = 0; /* success: */ @@ -1895,27 +1914,29 @@ int bch2_dev_add(struct bch_fs *c, const char *path) bch2_write_super(c); mutex_unlock(&c->sb_lock); - ret = bch2_dev_usage_init(ca, false); - if (ret) - goto err_late; + if (test_bit(BCH_FS_started, &c->flags)) { + ret = bch2_dev_usage_init(ca, false); + if (ret) + goto err_late; - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(ca, ret, "marking new superblock"); - if (ret) - goto err_late; + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + bch_err_msg(ca, ret, "marking new superblock"); + if (ret) + goto err_late; - ret = bch2_fs_freespace_init(c); - bch_err_msg(ca, ret, "initializing free space"); - if (ret) - goto err_late; + ret = bch2_fs_freespace_init(c); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err_late; - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); - ret = bch2_dev_journal_alloc(ca, false); - bch_err_msg(c, ret, "allocating journal"); - if (ret) - goto err_late; + ret = bch2_dev_journal_alloc(ca, false); + bch_err_msg(c, ret, "allocating journal"); + if (ret) + goto err_late; + } up_write(&c->state_lock); out: @@ -2096,10 +2117,8 @@ err: return ret; } -int bch2_fs_resize_on_mount(struct bch_fs *c) +static int bch2_fs_resize_on_mount(struct bch_fs *c) { - down_write(&c->state_lock); - for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) { u64 old_nbuckets = ca->mi.nbuckets; u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk), @@ -2138,9 +2157,6 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) } } } - - bch2_recalc_capacity(c); - up_write(&c->state_lock); return 0; } @@ -2260,8 +2276,8 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r) cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time)); } -struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, - struct bch_opts opts) +struct bch_fs *bch2_fs_open(darray_const_str *devices, + struct bch_opts *opts) { bch_sb_handles sbs = {}; struct bch_fs *c = NULL; @@ -2272,26 +2288,26 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, if (!try_module_get(THIS_MODULE)) return ERR_PTR(-ENODEV); - if (!nr_devices) { + if (!devices->nr) { ret = -EINVAL; goto err; } - ret = darray_make_room(&sbs, nr_devices); + ret = darray_make_room(&sbs, devices->nr); if (ret) goto err; - for (unsigned i = 0; i < nr_devices; i++) { + darray_for_each(*devices, i) { struct bch_sb_handle sb = { NULL }; - ret = bch2_read_super(devices[i], &opts, &sb); + ret = bch2_read_super(*i, opts, &sb); if (ret) goto err; BUG_ON(darray_push(&sbs, sb)); } - if (opts.nochanges && !opts.read_only) { + if (opts->nochanges && !opts->read_only) { ret = -BCH_ERR_erofs_nochanges; goto err_print; } @@ -2301,7 +2317,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, best = sb; darray_for_each_reverse(sbs, sb) { - ret = bch2_dev_in_fs(best, sb, &opts); + ret = bch2_dev_in_fs(best, sb, opts); if (ret == -BCH_ERR_device_has_been_removed || ret == -BCH_ERR_device_splitbrain) { @@ -2331,11 +2347,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } up_write(&c->state_lock); - if (!bch2_fs_may_start(c)) { - ret = -BCH_ERR_insufficient_devices_to_start; - goto err_print; - } - if (!c->opts.nostart) { ret = bch2_fs_start(c); if (ret) @@ -2350,7 +2361,7 @@ out: return c; err_print: pr_err("bch_fs_open err opening %s: %s", - devices[0], bch2_err_str(ret)); + devices->data[0], bch2_err_str(ret)); err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); diff --git a/libbcachefs/super.h b/libbcachefs/super.h index dbf59547..7252544c 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -29,7 +29,6 @@ int bch2_dev_add(struct bch_fs *, const char *); int bch2_dev_online(struct bch_fs *, const char *); int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); -int bch2_fs_resize_on_mount(struct bch_fs *); struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); @@ -44,7 +43,7 @@ void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); int bch2_fs_start(struct bch_fs *); -struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); +struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *); extern const struct blk_holder_ops bch2_sb_handle_bdev_ops; diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index c265b102..782a05fe 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static int test_peek_end(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; @@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr) static int test_peek_end_extents(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 7e6ebe8c..dc3817f5 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -252,6 +252,16 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } +static bool string_is_spaces(const char *str) +{ + while (*str) { + if (*str != ' ') + return false; + str++; + } + return true; +} + void bch2_print_string_as_lines(const char *prefix, const char *lines, bool nonblocking) { @@ -272,6 +282,9 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines, while (*lines) { p = strchrnul(lines, '\n'); + if (!*p && string_is_spaces(lines)) + break; + printk("%s%.*s\n", prefix, (int) (p - lines), lines); if (!*p) break; @@ -1003,14 +1016,14 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) return ret; } -void bch2_darray_str_exit(darray_str *d) +void bch2_darray_str_exit(darray_const_str *d) { darray_for_each(*d, i) kfree(*i); darray_exit(d); } -int bch2_split_devs(const char *_dev_name, darray_str *ret) +int bch2_split_devs(const char *_dev_name, darray_const_str *ret) { darray_init(ret); diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 7a93e187..14cb2c7d 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -690,8 +690,8 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r) return l.len == r.len && !memcmp(l.name, r.name, l.len); } -void bch2_darray_str_exit(darray_str *); -int bch2_split_devs(const char *, darray_str *); +void bch2_darray_str_exit(darray_const_str *); +int bch2_split_devs(const char *, darray_const_str *); #ifdef __KERNEL__ |