summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.bcachefs_revision2
-rw-r--r--Cargo.lock2
-rw-r--r--Cargo.toml2
-rw-r--r--Makefile2
-rw-r--r--bch_bindgen/src/fs.rs20
-rw-r--r--c_src/cmd_device.c12
-rw-r--r--c_src/cmd_dump.c5
-rw-r--r--c_src/cmd_format.c7
-rw-r--r--c_src/cmd_fsck.c10
-rw-r--r--c_src/cmd_key.c23
-rw-r--r--c_src/cmd_kill_btree_node.c4
-rw-r--r--c_src/cmd_list_journal.c4
-rw-r--r--c_src/cmd_migrate.c19
-rw-r--r--c_src/cmd_option.c4
-rw-r--r--c_src/posix_to_bcachefs.c2
-rw-r--r--c_src/tools-util.c8
-rw-r--r--c_src/tools-util.h6
-rw-r--r--libbcachefs/alloc_background.c10
-rw-r--r--libbcachefs/alloc_foreground.c2
-rw-r--r--libbcachefs/alloc_foreground.h4
-rw-r--r--libbcachefs/bcachefs.h11
-rw-r--r--libbcachefs/btree_gc.c39
-rw-r--r--libbcachefs/btree_io.c270
-rw-r--r--libbcachefs/btree_io.h4
-rw-r--r--libbcachefs/btree_iter.c9
-rw-r--r--libbcachefs/btree_journal_iter.c2
-rw-r--r--libbcachefs/btree_update.c7
-rw-r--r--libbcachefs/btree_update.h12
-rw-r--r--libbcachefs/btree_update_interior.c49
-rw-r--r--libbcachefs/buckets.c19
-rw-r--r--libbcachefs/darray.h1
-rw-r--r--libbcachefs/debug.c4
-rw-r--r--libbcachefs/dirent.c29
-rw-r--r--libbcachefs/dirent.h15
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/error.c48
-rw-r--r--libbcachefs/error.h4
-rw-r--r--libbcachefs/extents.c61
-rw-r--r--libbcachefs/extents.h3
-rw-r--r--libbcachefs/extents_types.h1
-rw-r--r--libbcachefs/fs.c308
-rw-r--r--libbcachefs/fsck.c4
-rw-r--r--libbcachefs/inode.h8
-rw-r--r--libbcachefs/io_read.c83
-rw-r--r--libbcachefs/journal_reclaim.c26
-rw-r--r--libbcachefs/move.c5
-rw-r--r--libbcachefs/namei.c3
-rw-r--r--libbcachefs/recovery.c52
-rw-r--r--libbcachefs/recovery.h2
-rw-r--r--libbcachefs/recovery_passes.c26
-rw-r--r--libbcachefs/recovery_passes.h10
-rw-r--r--libbcachefs/sb-downgrade.c4
-rw-r--r--libbcachefs/sb-errors_format.h6
-rw-r--r--libbcachefs/sb-members.c8
-rw-r--r--libbcachefs/sb-members.h13
-rw-r--r--libbcachefs/snapshot.c4
-rw-r--r--libbcachefs/str_hash.h5
-rw-r--r--libbcachefs/subvolume.c6
-rw-r--r--libbcachefs/super-io.c3
-rw-r--r--libbcachefs/super.c197
-rw-r--r--libbcachefs/super.h3
-rw-r--r--libbcachefs/tests.c4
-rw-r--r--libbcachefs/util.c17
-rw-r--r--libbcachefs/util.h4
64 files changed, 978 insertions, 560 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 5fe33c98..f08feb4a 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-c9d875f9be1f853e747c9e00421c678b0adf73d2
+9b4ab159abcd84cf0c25ee851dda8c40baffecc8
diff --git a/Cargo.lock b/Cargo.lock
index 5edc55c0..440e6133 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
[[package]]
name = "bcachefs-tools"
-version = "1.25.1"
+version = "1.25.2"
dependencies = [
"anyhow",
"bch_bindgen",
diff --git a/Cargo.toml b/Cargo.toml
index 98d74b76..ea82379c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ default-members = [".", "bch_bindgen"]
[package]
name = "bcachefs-tools"
-version = "1.25.1"
+version = "1.25.2"
authors = ["Yuxuan Shui <yshuiv7@gmail.com>", "Kayla Firestack <dev@kaylafire.me>", "Kent Overstreet <kent.overstreet@linux.dev>" ]
edition = "2021"
rust-version = "1.77.0"
diff --git a/Makefile b/Makefile
index 3b25cf81..c7795af9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-VERSION=1.25.1
+VERSION=1.25.2
PREFIX?=/usr/local
LIBEXECDIR?=$(PREFIX)/libexec
diff --git a/bch_bindgen/src/fs.rs b/bch_bindgen/src/fs.rs
index 2997ed8f..00c5d965 100644
--- a/bch_bindgen/src/fs.rs
+++ b/bch_bindgen/src/fs.rs
@@ -9,13 +9,25 @@ pub struct Fs {
}
impl Fs {
- pub fn open(devs: &[PathBuf], opts: c::bch_opts) -> Result<Fs, bch_errcode> {
- let devs: Vec<_> = devs
+ pub fn open(devs: &[PathBuf], mut opts: c::bch_opts) -> Result<Fs, bch_errcode> {
+ let devs_cstrs : Vec<_> = devs
.iter()
- .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap().into_raw())
+ .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap())
.collect();
- let ret = unsafe { c::bch2_fs_open(devs[..].as_ptr(), devs.len() as u32, opts) };
+ let mut devs_array: Vec<_> = devs_cstrs
+ .iter()
+ .map(|i| i.as_ptr())
+ .collect();
+
+ let ret = unsafe {
+ let mut devs: c::darray_const_str = std::mem::zeroed();
+
+ devs.data = devs_array[..].as_mut_ptr();
+ devs.nr = devs_array.len();
+
+ c::bch2_fs_open(&mut devs, &mut opts)
+ };
errptr_to_result(ret).map(|fs| Fs { raw: fs })
}
diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c
index 96c25310..cb40dcea 100644
--- a/c_src/cmd_device.c
+++ b/c_src/cmd_device.c
@@ -515,7 +515,11 @@ static int cmd_device_resize(int argc, char *argv[])
} else {
printf("Doing offline resize of %s\n", dev);
- struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+ darray_const_str devs = {};
+ darray_push(&devs, dev);
+
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
@@ -612,7 +616,11 @@ static int cmd_device_resize_journal(int argc, char *argv[])
} else {
printf("%s is offline - starting:\n", dev);
- struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+ darray_const_str devs = {};
+ darray_push(&devs, dev);
+
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c
index 7d7ceaa5..c6b813e9 100644
--- a/c_src/cmd_dump.c
+++ b/c_src/cmd_dump.c
@@ -147,7 +147,9 @@ int cmd_dump(int argc, char *argv[])
if (!argc)
die("Please supply device(s) to check");
- struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
+
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("error opening devices: %s", bch2_err_str(PTR_ERR(c)));
@@ -177,5 +179,6 @@ int cmd_dump(int argc, char *argv[])
up_read(&c->state_lock);
bch2_fs_stop(c);
+ darray_exit(&devs);
return 0;
}
diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c
index ac48454f..aa87feb0 100644
--- a/c_src/cmd_format.c
+++ b/c_src/cmd_format.c
@@ -123,7 +123,7 @@ static void build_fs(struct bch_fs *c, const char *src_path)
int cmd_format(int argc, char *argv[])
{
dev_opts_list devices = {};
- darray_str device_paths = {};
+ darray_const_str device_paths = {};
struct format_opts opts = format_opts_default();
struct dev_opts dev_opts = dev_opts_default();
bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
@@ -302,9 +302,8 @@ int cmd_format(int argc, char *argv[])
* Start the filesystem once, to allocate the journal and create
* the root directory:
*/
- struct bch_fs *c = bch2_fs_open(device_paths.data,
- device_paths.nr,
- bch2_opts_empty());
+ struct bch_opts open_opts = bch2_opts_empty();
+ struct bch_fs *c = bch2_fs_open(&device_paths, &open_opts);
if (IS_ERR(c))
die("error opening %s: %s", device_paths.data[0],
bch2_err_str(PTR_ERR(c)));
diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c
index aa81fa11..13cec103 100644
--- a/c_src/cmd_fsck.c
+++ b/c_src/cmd_fsck.c
@@ -86,7 +86,7 @@ static int splice_fd_to_stdinout(int fd)
stdin_closed = true;
}
- xclose(fd);
+ close(fd);
return 0;
}
@@ -113,7 +113,7 @@ static void append_opt(struct printbuf *out, const char *opt)
prt_str(out, opt);
}
-static bool should_use_kernel_fsck(darray_str devs)
+static bool should_use_kernel_fsck(darray_const_str devs)
{
system("modprobe bcachefs");
@@ -131,7 +131,7 @@ static bool should_use_kernel_fsck(darray_str devs)
opt_set(opts, nochanges, true);
opt_set(opts, read_only, true);
- struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
return false;
@@ -265,7 +265,7 @@ int cmd_fsck(int argc, char *argv[])
exit(8);
}
- darray_str devs = get_or_split_cmdline_devs(argc, argv);
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
darray_for_each(devs, i)
if (dev_mounted(*i)) {
@@ -324,7 +324,7 @@ userland_fsck:
if (ret)
return ret;
- struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
exit(8);
diff --git a/c_src/cmd_key.c b/c_src/cmd_key.c
index ac8a94a8..c1b72ff4 100644
--- a/c_src/cmd_key.c
+++ b/c_src/cmd_key.c
@@ -87,12 +87,13 @@ int cmd_unlock(int argc, char *argv[])
int cmd_set_passphrase(int argc, char *argv[])
{
- struct bch_opts opts = bch2_opts_empty();
- struct bch_fs *c;
-
- if (argc < 2)
+ args_shift(1);
+ if (!argc)
die("Please supply one or more devices");
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
+
+ struct bch_opts opts = bch2_opts_empty();
opt_set(opts, nostart, true);
/*
@@ -100,7 +101,7 @@ int cmd_set_passphrase(int argc, char *argv[])
* to make sure we're opening and updating every component device:
*/
- c = bch2_fs_open(argv + 1, argc - 1, opts);
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
@@ -126,14 +127,16 @@ int cmd_set_passphrase(int argc, char *argv[])
int cmd_remove_passphrase(int argc, char *argv[])
{
- struct bch_opts opts = bch2_opts_empty();
- struct bch_fs *c;
-
- if (argc < 2)
+ args_shift(1);
+ if (!argc)
die("Please supply one or more devices");
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
+
+ struct bch_opts opts = bch2_opts_empty();
opt_set(opts, nostart, true);
- c = bch2_fs_open(argv + 1, argc - 1, opts);
+
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
diff --git a/c_src/cmd_kill_btree_node.c b/c_src/cmd_kill_btree_node.c
index c8f43150..81dbdd4b 100644
--- a/c_src/cmd_kill_btree_node.c
+++ b/c_src/cmd_kill_btree_node.c
@@ -74,7 +74,9 @@ int cmd_kill_btree_node(int argc, char *argv[])
if (!argc)
die("Please supply device(s)");
- struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
+
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c
index 3cdf4846..fe9e3bbd 100644
--- a/c_src/cmd_list_journal.c
+++ b/c_src/cmd_list_journal.c
@@ -319,9 +319,9 @@ int cmd_list_journal(int argc, char *argv[])
if (!argc)
die("Please supply device(s) to open");
- darray_str devs = get_or_split_cmdline_devs(argc, argv);
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
- struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
if (IS_ERR(c))
die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
index edd77377..91c42302 100644
--- a/c_src/cmd_migrate.c
+++ b/c_src/cmd_migrate.c
@@ -253,14 +253,15 @@ static int migrate_fs(const char *fs_path,
free(sb);
- char *path[1] = { dev->path };
+ darray_const_str dev_paths = {};
+ darray_push(&dev_paths, dev->path);
struct bch_opts opts = bch2_opts_empty();
opt_set(opts, sb, sb_offset);
opt_set(opts, nostart, true);
opt_set(opts, noexcl, true);
- struct bch_fs *c = bch2_fs_open(path, 1, opts);
+ struct bch_fs *c = bch2_fs_open(&dev_paths, &opts);
if (IS_ERR(c))
die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
@@ -295,7 +296,7 @@ static int migrate_fs(const char *fs_path,
opt_set(opts, nochanges, true);
opt_set(opts, read_only, true);
- c = bch2_fs_open(path, 1, opts);
+ c = bch2_fs_open(&dev_paths, &opts);
if (IS_ERR(c))
die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
@@ -377,14 +378,14 @@ static void migrate_superblock_usage(void)
int cmd_migrate_superblock(int argc, char *argv[])
{
- char *dev = NULL;
+ darray_const_str devs = {};
u64 sb_offset = 0;
int opt, ret;
while ((opt = getopt(argc, argv, "d:o:h")) != -1)
switch (opt) {
case 'd':
- dev = optarg;
+ darray_push(&devs, optarg);
break;
case 'o':
ret = kstrtou64(optarg, 10, &sb_offset);
@@ -396,13 +397,13 @@ int cmd_migrate_superblock(int argc, char *argv[])
exit(EXIT_SUCCESS);
}
- if (!dev)
+ if (!devs.nr)
die("Please specify a device");
if (!sb_offset)
die("Please specify offset of existing superblock");
- int fd = xopen(dev, O_RDWR);
+ int fd = xopen(devs.data[0], O_RDWR);
struct bch_sb *sb = __bch2_super_read(fd, sb_offset);
unsigned sb_size = 1U << sb->layout.sb_max_size_bits;
@@ -435,7 +436,7 @@ int cmd_migrate_superblock(int argc, char *argv[])
opt_set(opts, nostart, true);
opt_set(opts, sb, sb_offset);
- struct bch_fs *c = bch2_fs_open(&dev, 1, opts);
+ struct bch_fs *c = bch2_fs_open(&devs, &opts);
ret = PTR_ERR_OR_ZERO(c) ?:
bch2_buckets_nouse_alloc(c);
if (ret)
@@ -461,7 +462,7 @@ int cmd_migrate_superblock(int argc, char *argv[])
* inconsequential:
*/
- c = bch2_fs_open(&dev, 1, opts);
+ c = bch2_fs_open(&devs, &opts);
ret = PTR_ERR_OR_ZERO(c);
if (ret)
die("error opening filesystem: %s", bch2_err_str(ret));
diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c
index e314903a..14201c8d 100644
--- a/c_src/cmd_option.c
+++ b/c_src/cmd_option.c
@@ -94,10 +94,12 @@ int cmd_set_option(int argc, char *argv[])
}
if (!online) {
+ darray_const_str devs = get_or_split_cmdline_devs(argc, argv);
+
struct bch_opts open_opts = bch2_opts_empty();
opt_set(open_opts, nostart, true);
- struct bch_fs *c = bch2_fs_open(argv, argc, open_opts);
+ struct bch_fs *c = bch2_fs_open(&devs, &open_opts);
if (IS_ERR(c)) {
fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c)));
exit(EXIT_FAILURE);
diff --git a/c_src/posix_to_bcachefs.c b/c_src/posix_to_bcachefs.c
index 3c2e72d8..68d7e992 100644
--- a/c_src/posix_to_bcachefs.c
+++ b/c_src/posix_to_bcachefs.c
@@ -176,7 +176,7 @@ static void write_data(struct bch_fs *c,
die("write error: %s", bch2_err_str(op.error));
}
-void copy_data(struct bch_fs *c,
+static void copy_data(struct bch_fs *c,
struct bch_inode_unpacked *dst_inode,
int src_fd, u64 start, u64 end)
{
diff --git a/c_src/tools-util.c b/c_src/tools-util.c
index ea12946c..03ad009f 100644
--- a/c_src/tools-util.c
+++ b/c_src/tools-util.c
@@ -547,7 +547,7 @@ char *dev_to_path(dev_t dev)
return path;
}
-struct mntent *dev_to_mount(char *dev)
+struct mntent *dev_to_mount(const char *dev)
{
struct mntent *mnt, *ret = NULL;
FILE *f = setmntent("/proc/mounts", "r");
@@ -586,7 +586,7 @@ found:
return ret;
}
-int dev_mounted(char *dev)
+int dev_mounted(const char *dev)
{
struct mntent *mnt = dev_to_mount(dev);
@@ -748,9 +748,9 @@ unsigned version_parse(char *buf)
return BCH_VERSION(major, minor);
}
-darray_str get_or_split_cmdline_devs(int argc, char *argv[])
+darray_const_str get_or_split_cmdline_devs(int argc, char *argv[])
{
- darray_str ret = {};
+ darray_const_str ret = {};
if (argc == 1) {
bch2_split_devs(argv[0], &ret);
diff --git a/c_src/tools-util.h b/c_src/tools-util.h
index 27652b62..1352050e 100644
--- a/c_src/tools-util.h
+++ b/c_src/tools-util.h
@@ -186,8 +186,8 @@ u32 crc32c(u32, const void *, size_t);
char *dev_to_name(dev_t);
char *dev_to_path(dev_t);
-struct mntent *dev_to_mount(char *);
-int dev_mounted(char *);
+struct mntent *dev_to_mount(const char *);
+int dev_mounted(const char *);
char *fd_to_dev_model(int);
#define args_shift(_nr) \
@@ -217,7 +217,7 @@ struct bbpos_range bbpos_range_parse(char *);
unsigned version_parse(char *);
-darray_str get_or_split_cmdline_devs(int argc, char *argv[]);
+darray_const_str get_or_split_cmdline_devs(int argc, char *argv[]);
char *pop_cmd(int *argc, char *argv[]);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 37cbbf86..002e3853 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -484,7 +484,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
if (ret)
return ERR_PTR(ret);
- ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
+ ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_);
bch2_trans_iter_exit(trans, &iter);
return unlikely(ret) ? ERR_PTR(ret) : a;
}
@@ -2393,14 +2393,16 @@ bkey_err:
int bch2_fs_freespace_init(struct bch_fs *c)
{
- int ret = 0;
- bool doing_init = false;
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image))
+ return 0;
+
/*
* We can crash during the device add path, so we need to check this on
* every mount:
*/
+ bool doing_init = false;
for_each_member_device(c, ca) {
if (ca->mi.freespace_initialized)
continue;
@@ -2410,7 +2412,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
doing_init = true;
}
- ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
+ int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
if (ret) {
bch2_dev_put(ca);
bch_err_fn(c, ret);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index ac1244e7..a0f92daa 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1339,6 +1339,8 @@ alloc_done:
open_bucket_for_each(c, &req->wp->ptrs, ob, i)
req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free);
+ req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c));
+
BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX);
return 0;
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index 64e1f1ef..19220341 100644
--- a/libbcachefs/alloc_foreground.h
+++ b/libbcachefs/alloc_foreground.h
@@ -157,7 +157,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ
unsigned i;
open_bucket_for_each(c, &wp->ptrs, ob, i)
- ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+ ob_push(c, ob->sectors_free < block_sectors(c)
+ ? &ptrs
+ : &keep, ob);
wp->ptrs = keep;
mutex_unlock(&wp->lock);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 675b8f8e..5fb8b0f4 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -295,6 +295,16 @@ do { \
bch2_print(_c, __VA_ARGS__); \
} while (0)
+#define bch2_print_str_ratelimited(_c, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ \
+ if (__ratelimit(&_rs)) \
+ bch2_print_str(_c, __VA_ARGS__); \
+} while (0)
+
#define bch_info(c, fmt, ...) \
bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_info_ratelimited(c, fmt, ...) \
@@ -834,6 +844,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
+ u64 recovery_passes_required;
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
} sb;
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 9db8f5f8..92ae3173 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -48,6 +48,27 @@
#define DROP_PREV_NODE 11
#define DID_FILL_FROM_SCAN 12
+/*
+ * Returns true if it's a btree we can easily reconstruct, or otherwise won't
+ * cause data loss if it's missing:
+ */
+static bool btree_id_important(enum btree_id btree)
+{
+ if (btree_id_is_alloc(btree))
+ return false;
+
+ switch (btree) {
+ case BTREE_ID_quotas:
+ case BTREE_ID_snapshot_trees:
+ case BTREE_ID_logged_ops:
+ case BTREE_ID_rebalance_work:
+ case BTREE_ID_subvolume_children:
+ return false;
+ default:
+ return true;
+ }
+}
+
static const char * const bch2_gc_phase_strs[] = {
#define x(n) #n,
GC_PHASES()
@@ -350,20 +371,13 @@ again:
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
- if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
- trans, btree_node_read_error,
- "Topology repair: unreadable btree node at\n%s",
- buf.buf)) {
+ if (bch2_err_matches(ret, EIO)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
if (ret)
break;
-
- ret = bch2_btree_lost_data(c, b->c.btree_id);
- if (ret)
- break;
continue;
}
@@ -525,9 +539,6 @@ int bch2_check_topology(struct bch_fs *c)
bch2_btree_id_to_text(&buf, i);
if (r->error) {
- ret = bch2_btree_lost_data(c, i);
- if (ret)
- break;
reconstruct_root:
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
@@ -535,8 +546,10 @@ reconstruct_root:
r->error = 0;
if (!bch2_btree_has_scanned_nodes(c, i)) {
- mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing,
- "no nodes found for btree %s, continue?", buf.buf);
+ __fsck_err(trans,
+ FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0),
+ btree_root_unreadable_and_scan_found_nothing,
+ "no nodes found for btree %s, continue?", buf.buf);
bch2_btree_root_alloc_fake_trans(trans, i, 0);
} else {
bch2_btree_root_alloc_fake_trans(trans, i, 1);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 5cd27a9c..aec397b4 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -515,19 +515,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca,
+ bool print_pos,
struct btree *b, struct bset *i, struct bkey_packed *k,
- unsigned offset, int write)
+ unsigned offset, int rw)
{
- prt_printf(out, bch2_log_msg(c, "%s"),
- write == READ
- ? "error validating btree node "
- : "corrupt btree node before write ");
+ if (print_pos) {
+ prt_str(out, rw == READ
+ ? "error validating btree node "
+ : "corrupt btree node before write ");
+ prt_printf(out, "at btree ");
+ bch2_btree_pos_to_text(out, c, b);
+ prt_newline(out);
+ }
+
if (ca)
- prt_printf(out, "on %s ", ca->name);
- prt_printf(out, "at btree ");
- bch2_btree_pos_to_text(out, c, b);
+ prt_printf(out, "%s ", ca->name);
- prt_printf(out, "\nnode offset %u/%u",
+ prt_printf(out, "node offset %u/%u",
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
if (i)
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
@@ -538,75 +542,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_str(out, ": ");
}
-__printf(10, 11)
+__printf(11, 12)
static int __btree_err(int ret,
struct bch_fs *c,
struct bch_dev *ca,
struct btree *b,
struct bset *i,
struct bkey_packed *k,
- int write,
- bool have_retry,
+ int rw,
enum bch_sb_error_id err_type,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg,
const char *fmt, ...)
{
- bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
+ if (c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
+ return -BCH_ERR_fsck_fix;
+
+ bool have_retry = false;
+ int ret2;
+
+ if (ca) {
+ bch2_mark_btree_validate_failure(failed, ca->dev_idx);
+
+ struct extent_ptr_decoded pick;
+ have_retry = !bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ failed, &pick, -1);
+ }
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
ret = -BCH_ERR_btree_node_read_err_fixable;
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
ret = -BCH_ERR_btree_node_read_err_bad_node;
- if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
- bch2_sb_error_count(c, err_type);
+ bch2_sb_error_count(c, err_type);
+
+ bool print_deferred = err_msg &&
+ rw == READ &&
+ !(test_bit(BCH_FS_fsck_running, &c->flags) &&
+ c->opts.fix_errors == FSCK_FIX_ask);
struct printbuf out = PRINTBUF;
- if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
- printbuf_indent_add_nextline(&out, 2);
-#ifdef BCACHEFS_LOG_PREFIX
- prt_printf(&out, bch2_log_msg(c, ""));
-#endif
- }
+ bch2_log_msg_start(c, &out);
+
+ if (!print_deferred)
+ err_msg = &out;
- btree_err_msg(&out, c, ca, b, i, k, b->written, write);
+ btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw);
va_list args;
va_start(args, fmt);
- prt_vprintf(&out, fmt, args);
+ prt_vprintf(err_msg, fmt, args);
va_end(args);
- if (write == WRITE) {
+ if (print_deferred) {
+ prt_newline(err_msg);
+
+ switch (ret) {
+ case -BCH_ERR_btree_node_read_err_fixable:
+ ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type);
+ if (ret2 != -BCH_ERR_fsck_fix &&
+ ret2 != -BCH_ERR_fsck_ignore) {
+ ret = ret2;
+ goto fsck_err;
+ }
+
+ if (!have_retry)
+ ret = -BCH_ERR_fsck_fix;
+ goto out;
+ case -BCH_ERR_btree_node_read_err_bad_node:
+ prt_str(&out, ", ");
+ ret = __bch2_topology_error(c, &out);
+ break;
+ }
+
+ goto out;
+ }
+
+ if (rw == WRITE) {
prt_str(&out, ", ");
ret = __bch2_inconsistent_error(c, &out)
? -BCH_ERR_fsck_errors_not_fixed
: 0;
- silent = false;
+ goto print;
}
switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
- ret = !silent
- ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf)
- : -BCH_ERR_fsck_fix;
- if (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore)
+ ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf);
+ if (ret2 != -BCH_ERR_fsck_fix &&
+ ret2 != -BCH_ERR_fsck_ignore) {
+ ret = ret2;
goto fsck_err;
- ret = -BCH_ERR_fsck_fix;
+ }
+
+ if (!have_retry)
+ ret = -BCH_ERR_fsck_fix;
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
ret = __bch2_topology_error(c, &out);
- if (ret)
- silent = false;
- break;
- case -BCH_ERR_btree_node_read_err_incompatible:
- ret = -BCH_ERR_fsck_errors_not_fixed;
- silent = false;
break;
}
-
- if (!silent)
- bch2_print_str(c, KERN_ERR, out.buf);
+print:
+ bch2_print_str(c, KERN_ERR, out.buf);
out:
fsck_err:
printbuf_exit(&out);
@@ -615,8 +654,9 @@ fsck_err:
#define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \
({ \
- int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \
+ int _ret = __btree_err(type, c, ca, b, i, k, write, \
BCH_FSCK_ERR_##_err_type, \
+ failed, err_msg, \
msg, ##__VA_ARGS__); \
\
if (_ret != -BCH_ERR_fsck_fix) { \
@@ -624,7 +664,7 @@ fsck_err:
goto fsck_err; \
} \
\
- *saw_error = true; \
+ true; \
})
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
@@ -682,8 +722,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
struct btree *b, struct bset *i,
- unsigned offset, unsigned sectors,
- int write, bool have_retry, bool *saw_error)
+ unsigned offset, unsigned sectors, int write,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
@@ -896,7 +937,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b,
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
struct bset *i, int write,
- bool have_retry, bool *saw_error)
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
@@ -1009,7 +1051,9 @@ fsck_err:
}
int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
- struct btree *b, bool have_retry, bool *saw_error)
+ struct btree *b,
+ struct bch_io_failures *failed,
+ struct printbuf *err_msg)
{
struct btree_node_entry *bne;
struct sort_iter *iter;
@@ -1022,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
u64 max_journal_seq = 0;
struct printbuf buf = PRINTBUF;
- int ret = 0, retry_read = 0, write = READ;
+ int ret = 0, write = READ;
u64 start_time = local_clock();
b->version_ondisk = U16_MAX;
@@ -1156,15 +1200,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
b->version_ondisk = min(b->version_ondisk,
le16_to_cpu(i->version));
- ret = validate_bset(c, ca, b, i, b->written, sectors,
- READ, have_retry, saw_error);
+ ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg);
if (ret)
goto fsck_err;
if (!b->written)
btree_node_set_format(b, b->data->format);
- ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
+ ret = validate_bset_keys(c, b, i, READ, failed, err_msg);
if (ret)
goto fsck_err;
@@ -1292,20 +1335,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (!ptr_written)
set_btree_node_need_rewrite(b);
-out:
+fsck_err:
mempool_free(iter, &c->fill_iter);
printbuf_exit(&buf);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
- return retry_read;
-fsck_err:
- if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
- ret == -BCH_ERR_btree_node_read_err_must_retry) {
- retry_read = 1;
- } else {
- set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
- }
- goto out;
+ return ret;
}
static void btree_node_read_work(struct work_struct *work)
@@ -1317,15 +1351,25 @@ static void btree_node_read_work(struct work_struct *work)
struct btree *b = rb->b;
struct bio *bio = &rb->bio;
struct bch_io_failures failed = { .nr = 0 };
+ int ret = 0;
+
struct printbuf buf = PRINTBUF;
- bool saw_error = false;
- bool retry = false;
- bool can_retry;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "btree node read error at btree ");
+ bch2_btree_pos_to_text(&buf, c, b);
+ prt_newline(&buf);
goto start;
while (1) {
- retry = true;
- bch_info(c, "retrying read");
+ ret = bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ &failed, &rb->pick, -1);
+ if (ret) {
+ set_btree_node_read_error(b);
+ break;
+ }
+
ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = ca != NULL;
rb->start_time = local_clock();
@@ -1343,60 +1387,59 @@ static void btree_node_read_work(struct work_struct *work)
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
rb->start_time, !bio->bi_status);
start:
- printbuf_reset(&buf);
- bch2_btree_pos_to_text(&buf, c, b);
-
- if (ca && bio->bi_status)
- bch_err_dev_ratelimited(ca,
- "btree read error %s for %s",
- bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = false;
- bch2_mark_io_failure(&failed, &rb->pick, false);
-
- can_retry = bch2_bkey_pick_read_device(c,
- bkey_i_to_s_c(&b->key),
- &failed, &rb->pick, -1) > 0;
-
- if (!bio->bi_status &&
- !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
- if (retry)
- bch_info(c, "retry success");
- break;
+ if (bio->bi_status) {
+ bch2_mark_io_failure(&failed, &rb->pick, false);
+ continue;
}
- saw_error = true;
+ ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
+ if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
+ ret == -BCH_ERR_btree_node_read_err_must_retry)
+ continue;
- if (!can_retry) {
+ if (ret)
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
- break;
- }
+
+ break;
}
- async_object_list_del(c, btree_read_bio, rb->list_idx);
- bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
- rb->start_time);
- bio_put(&rb->bio);
+ bch2_io_failures_to_text(&buf, c, &failed);
- if ((saw_error ||
+ if (btree_node_read_error(b))
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+
+ /*
+ * only print retry success if we read from a replica with no errors
+ */
+ if (btree_node_read_error(b))
+ prt_printf(&buf, "ret %s", bch2_err_str(ret));
+ else if (failed.nr) {
+ if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
+ prt_printf(&buf, "retry success");
+ else
+ prt_printf(&buf, "repair success");
+ }
+
+ if ((failed.nr ||
btree_node_need_rewrite(b)) &&
!btree_node_read_error(b) &&
c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
- if (saw_error) {
- printbuf_reset(&buf);
- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_str(&buf, " ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s",
- __func__, buf.buf);
- }
-
+ prt_printf(&buf, " (rewriting node)");
bch2_btree_node_rewrite_async(c, b);
}
+ prt_newline(&buf);
+
+ if (failed.nr)
+ bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
+ async_object_list_del(c, btree_read_bio, rb->list_idx);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+ rb->start_time);
+ bio_put(&rb->bio);
printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
@@ -1480,12 +1523,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
struct btree *b = ra->b;
struct printbuf buf = PRINTBUF;
bool dump_bset_maps = false;
- bool have_retry = false;
int ret = 0, best = -1, write = READ;
unsigned i, written = 0, written2 = 0;
__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
bool _saw_error = false, *saw_error = &_saw_error;
+ struct printbuf *err_msg = NULL;
+ struct bch_io_failures *failed = NULL;
for (i = 0; i < ra->nr; i++) {
struct btree_node *bn = ra->buf[i];
@@ -1578,14 +1622,19 @@ fsck_err:
if (best >= 0) {
memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
- ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
+ ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL);
} else {
ret = -1;
}
if (ret) {
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
+
+ struct printbuf buf = PRINTBUF;
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
+ if (buf.pos)
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
} else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
@@ -1718,6 +1767,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
+ prt_newline(&buf);
+ bch2_btree_lost_data(c, &buf, b->c.btree_id);
bch_err_ratelimited(c, "%s", buf.buf);
if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
@@ -1725,7 +1776,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bch2_fatal_error(c);
set_btree_node_read_error(b);
- bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf);
@@ -2194,8 +2244,6 @@ static void btree_node_write_endio(struct bio *bio)
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors)
{
- bool saw_error;
-
int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
(struct bkey_validate_context) {
.from = BKEY_VALIDATE_btree_node,
@@ -2208,8 +2256,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
return ret;
}
- ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
- validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
+ ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?:
+ validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL);
if (ret) {
bch2_inconsistent_error(c);
dump_stack();
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index afdb11a9..30a51805 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -134,7 +134,9 @@ void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct btree_trans *, struct btree *);
int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
- struct btree *, bool, bool *);
+ struct btree *,
+ struct bch_io_failures *,
+ struct printbuf *);
void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index c824f1b6..9d941619 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
struct bpos end)
{
if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
- !bkey_eq(iter->pos, POS_MAX)) {
+ !bkey_eq(iter->pos, POS_MAX) &&
+ !((iter->flags & BTREE_ITER_is_extents) &&
+ iter->pos.offset == U64_MAX)) {
+
/*
* bkey_start_pos(), for extents, is not monotonically
* increasing until after filtering for snapshots:
@@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify_entry_exit(iter);
- EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode);
int ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
@@ -3123,6 +3126,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (new_bytes > s->max_mem) {
+ mutex_lock(&s->lock);
#ifdef CONFIG_BCACHEFS_DEBUG
darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
@@ -3134,6 +3138,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
s->trans_kmalloc_trace.nr);
#endif
s->max_mem = new_bytes;
+ mutex_unlock(&s->lock);
}
if (trans->used_mempool) {
diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c
index 7d6c971d..ade3b5ad 100644
--- a/libbcachefs/btree_journal_iter.c
+++ b/libbcachefs/btree_journal_iter.c
@@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
.size = max_t(size_t, keys->size, 8) * 2,
};
- new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL);
+ new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL);
if (!new_keys.data) {
bch_err(c, "%s: error allocating new key array (size %zu)",
__func__, new_keys.size);
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index 2bffd512..ce83cd03 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -511,8 +511,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
return 0;
}
-int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+ unsigned long ip)
{
kmsan_check_memory(k, bkey_bytes(&k->k));
@@ -548,7 +549,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
path_idx = iter->key_cache_path;
}
- return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_);
+ return bch2_trans_update_by_path(trans, path_idx, k, flags, ip);
}
int bch2_btree_insert_clone_trans(struct btree_trans *trans,
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index e674419c..62d24b08 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -102,8 +102,16 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *
int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
enum btree_id, struct bpos);
-int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, enum btree_iter_update_trigger_flags);
+int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, enum btree_iter_update_trigger_flags,
+ unsigned long);
+
+static inline int __must_check
+bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+{
+ return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_);
+}
struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index fcca8b01..3d25c2be 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -1396,7 +1396,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
printbuf_exit(&buf);
}
-static void
+static int
bch2_btree_insert_keys_interior(struct btree_update *as,
struct btree_trans *trans,
struct btree_path *path,
@@ -1418,7 +1418,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
insert = bkey_next(insert))
bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
- if (bch2_btree_node_check_topology(trans, b)) {
+ int ret = bch2_btree_node_check_topology(trans, b);
+ if (ret) {
struct printbuf buf = PRINTBUF;
for (struct bkey_i *k = keys->keys;
@@ -1428,11 +1429,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
prt_newline(&buf);
}
- panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf);
+ bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s",
+ (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf);
+ dump_stack();
+ return ret;
}
memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data);
keys->top_p -= insert->_data - keys->keys_p;
+ return 0;
}
static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos)
@@ -1566,11 +1571,11 @@ static void __btree_split_node(struct btree_update *as,
* nodes that were coalesced, and thus in the middle of a child node post
* coalescing:
*/
-static void btree_split_insert_keys(struct btree_update *as,
- struct btree_trans *trans,
- btree_path_idx_t path_idx,
- struct btree *b,
- struct keylist *keys)
+static int btree_split_insert_keys(struct btree_update *as,
+ struct btree_trans *trans,
+ btree_path_idx_t path_idx,
+ struct btree *b,
+ struct keylist *keys)
{
struct btree_path *path = trans->paths + path_idx;
@@ -1580,8 +1585,12 @@ static void btree_split_insert_keys(struct btree_update *as,
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
- bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ if (ret)
+ return ret;
}
+
+ return 0;
}
static int btree_split(struct btree_update *as, struct btree_trans *trans,
@@ -1614,8 +1623,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
__btree_split_node(as, trans, b, n, keys);
if (keys) {
- btree_split_insert_keys(as, trans, path, n1, keys);
- btree_split_insert_keys(as, trans, path, n2, keys);
+ ret = btree_split_insert_keys(as, trans, path, n1, keys) ?:
+ btree_split_insert_keys(as, trans, path, n2, keys);
+ if (ret)
+ goto err;
BUG_ON(!bch2_keylist_empty(keys));
}
@@ -1661,7 +1672,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
- btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+ ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+ if (ret)
+ goto err;
}
} else {
trace_and_count(c, btree_node_compact, trans, b);
@@ -1669,7 +1682,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
n1 = bch2_btree_node_alloc_replacement(as, trans, b);
if (keys) {
- btree_split_insert_keys(as, trans, path, n1, keys);
+ ret = btree_split_insert_keys(as, trans, path, n1, keys);
+ if (ret)
+ goto err;
BUG_ON(!bch2_keylist_empty(keys));
}
@@ -1816,15 +1831,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
goto split;
}
- ret = bch2_btree_node_check_topology(trans, b);
+
+ ret = bch2_btree_node_check_topology(trans, b) ?:
+ bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
if (ret) {
bch2_btree_node_unlock_write(trans, path, b);
return ret;
}
- bch2_btree_insert_keys_interior(as, trans, path, b,
- path->l[b->c.level].iter, keys);
-
trans_for_each_path_with_node(trans, b, linked, i)
bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 7c267244..596edc7b 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -399,7 +399,7 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf
bool print = __bch2_count_fsck_err(c, id, buf);
- int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf,
+ int ret = bch2_run_explicit_recovery_pass_persistent(c, buf,
BCH_RECOVERY_PASS_check_allocations);
if (insert) {
@@ -599,6 +599,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
}
struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
+ if (!bucket_valid(ca, bucket.offset)) {
+ if (insert) {
+ bch2_dev_bucket_missing(ca, bucket.offset);
+ ret = -BCH_ERR_trigger_pointer;
+ }
+ goto err;
+ }
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
@@ -965,7 +972,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
- bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+ bch2_run_explicit_recovery_pass_persistent(c, &buf,
BCH_RECOVERY_PASS_check_allocations);
if (print)
@@ -1310,13 +1317,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
if (resize) {
- bucket_gens->nbuckets = min(bucket_gens->nbuckets,
- old_bucket_gens->nbuckets);
- bucket_gens->nbuckets_minus_first =
- bucket_gens->nbuckets - bucket_gens->first_bucket;
+ u64 copy = min(bucket_gens->nbuckets,
+ old_bucket_gens->nbuckets);
memcpy(bucket_gens->b,
old_bucket_gens->b,
- bucket_gens->nbuckets);
+ sizeof(bucket_gens->b[0]) * copy);
}
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h
index 88f0ca3f..50ec3dec 100644
--- a/libbcachefs/darray.h
+++ b/libbcachefs/darray.h
@@ -21,6 +21,7 @@ struct { \
typedef DARRAY(char) darray_char;
typedef DARRAY(char *) darray_str;
+typedef DARRAY(const char *) darray_const_str;
typedef DARRAY(u8) darray_u8;
typedef DARRAY(u16) darray_u16;
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index c24de2e3..8b8800ad 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -42,7 +42,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
struct btree_node *n_sorted = c->verify_data->data;
struct bset *sorted, *inmemory = &b->data->keys;
struct bio *bio;
- bool failed = false, saw_error = false;
+ bool failed = false;
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_verify_replicas);
@@ -66,7 +66,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
v->written = 0;
- if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
+ if (bch2_btree_node_read_done(c, ca, v, NULL, NULL))
return false;
n_sorted = c->verify_data->data;
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 8488a757..b012b9dd 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -13,8 +13,8 @@
#include <linux/dcache.h>
-static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
- const struct qstr *str, struct qstr *out_cf)
+int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
@@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *
#endif
}
-static inline int bch2_maybe_casefold(struct btree_trans *trans,
- const struct bch_hash_info *info,
- const struct qstr *str, struct qstr *out_cf)
-{
- if (likely(!info->cf_encoding)) {
- *out_cf = *str;
- return 0;
- } else {
- return bch2_casefold(trans, info, str, out_cf);
- }
-}
-
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
@@ -224,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
struct qstr d_name = bch2_dirent_get_name(d);
- prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
+ prt_printf(out, "%.*s", d_name.len, d_name.name);
+
+ if (d.v->d_casefold) {
+ struct qstr d_name = bch2_dirent_get_lookup_name(d);
+ prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name);
+ }
+
+ prt_str(out, " ->");
if (d.v->d_type != DT_SUBVOL)
- prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
+ prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum));
else
- prt_printf(out, "%u -> %u",
+ prt_printf(out, " %u -> %u",
le32_to_cpu(d.v->d_parent_subvol),
le32_to_cpu(d.v->d_child_subvol));
diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h
index 0880772b..9838a7ba 100644
--- a/libbcachefs/dirent.h
+++ b/libbcachefs/dirent.h
@@ -23,6 +23,21 @@ struct bch_fs;
struct bch_hash_info;
struct bch_inode_info;
+int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
+ const struct qstr *, struct qstr *);
+
+static inline int bch2_maybe_casefold(struct btree_trans *trans,
+ const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
+{
+ if (likely(!info->cf_encoding)) {
+ *out_cf = *str;
+ return 0;
+ } else {
+ return bch2_casefold(trans, info, str, out_cf);
+ }
+}
+
struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 83ed5a63..13e28f9a 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -175,6 +175,7 @@
x(0, backpointer_to_overwritten_btree_node) \
x(0, journal_reclaim_would_deadlock) \
x(EINVAL, fsck) \
+ x(BCH_ERR_fsck, fsck_ask) \
x(BCH_ERR_fsck, fsck_fix) \
x(BCH_ERR_fsck, fsck_delete_bkey) \
x(BCH_ERR_fsck, fsck_ignore) \
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 91a65a20..d7bc70fd 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
__bch2_inconsistent_error(c, out);
return -BCH_ERR_btree_need_topology_repair;
} else {
- return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?:
+ return bch2_run_explicit_recovery_pass_persistent(c, out, BCH_RECOVERY_PASS_check_topology) ?:
-BCH_ERR_btree_node_read_validate_error;
}
}
@@ -393,6 +393,48 @@ bool __bch2_count_fsck_err(struct bch_fs *c,
return print && !repeat;
}
+int bch2_fsck_err_opt(struct bch_fs *c,
+ enum bch_fsck_flags flags,
+ enum bch_sb_error_id err)
+{
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ flags |= fsck_flags_extra[err];
+
+ if (test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
+ return -BCH_ERR_fsck_repair_unimplemented;
+
+ switch (c->opts.fix_errors) {
+ case FSCK_FIX_exit:
+ return -BCH_ERR_fsck_errors_not_fixed;
+ case FSCK_FIX_yes:
+ if (flags & FSCK_CAN_FIX)
+ return -BCH_ERR_fsck_fix;
+ fallthrough;
+ case FSCK_FIX_no:
+ if (flags & FSCK_CAN_IGNORE)
+ return -BCH_ERR_fsck_ignore;
+ return -BCH_ERR_fsck_errors_not_fixed;
+ case FSCK_FIX_ask:
+ if (flags & FSCK_AUTOFIX)
+ return -BCH_ERR_fsck_fix;
+ return -BCH_ERR_fsck_ask;
+ default:
+ BUG();
+ }
+ } else {
+ if ((flags & FSCK_AUTOFIX) &&
+ (c->opts.errors == BCH_ON_ERROR_continue ||
+ c->opts.errors == BCH_ON_ERROR_fix_safe))
+ return -BCH_ERR_fsck_fix;
+
+ if (c->opts.errors == BCH_ON_ERROR_continue &&
+ (flags & FSCK_CAN_IGNORE))
+ return -BCH_ERR_fsck_ignore;
+ return -BCH_ERR_fsck_errors_not_fixed;
+ }
+}
+
int __bch2_fsck_err(struct bch_fs *c,
struct btree_trans *trans,
enum bch_fsck_flags flags,
@@ -484,7 +526,9 @@ int __bch2_fsck_err(struct bch_fs *c,
} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
- prt_str(out, ", shutting down");
+ prt_str_indented(out, ", shutting down\n"
+ "error not marked as autofix and not in fsck\n"
+ "run fsck, and forward to devs so error can be marked for self-healing");
inconsistent = true;
print = true;
ret = -BCH_ERR_fsck_errors_not_fixed;
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index 0b3ede1c..d89dd270 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -80,6 +80,10 @@ bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbu
#define bch2_count_fsck_err(_c, _err, ...) \
__bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__)
+int bch2_fsck_err_opt(struct bch_fs *,
+ enum bch_fsck_flags,
+ enum bch_sb_error_id);
+
__printf(5, 6) __cold
int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
enum bch_fsck_flags,
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 7058a343..18506061 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -45,6 +45,49 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
struct bch_extent_crc_unpacked,
enum bch_extent_entry_type);
+void bch2_io_failures_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_io_failures *failed)
+{
+ static const char * const error_types[] = {
+ "io", "checksum", "ec reconstruct", NULL
+ };
+
+ for (struct bch_dev_io_failures *f = failed->devs;
+ f < failed->devs + failed->nr;
+ f++) {
+ unsigned errflags =
+ ((!!f->failed_io) << 0) |
+ ((!!f->failed_csum_nr) << 1) |
+ ((!!f->failed_ec) << 2);
+
+ if (!errflags)
+ continue;
+
+ bch2_printbuf_make_room(out, 1024);
+ rcu_read_lock();
+ out->atomic++;
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
+ if (ca)
+ prt_str(out, ca->name);
+ else
+ prt_printf(out, "(invalid device %u)", f->dev);
+ --out->atomic;
+ rcu_read_unlock();
+
+ prt_char(out, ' ');
+
+ if (is_power_of_2(errflags)) {
+ prt_bitflags(out, error_types, errflags);
+ prt_str(out, " error");
+ } else {
+ prt_str(out, "errors: ");
+ prt_bitflags(out, error_types, errflags);
+ }
+ prt_newline(out);
+ }
+}
+
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
unsigned dev)
{
@@ -79,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
f->failed_csum_nr++;
}
+void bch2_mark_btree_validate_failure(struct bch_io_failures *failed,
+ unsigned dev)
+{
+ struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev);
+
+ if (!f) {
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+ f = &failed->devs[failed->nr++];
+ memset(f, 0, sizeof(*f));
+ f->dev = dev;
+ }
+
+ f->failed_btree_validate = true;
+}
+
static inline u64 dev_latency(struct bch_dev *ca)
{
return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX;
@@ -179,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
have_io_errors |= f->failed_io;
+ have_io_errors |= f->failed_btree_validate;
have_io_errors |= f->failed_ec;
}
have_csum_errors |= !!f->failed_csum_nr;
@@ -186,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (p.has_ec && (f->failed_io || f->failed_csum_nr))
p.do_ec_reconstruct = true;
else if (f->failed_io ||
+ f->failed_btree_validate ||
f->failed_csum_nr > c->opts.checksum_err_retry_nr)
continue;
}
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index e78a39e7..f25be08d 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -399,10 +399,13 @@ out: \
/* utility code common to all keys with pointers: */
+void bch2_io_failures_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_failures *);
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
unsigned);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *, bool);
+void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *, int);
diff --git a/libbcachefs/extents_types.h b/libbcachefs/extents_types.h
index e51529dc..b23ce4a3 100644
--- a/libbcachefs/extents_types.h
+++ b/libbcachefs/extents_types.h
@@ -34,6 +34,7 @@ struct bch_io_failures {
u8 dev;
unsigned failed_csum_nr:6,
failed_io:1,
+ failed_btree_validate:1,
failed_ec:1;
} devs[BCH_REPLICAS_MAX + 1];
};
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 0422dcab..f1472e91 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -53,7 +53,7 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
struct bch_subvolume *);
/* Set VFS inode flags from bcachefs inode: */
-static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
+static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode)
{
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_sync] = S_SYNC,
@@ -64,8 +64,10 @@ static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
- if (inode->ei_inode.bi_casefold)
+ if (bch2_inode_casefold(c, &inode->ei_inode))
inode->v.i_flags |= S_CASEFOLD;
+ else
+ inode->v.i_flags &= ~S_CASEFOLD;
}
void bch2_inode_update_after_write(struct btree_trans *trans,
@@ -96,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
inode->ei_inode = *bi;
- bch2_inode_flags_to_vfs(inode);
+ bch2_inode_flags_to_vfs(c, inode);
}
int __must_check bch2_write_inode(struct bch_fs *c,
@@ -647,13 +649,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
const struct qstr *name)
{
struct bch_fs *c = trans->c;
- struct btree_iter dirent_iter = {};
subvol_inum inum = {};
struct printbuf buf = PRINTBUF;
+ struct qstr lookup_name;
+ int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name);
+ if (ret)
+ return ERR_PTR(ret);
+
+ struct btree_iter dirent_iter = {};
struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
- dir_hash_info, dir, name, 0);
- int ret = bkey_err(k);
+ dir_hash_info, dir, &lookup_name, 0);
+ ret = bkey_err(k);
if (ret)
return ERR_PTR(ret);
@@ -841,6 +848,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
*/
set_nlink(&inode->v, 0);
}
+
+ if (IS_CASEFOLDED(vdir))
+ d_invalidate(dentry);
err:
bch2_trans_put(trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
@@ -1251,10 +1261,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap,
return finish_open_simple(file, 0);
}
+struct bch_fiemap_extent {
+ struct bkey_buf kbuf;
+ unsigned flags;
+};
+
static int bch2_fill_extent(struct bch_fs *c,
struct fiemap_extent_info *info,
- struct bkey_s_c k, unsigned flags)
+ struct bch_fiemap_extent *fe)
{
+ struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k);
+ unsigned flags = fe->flags;
+
+ BUG_ON(!k.k->size);
+
if (bkey_extent_is_direct_data(k.k)) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1307,110 +1327,223 @@ static int bch2_fill_extent(struct bch_fs *c,
}
}
-static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
- u64 start, u64 len)
+/*
+ * Scan a range of an inode for data in pagecache.
+ *
+ * Intended to be retryable, so don't modify the output params until success is
+ * imminent.
+ */
+static int
+bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end,
+ bool nonblock)
{
- struct bch_fs *c = vinode->i_sb->s_fs_info;
- struct bch_inode_info *ei = to_bch_ei(vinode);
- struct btree_trans *trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_buf cur, prev;
- bool have_extent = false;
- int ret = 0;
+ loff_t dstart, dend;
- ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
- if (ret)
+ dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock);
+ if (dstart < 0)
+ return dstart;
+
+ if (dstart == *end) {
+ *start = dstart;
+ return 0;
+ }
+
+ dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock);
+ if (dend < 0)
+ return dend;
+
+ /* race */
+ BUG_ON(dstart == dend);
+
+ *start = dstart;
+ *end = dend;
+ return 0;
+}
+
+/*
+ * Scan a range of pagecache that corresponds to a file mapping hole in the
+ * extent btree. If data is found, fake up an extent key so it looks like a
+ * delalloc extent to the rest of the fiemap processing code.
+ */
+static int
+bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode,
+ u64 start, u64 end, struct bch_fiemap_extent *cur)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i_extent *delextent;
+ struct bch_extent_ptr ptr = {};
+ loff_t dstart = start << 9, dend = end << 9;
+ int ret;
+
+ /*
+ * We hold btree locks here so we cannot block on folio locks without
+ * dropping trans locks first. Run a nonblocking scan for the common
+ * case of no folios over holes and fall back on failure.
+ *
+ * Note that dropping locks like this is technically racy against
+ * writeback inserting to the extent tree, but a non-sync fiemap scan is
+ * fundamentally racy with writeback anyways. Therefore, just report the
+ * range as delalloc regardless of whether we have to cycle trans locks.
+ */
+ ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true);
+ if (ret == -EAGAIN)
+ ret = drop_locks_do(trans,
+ bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false));
+ if (ret < 0)
return ret;
- struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
- if (start + len < start)
- return -EINVAL;
+ /*
+ * Create a fake extent key in the buffer. We have to add a dummy extent
+ * pointer for the fill code to add an extent entry. It's explicitly
+ * zeroed to reflect delayed allocation (i.e. phys offset 0).
+ */
+ bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64));
+ delextent = bkey_extent_init(cur->kbuf.k);
+ delextent->k.p = POS(inode->ei_inum.inum, dend >> 9);
+ delextent->k.size = (dend - dstart) >> 9;
+ bch2_bkey_append_ptr(&delextent->k_i, ptr);
- start >>= 9;
+ cur->flags = FIEMAP_EXTENT_DELALLOC;
- bch2_bkey_buf_init(&cur);
- bch2_bkey_buf_init(&prev);
- trans = bch2_trans_get(c);
+ return 0;
+}
+static int bch2_next_fiemap_extent(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ u64 start, u64 end,
+ struct bch_fiemap_extent *cur)
+{
+ u32 snapshot;
+ int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
- POS(ei->v.i_ino, start), 0);
+ SPOS(inode->ei_inum.inum, start, snapshot), 0);
- while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
- enum btree_id data_btree = BTREE_ID_extents;
+ struct bkey_s_c k =
+ bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end));
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
- bch2_trans_begin(trans);
+ ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
+ if (ret)
+ goto err;
- u32 snapshot;
- ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
- if (ret)
- continue;
+ struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k);
- bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
+ /*
+ * Does the pagecache or the btree take precedence?
+ *
+ * It _should_ be the pagecache, so that we correctly report delalloc
+ * extents when dirty in the pagecache (we're COW, after all).
+ *
+ * But we'd have to add per-sector writeback tracking to
+ * bch_folio_state, otherwise we report delalloc extents for clean
+ * cached data in the pagecache.
+ *
+ * We should do this, but even then fiemap won't report stable mappings:
+ * on bcachefs data moves around in the background (copygc, rebalance)
+ * and we don't provide a way for userspace to lock that out.
+ */
+ if (k.k &&
+ bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)),
+ pagecache_start)) {
+ bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k);
+ bch2_cut_front(iter.pos, cur->kbuf.k);
+ bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k);
+ cur->flags = 0;
+ } else if (k.k) {
+ bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k);
+ }
- k = bch2_btree_iter_peek_max(trans, &iter, end);
- ret = bkey_err(k);
+ if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) {
+ unsigned sectors = cur->kbuf.k->k.size;
+ s64 offset_into_extent = 0;
+ enum btree_id data_btree = BTREE_ID_extents;
+ ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent,
+ &cur->kbuf);
if (ret)
- continue;
+ goto err;
- if (!k.k)
- break;
+ struct bkey_i *k = cur->kbuf.k;
+ sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent);
- if (!bkey_extent_is_data(k.k) &&
- k.k->type != KEY_TYPE_reservation) {
- bch2_btree_iter_advance(trans, &iter);
- continue;
- }
+ bch2_cut_front(POS(k->k.p.inode,
+ bkey_start_offset(&k->k) + offset_into_extent),
+ k);
+ bch2_key_resize(&k->k, sectors);
+ k->k.p = iter.pos;
+ k->k.p.offset += k->k.size;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
- s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
- unsigned sectors = k.k->size - offset_into_extent;
+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
+ u64 start, u64 len)
+{
+ struct bch_fs *c = vinode->i_sb->s_fs_info;
+ struct bch_inode_info *ei = to_bch_ei(vinode);
+ struct btree_trans *trans;
+ struct bch_fiemap_extent cur, prev;
+ int ret = 0;
+
+ ret = fiemap_prep(&ei->v, info, start, &len, 0);
+ if (ret)
+ return ret;
+
+ if (start + len < start)
+ return -EINVAL;
+
+ start >>= 9;
+ u64 end = (start + len) >> 9;
- bch2_bkey_buf_reassemble(&cur, c, k);
+ bch2_bkey_buf_init(&cur.kbuf);
+ bch2_bkey_buf_init(&prev.kbuf);
+ bkey_init(&prev.kbuf.k->k);
- ret = bch2_read_indirect_extent(trans, &data_btree,
- &offset_into_extent, &cur);
+ trans = bch2_trans_get(c);
+
+ while (start < end) {
+ ret = lockrestart_do(trans,
+ bch2_next_fiemap_extent(trans, ei, start, end, &cur));
if (ret)
- continue;
+ goto err;
- k = bkey_i_to_s_c(cur.k);
- bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
+ BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start);
+ BUG_ON(cur.kbuf.k->k.p.offset > end);
- sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);
+ if (bkey_start_offset(&cur.kbuf.k->k) == end)
+ break;
- bch2_cut_front(POS(k.k->p.inode,
- bkey_start_offset(k.k) +
- offset_into_extent),
- cur.k);
- bch2_key_resize(&cur.k->k, sectors);
- cur.k->k.p = iter.pos;
- cur.k->k.p.offset += cur.k->k.size;
+ start = cur.kbuf.k->k.p.offset;
- if (have_extent) {
+ if (!bkey_deleted(&prev.kbuf.k->k)) {
bch2_trans_unlock(trans);
- ret = bch2_fill_extent(c, info,
- bkey_i_to_s_c(prev.k), 0);
+ ret = bch2_fill_extent(c, info, &prev);
if (ret)
- break;
+ goto err;
}
- bkey_copy(prev.k, cur.k);
- have_extent = true;
-
- bch2_btree_iter_set_pos(trans, &iter,
- POS(iter.pos.inode, iter.pos.offset + sectors));
+ bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k);
+ prev.flags = cur.flags;
}
- bch2_trans_iter_exit(trans, &iter);
- if (!ret && have_extent) {
+ if (!bkey_deleted(&prev.kbuf.k->k)) {
bch2_trans_unlock(trans);
- ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
- FIEMAP_EXTENT_LAST);
+ prev.flags |= FIEMAP_EXTENT_LAST;
+ ret = bch2_fill_extent(c, info, &prev);
}
-
+err:
bch2_trans_put(trans);
- bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
- return ret < 0 ? ret : 0;
+ bch2_bkey_buf_exit(&cur.kbuf, c);
+ bch2_bkey_buf_exit(&prev.kbuf, c);
+
+ return bch2_err_class(ret < 0 ? ret : 0);
}
static const struct vm_operations_struct bch_vm_ops = {
@@ -1487,13 +1620,14 @@ static int bch2_fileattr_get(struct dentry *dentry,
struct fileattr *fa)
{
struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));
if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
- if (inode->ei_inode.bi_casefold)
+ if (bch2_inode_casefold(c, &inode->ei_inode))
fa->flags |= FS_CASEFOLD_FL;
fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
@@ -1526,7 +1660,7 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
(s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
return -EINVAL;
- if (s->casefold != bi->bi_casefold) {
+ if (s->casefold != bch2_inode_casefold(c, bi)) {
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
@@ -1547,9 +1681,8 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
- bi->bi_casefold = s->casefold;
- bi->bi_fields_set &= ~BIT(Inode_opt_casefold);
- bi->bi_fields_set |= s->casefold << Inode_opt_casefold;
+ bi->bi_casefold = s->casefold + 1;
+ bi->bi_fields_set |= BIT(Inode_opt_casefold);
#else
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
@@ -2330,7 +2463,7 @@ static int bch2_fs_get_tree(struct fs_context *fc)
struct inode *vinode;
struct bch2_opts_parse *opts_parse = fc->fs_private;
struct bch_opts opts = opts_parse->opts;
- darray_str devs;
+ darray_const_str devs;
darray_fs devs_to_fs = {};
int ret;
@@ -2354,7 +2487,7 @@ static int bch2_fs_get_tree(struct fs_context *fc)
if (!IS_ERR(sb))
goto got_sb;
- c = bch2_fs_open(devs.data, devs.nr, opts);
+ c = bch2_fs_open(&devs, &opts);
ret = PTR_ERR_OR_ZERO(c);
if (ret)
goto err;
@@ -2445,6 +2578,11 @@ got_sb:
if (ret)
goto err_put_super;
+#ifdef CONFIG_UNICODE
+ sb->s_encoding = c->cf_encoding;
+#endif
+ generic_set_sb_d_ops(sb);
+
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
bch_err_msg(c, ret, "mounting: error getting root inode");
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 540db66e..7d3dd1a0 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -3022,7 +3022,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
{
struct bch_ioctl_fsck_offline arg;
struct fsck_thread *thr = NULL;
- darray_str(devs) = {};
+ darray_const_str devs = {};
long ret = 0;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -3080,7 +3080,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
- thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+ thr->c = bch2_fs_open(&devs, &thr->opts);
if (!IS_ERR(thr->c) &&
thr->c->opts.errors == BCH_ON_ERROR_panic)
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 1b2fc902..bb31e953 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
}
}
+static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+ /* inode apts are stored with a +1 bias: 0 means "unset, use fs opt" */
+ return bi->bi_casefold
+ ? bi->bi_casefold - 1
+ : c->opts.casefold;
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index f23c3bfe..73a0a42a 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -573,7 +573,6 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
- int orig_error = rbio->ret;
struct btree_trans *trans = bch2_trans_get(c);
@@ -614,10 +613,11 @@ static void bch2_rbio_retry(struct work_struct *work)
if (ret) {
rbio->ret = ret;
rbio->bio.bi_status = BLK_STS_IOERR;
- } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace &&
- orig_error != -BCH_ERR_data_read_ptr_stale_race &&
- !failed.nr) {
+ }
+
+ if (failed.nr || ret) {
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf,
@@ -625,9 +625,22 @@ static void bch2_rbio_retry(struct work_struct *work)
read_pos.offset << 9));
if (rbio->data_update)
prt_str(&buf, "(internal move) ");
- prt_str(&buf, "successful retry");
- bch_err_ratelimited(c, "%s", buf.buf);
+ prt_str(&buf, "data read error, ");
+ if (!ret)
+ prt_str(&buf, "successful retry");
+ else
+ prt_str(&buf, bch2_err_str(ret));
+ prt_newline(&buf);
+
+ if (!bkey_deleted(&sk.k->k)) {
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
+ prt_newline(&buf);
+ }
+
+ bch2_io_failures_to_text(&buf, c, &failed);
+
+ bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
@@ -662,27 +675,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
}
}
-static void bch2_read_io_err(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bio *bio = &rbio->bio;
- struct bch_fs *c = rbio->c;
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- struct printbuf buf = PRINTBUF;
-
- bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
- prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));
-
- if (ca)
- bch_err_ratelimited(ca, "%s", buf.buf);
- else
- bch_err_ratelimited(c, "%s", buf.buf);
-
- printbuf_exit(&buf);
- bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
-}
-
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
struct bch_read_bio *rbio)
{
@@ -746,31 +738,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
__bch2_rbio_narrow_crcs(trans, rbio));
}
-static void bch2_read_csum_err(struct work_struct *work)
-{
- struct bch_read_bio *rbio =
- container_of(work, struct bch_read_bio, work);
- struct bch_fs *c = rbio->c;
- struct bio *src = &rbio->bio;
- struct bch_extent_crc_unpacked crc = rbio->pick.crc;
- struct nonce nonce = extent_nonce(rbio->version, crc);
- struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
- struct printbuf buf = PRINTBUF;
-
- bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
- prt_str(&buf, "data ");
- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
-
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- if (ca)
- bch_err_ratelimited(ca, "%s", buf.buf);
- else
- bch_err_ratelimited(c, "%s", buf.buf);
-
- bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
-}
-
static void bch2_read_decompress_err(struct work_struct *work)
{
struct bch_read_bio *rbio =
@@ -931,7 +898,7 @@ out:
memalloc_nofs_restore(nofs_flags);
return;
csum_err:
- bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
goto out;
decompression_err:
bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
@@ -957,7 +924,7 @@ static void bch2_read_endio(struct bio *bio)
rbio->bio.bi_end_io = rbio->end_io;
if (unlikely(bio->bi_status)) {
- bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
return;
}
@@ -1289,14 +1256,6 @@ retry_pick:
if (likely(!rbio->pick.do_ec_reconstruct)) {
if (unlikely(!rbio->have_ioref)) {
- struct printbuf buf = PRINTBUF;
- bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
- prt_printf(&buf, "no device to read from:\n ");
- bch2_bkey_val_to_text(&buf, c, k);
-
- bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
-
bch2_rbio_error(rbio,
-BCH_ERR_data_read_retry_device_offline,
BLK_STS_IOERR);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 66bfb95f..c3a32403 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -214,18 +214,20 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard;
if (nr_online < metadata_replicas_required(c)) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
- prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
- "rw journal devs:", nr_online, metadata_replicas_required(c));
-
- rcu_read_lock();
- for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
- prt_printf(&buf, " %s", ca->name);
- rcu_read_unlock();
-
- bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
+ if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) {
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+ prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
+ "rw journal devs:", nr_online, metadata_replicas_required(c));
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
+ prt_printf(&buf, " %s", ca->name);
+ rcu_read_unlock();
+
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
ret = -BCH_ERR_insufficient_journal_devices;
goto out;
}
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index f605bd01..532566ea 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -675,7 +675,7 @@ root_err:
if (ret)
break;
- if (bkey_ge(bkey_start_pos(k.k), end))
+ if (bkey_gt(bkey_start_pos(k.k), end))
break;
if (ctxt->stats)
@@ -750,7 +750,8 @@ next:
if (ctxt->stats)
atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
- bch2_btree_iter_advance(trans, &iter);
+ if (!bch2_btree_iter_advance(trans, &iter))
+ break;
}
out:
bch2_trans_iter_exit(trans, &reflink_iter);
diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c
index 46f3c8b1..52c58c6d 100644
--- a/libbcachefs/namei.c
+++ b/libbcachefs/namei.c
@@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
bool ret = false;
for (id = 0; id < Inode_opt_nr; id++) {
+ if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
+ continue;
+
/* Skip attributes that were explicitly set on this inode */
if (dst_u->bi_fields_set & (1 << id))
continue;
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index e36fcab9..6aae686f 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -33,7 +33,9 @@
#include <linux/sort.h>
#include <linux/stat.h>
-int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_lost_data(struct bch_fs *c,
+ struct printbuf *msg,
+ enum btree_id btree)
{
u64 b = BIT_ULL(btree);
int ret = 0;
@@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!(c->sb.btrees_lost_data & b)) {
- struct printbuf buf = PRINTBUF;
- bch2_btree_id_to_text(&buf, btree);
- bch_err(c, "flagging btree %s lost data", buf.buf);
- printbuf_exit(&buf);
+ prt_printf(msg, "flagging btree ");
+ bch2_btree_id_to_text(msg, btree);
+ prt_printf(msg, " lost data\n");
+
ext->btrees_lost_data |= cpu_to_le64(b);
}
/* Once we have runtime self healing for topology errors we won't need this: */
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret;
/* Btree node accounting will be off: */
__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
#ifdef CONFIG_BCACHEFS_DEBUG
/*
* These are much more minor, and don't need to be corrected right away,
* but in debug mode we want the next fsck run to be clean:
*/
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
#endif
switch (btree) {
case BTREE_ID_alloc:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
__set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
__set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
@@ -77,26 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
goto out;
case BTREE_ID_backpointers:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
goto out;
case BTREE_ID_need_discard:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_freespace:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_bucket_gens:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_lru:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
goto out;
case BTREE_ID_accounting:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+ goto out;
+ case BTREE_ID_snapshots:
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
goto out;
default:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
goto out;
}
out:
@@ -583,9 +589,6 @@ static int read_btree_roots(struct bch_fs *c)
buf.buf, bch2_err_str(ret))) {
if (btree_id_is_alloc(i))
r->error = 0;
-
- ret = bch2_btree_lost_data(c, i);
- BUG_ON(ret);
}
}
@@ -734,6 +737,11 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.read_only = true;
}
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_info(c, "filesystem is an unresized image file, mounting ro");
+ c->opts.read_only = true;
+ }
+
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
@@ -949,8 +957,6 @@ use_clean:
set_bit(BCH_FS_btree_running, &c->flags);
ret = bch2_sb_set_upgrade_extra(c);
-
- ret = bch2_fs_resize_on_mount(c);
if (ret)
goto err;
diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h
index d858ba67..c023f52f 100644
--- a/libbcachefs/recovery.h
+++ b/libbcachefs/recovery.h
@@ -2,7 +2,7 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-int bch2_btree_lost_data(struct bch_fs *, enum btree_id);
+int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id);
void bch2_reconstruct_alloc(struct bch_fs *);
int bch2_journal_replay(struct bch_fs *);
diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c
index 87150dd3..e14aca00 100644
--- a/libbcachefs/recovery_passes.c
+++ b/libbcachefs/recovery_passes.c
@@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
if (print)
- prt_printf(out, "need recovery pass %s (%u), but already rw",
+ prt_printf(out, "need recovery pass %s (%u), but already rw\n",
bch2_recovery_passes[pass], pass);
return -BCH_ERR_cannot_rewind_recovery;
}
if (print)
- prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)",
+ prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n",
bch2_recovery_passes[pass], pass,
bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
@@ -162,7 +162,7 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
}
}
-int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c,
+static int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c,
struct printbuf *out,
enum bch_recovery_pass pass)
{
@@ -193,32 +193,30 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
return ret;
}
-int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
- enum bch_recovery_pass pass)
+int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ struct printbuf *out,
+ enum bch_recovery_pass pass)
{
lockdep_assert_held(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
- return bch2_run_explicit_recovery_pass(c, pass);
+ return bch2_run_explicit_recovery_pass_printbuf(c, out, pass);
}
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ struct printbuf *out,
enum bch_recovery_pass pass)
{
- enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+ if (c->sb.recovery_passes_required & BIT_ULL(pass))
+ return 0;
mutex_lock(&c->sb_lock);
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
-
- if (!test_bit_le64(s, ext->recovery_passes_required)) {
- __set_bit_le64(s, ext->recovery_passes_required);
- bch2_write_super(c);
- }
+ int ret = __bch2_run_explicit_recovery_pass_persistent(c, out, pass);
mutex_unlock(&c->sb_lock);
- return bch2_run_explicit_recovery_pass(c, pass);
+ return ret;
}
static void bch2_clear_recovery_pass_required(struct bch_fs *c,
diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h
index e19a8aab..f33dd005 100644
--- a/libbcachefs/recovery_passes.h
+++ b/libbcachefs/recovery_passes.h
@@ -8,12 +8,12 @@ u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void);
-int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *,
- struct printbuf *,
- enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass);
int bch2_run_online_recovery_passes(struct bch_fs *);
int bch2_run_recovery_passes(struct bch_fs *);
diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c
index 6a16767e..296c6c92 100644
--- a/libbcachefs/sb-downgrade.c
+++ b/libbcachefs/sb-downgrade.c
@@ -20,6 +20,10 @@
* x(version, recovery_passes, errors...)
*/
#define UPGRADE_TABLE() \
+ x(snapshot_2, \
+ RECOVERY_PASS_ALL_FSCK, \
+ BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \
+ BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \
x(backpointers, \
RECOVERY_PASS_ALL_FSCK) \
x(inode_v3, \
diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h
index 2e1d487a..98c89852 100644
--- a/libbcachefs/sb-errors_format.h
+++ b/libbcachefs/sb-errors_format.h
@@ -46,7 +46,7 @@ enum bch_fsck_flags {
x(btree_node_unsupported_version, 34, 0) \
x(btree_node_bset_older_than_sb_min, 35, 0) \
x(btree_node_bset_newer_than_sb, 36, 0) \
- x(btree_node_data_missing, 37, 0) \
+ x(btree_node_data_missing, 37, FSCK_AUTOFIX) \
x(btree_node_bset_after_end, 38, 0) \
x(btree_node_replicas_sectors_written_mismatch, 39, 0) \
x(btree_node_replicas_data_mismatch, 40, 0) \
@@ -205,9 +205,9 @@ enum bch_fsck_flags {
x(snapshot_bad_depth, 184, 0) \
x(snapshot_bad_skiplist, 185, 0) \
x(subvol_pos_bad, 186, 0) \
- x(subvol_not_master_and_not_snapshot, 187, 0) \
+ x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \
x(subvol_to_missing_root, 188, 0) \
- x(subvol_root_wrong_bi_subvol, 189, 0) \
+ x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \
x(bkey_in_missing_snapshot, 190, 0) \
x(inode_pos_inode_nonzero, 191, 0) \
x(inode_pos_blockdev_range, 192, 0) \
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index b2f9cb2d..e810d85c 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -20,7 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf);
- int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+ int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf,
BCH_RECOVERY_PASS_check_allocations);
if (print)
@@ -35,9 +35,11 @@ void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
}
-void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
+void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
{
- bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset);
+ bch2_fs_inconsistent(ca->fs,
+ "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)",
+ bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets);
}
#define x(t, n, ...) [n] = #t,
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index 0337e34d..c9cb8f76 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -258,20 +258,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev)
static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket)
{
struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode);
- if (ca && !bucket_valid(ca, bucket.offset)) {
+ if (ca && unlikely(!bucket_valid(ca, bucket.offset))) {
bch2_dev_put(ca);
ca = NULL;
}
return ca;
}
-void bch2_dev_bucket_missing(struct bch_fs *, struct bpos);
+void bch2_dev_bucket_missing(struct bch_dev *, u64);
static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket)
{
- struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket);
- if (!ca)
- bch2_dev_bucket_missing(c, bucket);
+ struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode);
+ if (ca && unlikely(!bucket_valid(ca, bucket.offset))) {
+ bch2_dev_bucket_missing(ca, bucket.offset);
+ bch2_dev_put(ca);
+ ca = NULL;
+ }
return ca;
}
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 14ea09cc..94cf60f7 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -1743,10 +1743,6 @@ int bch2_snapshots_read(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
- if (bch2_err_matches(ret, EIO) ||
- (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
- ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
-
return ret;
}
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index a67ee4ad..0c1a0053 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
struct bch_hash_info {
u8 type;
- struct unicode_map *cf_encoding;
+ struct unicode_map *cf_encoding;
/*
* For crc32 or crc64 string hashes the first key value of
* the siphash_key (k0) is used as the key.
@@ -44,11 +44,10 @@ struct bch_hash_info {
static inline struct bch_hash_info
bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
- /* XXX ick */
struct bch_hash_info info = {
.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
- .cf_encoding = bi->bi_casefold ? c->cf_encoding : NULL,
+ .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 39376c87..51ab2ee1 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -23,7 +23,7 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
prt_printf(&buf, "missing subvolume %u", subvolid);
bool print = bch2_count_fsck_err(c, subvol_missing, &buf);
- int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+ int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf,
BCH_RECOVERY_PASS_check_inodes);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
@@ -62,8 +62,8 @@ static int check_subvol(struct btree_trans *trans,
ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
if (bch2_err_matches(ret, ENOENT))
- bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
- k.k->p.offset, snapid);
+ return bch2_run_explicit_recovery_pass(c,
+ BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
if (ret)
return ret;
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index d53cbc5f..8730d2e7 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -623,6 +623,9 @@ static void bch2_sb_update(struct bch_fs *c)
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
if (ext) {
+ c->sb.recovery_passes_required =
+ bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index f861583c..0f9c38f8 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -214,6 +214,7 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_fs_init_rw(struct bch_fs *);
+static int bch2_fs_resize_on_mount(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -567,6 +568,10 @@ static void __bch2_fs_free(struct bch_fs *c)
for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
+#ifdef CONFIG_UNICODE
+ utf8_unload(c->cf_encoding);
+#endif
+
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
bch2_free_fsck_errs(c);
@@ -798,7 +803,7 @@ static int bch2_fs_init_rw(struct bch_fs *c)
return 0;
}
-static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
bch_sb_handles *sbs)
{
struct bch_fs *c;
@@ -812,7 +817,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
goto out;
}
- c->stdio = (void *)(unsigned long) opts.stdio;
+ c->stdio = (void *)(unsigned long) opts->stdio;
__module_get(THIS_MODULE);
@@ -898,25 +903,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (ret)
goto err;
-#ifdef CONFIG_UNICODE
- /* Default encoding until we can potentially have more as an option. */
- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
- if (IS_ERR(c->cf_encoding)) {
- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
- ret = -EINVAL;
- goto err;
- }
-#else
- if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
- printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
- ret = -EINVAL;
- goto err;
- }
-#endif
-
/* Compat: */
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
@@ -931,7 +917,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (ret)
goto err;
- bch2_opts_apply(&c->opts, opts);
+ bch2_opts_apply(&c->opts, *opts);
c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
if (c->opts.inodes_use_key_cache)
@@ -1002,6 +988,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (ret)
goto err;
+#ifdef CONFIG_UNICODE
+ /* Default encoding until we can potentially have more as an option. */
+ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+ if (IS_ERR(c->cf_encoding)) {
+ printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+ ret = -EINVAL;
+ goto err;
+ }
+ bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+#else
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
+ printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
+ ret = -EINVAL;
+ goto err;
+ }
+#endif
+
for (i = 0; i < c->sb.nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
@@ -1070,6 +1079,40 @@ static void print_mount_opts(struct bch_fs *c)
printbuf_exit(&p);
}
+static bool bch2_fs_may_start(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned flags = 0;
+
+ switch (c->opts.degraded) {
+ case BCH_DEGRADED_very:
+ flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
+ break;
+ case BCH_DEGRADED_yes:
+ flags |= BCH_FORCE_IF_DEGRADED;
+ break;
+ default:
+ mutex_lock(&c->sb_lock);
+ for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+ if (!bch2_member_exists(c->disk_sb.sb, i))
+ continue;
+
+ ca = bch2_dev_locked(c, i);
+
+ if (!bch2_dev_is_online(ca) &&
+ (ca->mi.state == BCH_MEMBER_STATE_rw ||
+ ca->mi.state == BCH_MEMBER_STATE_ro)) {
+ mutex_unlock(&c->sb_lock);
+ return false;
+ }
+ }
+ mutex_unlock(&c->sb_lock);
+ break;
+ }
+
+ return bch2_have_enough_devs(c, c->online_devs, flags, true);
+}
+
int bch2_fs_start(struct bch_fs *c)
{
time64_t now = ktime_get_real_seconds();
@@ -1077,6 +1120,9 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
+ if (!bch2_fs_may_start(c))
+ return -BCH_ERR_insufficient_devices_to_start;
+
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
@@ -1106,6 +1152,12 @@ int bch2_fs_start(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+ ret = bch2_fs_resize_on_mount(c);
+ if (ret) {
+ up_write(&c->state_lock);
+ goto err;
+ }
+
rcu_read_lock();
for_each_online_member_rcu(c, ca)
if (ca->mi.state == BCH_MEMBER_STATE_rw)
@@ -1593,40 +1645,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
}
}
-static bool bch2_fs_may_start(struct bch_fs *c)
-{
- struct bch_dev *ca;
- unsigned flags = 0;
-
- switch (c->opts.degraded) {
- case BCH_DEGRADED_very:
- flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
- break;
- case BCH_DEGRADED_yes:
- flags |= BCH_FORCE_IF_DEGRADED;
- break;
- default:
- mutex_lock(&c->sb_lock);
- for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
- if (!bch2_member_exists(c->disk_sb.sb, i))
- continue;
-
- ca = bch2_dev_locked(c, i);
-
- if (!bch2_dev_is_online(ca) &&
- (ca->mi.state == BCH_MEMBER_STATE_rw ||
- ca->mi.state == BCH_MEMBER_STATE_ro)) {
- mutex_unlock(&c->sb_lock);
- return false;
- }
- }
- mutex_unlock(&c->sb_lock);
- break;
- }
-
- return bch2_have_enough_devs(c, c->online_devs, flags, true);
-}
-
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
bch2_dev_io_ref_stop(ca, WRITE);
@@ -1876,6 +1894,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_unlock;
}
unsigned dev_idx = ret;
+ ret = 0;
/* success: */
@@ -1895,27 +1914,29 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- ret = bch2_dev_usage_init(ca, false);
- if (ret)
- goto err_late;
+ if (test_bit(BCH_FS_started, &c->flags)) {
+ ret = bch2_dev_usage_init(ca, false);
+ if (ret)
+ goto err_late;
- ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
- bch_err_msg(ca, ret, "marking new superblock");
- if (ret)
- goto err_late;
+ ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
+ bch_err_msg(ca, ret, "marking new superblock");
+ if (ret)
+ goto err_late;
- ret = bch2_fs_freespace_init(c);
- bch_err_msg(ca, ret, "initializing free space");
- if (ret)
- goto err_late;
+ ret = bch2_fs_freespace_init(c);
+ bch_err_msg(ca, ret, "initializing free space");
+ if (ret)
+ goto err_late;
- if (ca->mi.state == BCH_MEMBER_STATE_rw)
- __bch2_dev_read_write(c, ca);
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_write(c, ca);
- ret = bch2_dev_journal_alloc(ca, false);
- bch_err_msg(c, ret, "allocating journal");
- if (ret)
- goto err_late;
+ ret = bch2_dev_journal_alloc(ca, false);
+ bch_err_msg(c, ret, "allocating journal");
+ if (ret)
+ goto err_late;
+ }
up_write(&c->state_lock);
out:
@@ -2096,10 +2117,8 @@ err:
return ret;
}
-int bch2_fs_resize_on_mount(struct bch_fs *c)
+static int bch2_fs_resize_on_mount(struct bch_fs *c)
{
- down_write(&c->state_lock);
-
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
u64 old_nbuckets = ca->mi.nbuckets;
u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
@@ -2138,9 +2157,6 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
}
}
}
-
- bch2_recalc_capacity(c);
- up_write(&c->state_lock);
return 0;
}
@@ -2260,8 +2276,8 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r)
cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time));
}
-struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
- struct bch_opts opts)
+struct bch_fs *bch2_fs_open(darray_const_str *devices,
+ struct bch_opts *opts)
{
bch_sb_handles sbs = {};
struct bch_fs *c = NULL;
@@ -2272,26 +2288,26 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
if (!try_module_get(THIS_MODULE))
return ERR_PTR(-ENODEV);
- if (!nr_devices) {
+ if (!devices->nr) {
ret = -EINVAL;
goto err;
}
- ret = darray_make_room(&sbs, nr_devices);
+ ret = darray_make_room(&sbs, devices->nr);
if (ret)
goto err;
- for (unsigned i = 0; i < nr_devices; i++) {
+ darray_for_each(*devices, i) {
struct bch_sb_handle sb = { NULL };
- ret = bch2_read_super(devices[i], &opts, &sb);
+ ret = bch2_read_super(*i, opts, &sb);
if (ret)
goto err;
BUG_ON(darray_push(&sbs, sb));
}
- if (opts.nochanges && !opts.read_only) {
+ if (opts->nochanges && !opts->read_only) {
ret = -BCH_ERR_erofs_nochanges;
goto err_print;
}
@@ -2301,7 +2317,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
best = sb;
darray_for_each_reverse(sbs, sb) {
- ret = bch2_dev_in_fs(best, sb, &opts);
+ ret = bch2_dev_in_fs(best, sb, opts);
if (ret == -BCH_ERR_device_has_been_removed ||
ret == -BCH_ERR_device_splitbrain) {
@@ -2331,11 +2347,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
}
up_write(&c->state_lock);
- if (!bch2_fs_may_start(c)) {
- ret = -BCH_ERR_insufficient_devices_to_start;
- goto err_print;
- }
-
if (!c->opts.nostart) {
ret = bch2_fs_start(c);
if (ret)
@@ -2350,7 +2361,7 @@ out:
return c;
err_print:
pr_err("bch_fs_open err opening %s: %s",
- devices[0], bch2_err_str(ret));
+ devices->data[0], bch2_err_str(ret));
err:
if (!IS_ERR_OR_NULL(c))
bch2_fs_stop(c);
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index dbf59547..7252544c 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -29,7 +29,6 @@ int bch2_dev_add(struct bch_fs *, const char *);
int bch2_dev_online(struct bch_fs *, const char *);
int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
-int bch2_fs_resize_on_mount(struct bch_fs *);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
@@ -44,7 +43,7 @@ void bch2_fs_free(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);
int bch2_fs_start(struct bch_fs *);
-struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
+struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *);
extern const struct blk_holder_ops bch2_sb_handle_bdev_ops;
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index c265b102..782a05fe 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
*/
static int test_peek_end(struct bch_fs *c, u64 nr)
{
+ delete_test_keys(c);
+
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
@@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
+ delete_test_keys(c);
+
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 7e6ebe8c..dc3817f5 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -252,6 +252,16 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}
+static bool string_is_spaces(const char *str)
+{
+ while (*str) {
+ if (*str != ' ')
+ return false;
+ str++;
+ }
+ return true;
+}
+
void bch2_print_string_as_lines(const char *prefix, const char *lines,
bool nonblocking)
{
@@ -272,6 +282,9 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines,
while (*lines) {
p = strchrnul(lines, '\n');
+ if (!*p && string_is_spaces(lines))
+ break;
+
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
if (!*p)
break;
@@ -1003,14 +1016,14 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
return ret;
}
-void bch2_darray_str_exit(darray_str *d)
+void bch2_darray_str_exit(darray_const_str *d)
{
darray_for_each(*d, i)
kfree(*i);
darray_exit(d);
}
-int bch2_split_devs(const char *_dev_name, darray_str *ret)
+int bch2_split_devs(const char *_dev_name, darray_const_str *ret)
{
darray_init(ret);
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 7a93e187..14cb2c7d 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -690,8 +690,8 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r)
return l.len == r.len && !memcmp(l.name, r.name, l.len);
}
-void bch2_darray_str_exit(darray_str *);
-int bch2_split_devs(const char *, darray_str *);
+void bch2_darray_str_exit(darray_const_str *);
+int bch2_split_devs(const char *, darray_const_str *);
#ifdef __KERNEL__