summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2017-02-01 18:16:42 -0900
committer Kent Overstreet <kent.overstreet@gmail.com> 2017-02-06 20:45:37 -0900
commit 5933f9478cc21e8b319309d2794948050d09b031 (patch)
tree 781509d106b4aa5c99dda7e88028b58baf6de786
parent ab88049a08bf734b9a6c9f2deae39fbd0c933192 (diff)
update bcache code, fsck improvements
-rw-r--r-- .bcache_revision 2
-rw-r--r-- .gitignore 1
-rw-r--r-- Makefile 26
-rw-r--r-- bcache-userspace-shim.c 82
-rw-r--r-- bcache.c 2
-rw-r--r-- cmd_assemble.c (renamed from bcache-assemble.c) 4
-rw-r--r-- cmd_device.c (renamed from bcache-device.c) 7
-rw-r--r-- cmd_format.c (renamed from bcache-format.c) 48
-rw-r--r-- cmd_fs.c (renamed from bcache-fs.c) 2
-rw-r--r-- cmd_fsck.c 68
-rw-r--r-- cmd_run.c (renamed from bcache-run.c) 3
-rw-r--r-- cmds.h (renamed from bcache-cmds.h) 6
-rw-r--r-- include/linux/kernel.h 1
-rw-r--r-- include/linux/sched.h 10
-rw-r--r-- include/linux/time64.h 5
-rw-r--r-- libbcache.c 112
-rw-r--r-- libbcache.h 21
-rw-r--r-- libbcache/alloc.c 3
-rw-r--r-- libbcache/bcache.h 5
-rw-r--r-- libbcache/bkey_methods.c 10
-rw-r--r-- libbcache/bkey_methods.h 2
-rw-r--r-- libbcache/btree_cache.c 56
-rw-r--r-- libbcache/btree_cache.h 10
-rw-r--r-- libbcache/btree_io.c 7
-rw-r--r-- libbcache/btree_types.h 3
-rw-r--r-- libbcache/btree_update.h 7
-rw-r--r-- libbcache/debug.c 60
-rw-r--r-- libbcache/dirent.c 49
-rw-r--r-- libbcache/dirent.h 12
-rw-r--r-- libbcache/error.h 63
-rw-r--r-- libbcache/extents.c 7
-rw-r--r-- libbcache/extents.h 9
-rw-r--r-- libbcache/fs-gc.c 710
-rw-r--r-- libbcache/fs-gc.h 3
-rw-r--r-- libbcache/fs.c 45
-rw-r--r-- libbcache/inode.c 32
-rw-r--r-- libbcache/inode.h 2
-rw-r--r-- libbcache/io.c 5
-rw-r--r-- libbcache/journal.c 37
-rw-r--r-- libbcache/movinggc.c 16
-rw-r--r-- libbcache/opts.c 46
-rw-r--r-- libbcache/opts.h 21
-rw-r--r-- libbcache/str_hash.h 8
-rw-r--r-- libbcache/super.c 117
-rw-r--r-- libbcache/sysfs.c 29
-rw-r--r-- libbcache/tier.c 3
-rw-r--r-- tools-util.c 102
-rw-r--r-- tools-util.h 54
48 files changed, 1251 insertions, 682 deletions
diff --git a/.bcache_revision b/.bcache_revision
index e728408..5caaaba 100644
--- a/.bcache_revision
+++ b/.bcache_revision
@@ -1 +1 @@
-BCACHE_REVISION=f8c8c133492ac9a63fdfeb9edf9bb26a3283db9f
+BCACHE_REVISION=76e3b2312705df2cb5adb8834bc6df56a288932e
diff --git a/.gitignore b/.gitignore
index 6291a6f..a2ff54d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ probe-bcache
*.a
tags
cscope*
+bcache-tools
diff --git a/Makefile b/Makefile
index c29d973..6916c9b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
PREFIX=/usr
INSTALL=install
-CFLAGS+=-std=gnu99 -O2 -g -flto -MMD -Wall \
+CFLAGS+=-std=gnu99 -O2 -g -MMD -Wall \
-Wno-unused-but-set-variable \
-Wno-pointer-sign \
-fno-strict-aliasing \
@@ -11,7 +11,14 @@ CFLAGS+=-std=gnu99 -O2 -g -flto -MMD -Wall \
-D_LGPL_SOURCE \
-DRCU_MEMBARRIER \
$(EXTRA_CFLAGS)
-LDFLAGS+=-O2 -g -flto
+LDFLAGS+=-O2 -g
+
+ifdef D
+ CFLAGS+=-Werror
+else
+ CFLAGS+=-flto -Werror
+ LDFLAGS+=-flto
+endif
PKGCONFIG_LIBS="blkid uuid liburcu"
CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}`
@@ -33,9 +40,18 @@ CCANOBJS=$(patsubst %.c,%.o,$(CCANSRCS))
LINUX_SRCS=$(wildcard linux/*.c linux/*/*.c)
LINUX_OBJS=$(LINUX_SRCS:.c=.o)
-OBJS=bcache.o bcache-assemble.o bcache-device.o bcache-format.o \
- bcache-fs.o bcache-run.o bcache-userspace-shim.o \
- libbcache.o tools-util.o $(LINUX_OBJS) $(CCANOBJS)
+OBJS=bcache.o \
+ bcache-userspace-shim.o \
+ cmd_assemble.o \
+ cmd_device.o \
+ cmd_fs.o \
+ cmd_fsck.o \
+ cmd_format.o \
+ cmd_run.o \
+ libbcache.o \
+ tools-util.o \
+ $(LINUX_OBJS) \
+ $(CCANOBJS)
DEPS=$(OBJS:.o=.d)
-include $(DEPS)
diff --git a/bcache-userspace-shim.c b/bcache-userspace-shim.c
index c4a5466..9be5b50 100644
--- a/bcache-userspace-shim.c
+++ b/bcache-userspace-shim.c
@@ -1,8 +1,10 @@
#include <errno.h>
-#include <linux/byteorder.h>
#include <linux/types.h>
+#include "libbcache.h"
+#include "tools-util.h"
+
/* stub out the bcache code we aren't building: */
struct block_device;
@@ -53,6 +55,56 @@ void bch_cache_accounting_destroy(struct cache_accounting *acc) {}
void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent) {}
+#define bch_fmt(_c, fmt) fmt "\n"
+
+enum fsck_err_opts fsck_err_opt;
+
+/* Returns true if error should be fixed: */
+
+/* XXX: flag if we ignore errors */
+
+/*
+ * If it's an error that we can't ignore, and we're running non
+ * interactively - return true and have the error fixed so that we don't have to
+ * bail out and stop the fsck early, so that the user can see all the errors
+ * present:
+ */
+#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \
+({ \
+ bool _fix = false; \
+ \
+ if (_can_fix) { \
+ switch (fsck_err_opt) { \
+ case FSCK_ERR_ASK: \
+ printf(msg ": fix?", ##__VA_ARGS__); \
+ _fix = ask_yn(); \
+ \
+ break; \
+ case FSCK_ERR_YES: \
+ bch_err(c, msg ", fixing", ##__VA_ARGS__); \
+ _fix = true; \
+ break; \
+ case FSCK_ERR_NO: \
+ bch_err(c, msg, ##__VA_ARGS__); \
+ _fix = false; \
+ break; \
+ } \
+ } else if (_can_ignore) { \
+ bch_err(c, msg, ##__VA_ARGS__); \
+ } \
+ \
+ if (_can_fix && !_can_ignore && fsck_err_opt == FSCK_ERR_NO) \
+ _fix = true; \
+ \
+ if (!_fix && !_can_ignore) { \
+ printf("Fatal filesystem inconsistency, halting\n"); \
+ ret = BCH_FSCK_ERRORS_NOT_FIXED; \
+ goto fsck_err; \
+ } \
+ \
+ _fix; \
+})
+
//#include "acl.c"
#include "alloc.c"
#include "bkey.c"
@@ -113,31 +165,3 @@ SHIM_KTYPE(bch_cache_set);
SHIM_KTYPE(bch_cache_set_internal);
SHIM_KTYPE(bch_cache_set_time_stats);
SHIM_KTYPE(bch_cache_set_opts_dir);
-
-//#include "tools-util.h"
-
-int cmd_fsck(int argc, char *argv[])
-{
- DECLARE_COMPLETION_ONSTACK(shutdown);
- struct cache_set_opts opts = cache_set_opts_empty();
- struct cache_set *c = NULL;
- const char *err;
-
- printf("registering %s...\n", argv[1]);
-
- err = bch_register_cache_set(argv + 1, argc - 1, opts, &c);
- if (err) {
- BUG_ON(c);
- fprintf(stderr, "error opening %s: %s\n", argv[1], err);
- exit(EXIT_FAILURE);
- }
-
- c->stop_completion = &shutdown;
- bch_cache_set_stop(c);
- closure_put(&c->cl);
-
- /* Killable? */
- wait_for_completion(&shutdown);
-
- return 0;
-}
diff --git a/bcache.c b/bcache.c
index 9f09319..eb532f2 100644
--- a/bcache.c
+++ b/bcache.c
@@ -21,7 +21,7 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include "bcache-cmds.h"
+#include "cmds.h"
static void usage(void)
{
diff --git a/bcache-assemble.c b/cmd_assemble.c
index 1b49166..1fa33e4 100644
--- a/bcache-assemble.c
+++ b/cmd_assemble.c
@@ -5,10 +5,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
#include <sys/ioctl.h>
-#include "bcache-cmds.h"
+#include "cmds.h"
+#include "linux/bcache-ioctl.h"
int cmd_assemble(int argc, char *argv[])
{
diff --git a/bcache-device.c b/cmd_device.c
index 5ff0d82..ecb63bb 100644
--- a/bcache-device.c
+++ b/cmd_device.c
@@ -12,8 +12,9 @@
#include <sys/types.h>
#include <unistd.h>
-#include "bcache-cmds.h"
+#include "cmds.h"
#include "libbcache.h"
+#include "linux/bcache-ioctl.h"
/* This code belongs under show_fs */
#if 0
@@ -93,10 +94,10 @@ int cmd_device_show(int argc, char *argv[])
};
char **args = bch_nih_init(argc, argv, opts);
- if (nr_args(args) != 1)
+ if (argc != 2)
die("Please supply a single device");
- struct bcache_handle fs = bcache_fs_open(args[0]);
+ struct bcache_handle fs = bcache_fs_open(argv[1]);
struct dirent *entry;
struct bcache_dev devices[256];
diff --git a/bcache-format.c b/cmd_format.c
index a7aabc3..b955b41 100644
--- a/bcache-format.c
+++ b/cmd_format.c
@@ -22,8 +22,10 @@
#include "ccan/darray/darray.h"
-#include "bcache-cmds.h"
+#include "cmds.h"
#include "libbcache.h"
+#include "opts.h"
+#include "util.h"
/* Open a block device, do magic blkid stuff: */
static int open_for_format(const char *dev, bool force)
@@ -58,7 +60,8 @@ static int open_for_format(const char *dev, bool force)
else
printf("%s contains a %s filesystem\n",
dev, fs_type);
- if (!ask_proceed())
+ fputs("Proceed anyway?", stdout);
+ if (!ask_yn())
exit(EXIT_FAILURE);
}
@@ -96,7 +99,6 @@ static void usage(void)
" bcache format --tier 0 /dev/sdb --tier 1 /dev/sdc\n"
"\n"
"Report bugs to <linux-bcache@vger.kernel.org>");
- exit(EXIT_SUCCESS);
}
#define OPTS \
@@ -132,6 +134,27 @@ static const struct option format_opts[] = {
{ NULL }
};
+static unsigned hatoi_validate(const char *s, const char *msg) /* parse size string s; dies naming msg on bad input */
+{
+ u64 v;
+
+ if (bch_strtoull_h(s, &v))
+ die("bad %s %s", msg, s);
+
+ if (v & (v - 1))
+ die("%s must be a power of two", msg);
+
+ v /= 512; /* convert to 512-byte units */
+
+ if (v > USHRT_MAX)
+ die("%s too large", msg); /* no "\n": matches the other die() calls */
+
+ if (!v)
+ die("%s too small", msg);
+
+ return v;
+}
+
int cmd_format(int argc, char *argv[])
{
darray(struct dev_opts) devices;
@@ -174,20 +197,21 @@ int cmd_format(int argc, char *argv[])
break;
case Opt_metadata_checksum_type:
meta_csum_type = read_string_list_or_die(optarg,
- csum_types, "checksum type");
+ bch_csum_types, "checksum type");
break;
case Opt_data_checksum_type:
data_csum_type = read_string_list_or_die(optarg,
- csum_types, "checksum type");
+ bch_csum_types, "checksum type");
break;
case Opt_compression_type:
compression_type = read_string_list_or_die(optarg,
- compression_types, "compression type");
+ bch_compression_types,
+ "compression type");
break;
case Opt_error_action:
case 'e':
on_error_action = read_string_list_or_die(optarg,
- error_actions, "error action");
+ bch_error_actions, "error action");
break;
case Opt_max_journal_entry_size:
max_journal_entry_size = hatoi_validate(optarg,
@@ -207,14 +231,19 @@ int cmd_format(int argc, char *argv[])
force = true;
break;
case Opt_fs_size:
- filesystem_size = hatoi(optarg) >> 9;
+ if (bch_strtoull_h(optarg, &filesystem_size))
+ die("invalid filesystem size");
+
+ filesystem_size >>= 9;
break;
case Opt_bucket_size:
bucket_size = hatoi_validate(optarg, "bucket size");
break;
case Opt_tier:
case 't':
- tier = strtoul_or_die(optarg, CACHE_TIERS, "tier");
+ if (kstrtouint(optarg, 10, &tier) ||
+ tier >= CACHE_TIERS)
+ die("invalid tier");
break;
case Opt_discard:
discard = true;
@@ -231,6 +260,7 @@ int cmd_format(int argc, char *argv[])
case Opt_help:
case 'h':
usage();
+ exit(EXIT_SUCCESS);
break;
}
diff --git a/bcache-fs.c b/cmd_fs.c
index 2e82086..382d31a 100644
--- a/bcache-fs.c
+++ b/cmd_fs.c
@@ -1,5 +1,5 @@
-#include "bcache-cmds.h"
+#include "cmds.h"
struct bcache_fs {
/* options... */
diff --git a/cmd_fsck.c b/cmd_fsck.c
new file mode 100644
index 0000000..1b7ebbf
--- /dev/null
+++ b/cmd_fsck.c
@@ -0,0 +1,68 @@
+
+#include "cmds.h"
+#include "libbcache.h"
+#include "super.h"
+#include "tools-util.h"
+
+static void usage(void) /* print the fsck help text to stdout */
+{
+ puts("bcache fsck - filesystem check and repair\n"
+ "Usage: bcache fsck [OPTION]... <devices>\n"
+ "\n"
+ "Options:\n"
+ " -p Automatic repair (no questions)\n"
+ " -n Don't repair, only check for errors\n"
+ " -y Assume \"yes\" to all questions\n"
+ " -f Force checking even if filesystem is marked clean\n"
+ " -v Be verbose\n"
+ " -h Display this help and exit\n"
+ "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_fsck(int argc, char *argv[])
+{
+ DECLARE_COMPLETION_ONSTACK(shutdown);
+ struct cache_set_opts opts = cache_set_opts_empty();
+ struct cache_set *c = NULL;
+ const char *err;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "pynfvh")) != -1)
+ switch (opt) {
+ case 'p':
+ fsck_err_opt = FSCK_ERR_YES;
+ break;
+ case 'y':
+ fsck_err_opt = FSCK_ERR_YES;
+ break;
+ case 'n':
+ opts.nochanges = true;
+ fsck_err_opt = FSCK_ERR_NO;
+ break;
+ case 'f':
+ /* force check, even if filesystem marked clean: */
+ break;
+ case 'v':
+ opts.verbose_recovery = true;
+ break;
+ case 'h':
+ usage();
+ exit(EXIT_SUCCESS);
+ }
+
+ if (optind >= argc)
+ die("Please supply device(s) to check");
+
+ err = bch_register_cache_set(argv + optind, argc - optind, opts, &c);
+ if (err)
+ die("error opening %s: %s", argv[optind], err);
+
+ c->stop_completion = &shutdown;
+ bch_cache_set_stop(c);
+ closure_put(&c->cl);
+
+ /* Killable? */
+ wait_for_completion(&shutdown);
+
+ return 0;
+}
diff --git a/bcache-run.c b/cmd_run.c
index f419407..74f3248 100644
--- a/bcache-run.c
+++ b/cmd_run.c
@@ -11,7 +11,8 @@
#include <uuid/uuid.h>
-#include "bcache-cmds.h"
+#include "cmds.h"
+#include "linux/bcache-ioctl.h"
int cmd_run(int argc, char *argv[])
{
diff --git a/bcache-cmds.h b/cmds.h
index 36035c2..c762a2c 100644
--- a/bcache-cmds.h
+++ b/cmds.h
@@ -4,8 +4,8 @@
* GPLv2
*/
-#ifndef _BCACHE_H
-#define _BCACHE_H
+#ifndef _CMDS_H
+#define _CMDS_H
#include "tools-util.h"
@@ -25,4 +25,4 @@ int cmd_device_remove(int argc, char *argv[]);
int cmd_fsck(int argc, char *argv[]);
-#endif /* _BCACHE_H */
+#endif /* _CMDS_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e4ffa86..2233350 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -8,6 +8,7 @@
#include <limits.h>
#include <linux/bug.h>
+#include <linux/byteorder.h>
#include <linux/compiler.h>
#define IS_ENABLED(opt) 0
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0316f50..885cc56 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -141,4 +141,14 @@ static inline u64 ktime_get_seconds(void)
return ts.tv_sec;
}
+static inline struct timespec current_kernel_time(void) /* userspace stand-in for the kernel helper */
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_REALTIME, &ts); /* kernel current_kernel_time() is wall-clock time, not time since boot */
+ return ts;
+}
+
+#define CURRENT_TIME (current_kernel_time())
+
#endif /* __TOOLS_LINUX_SCHED_H */
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 9d8a3ef..2e1ad82 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -61,6 +61,11 @@ static inline struct timespec64 timespec_to_timespec64(const struct timespec ts)
# define ns_to_timespec64 ns_to_timespec
# define timespec64_add_ns timespec_add_ns
+static inline s64 timespec_to_ns(const struct timespec *ts)
+{
+ return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
#else
static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
diff --git a/libbcache.c b/libbcache.c
index 081fd42..802d3b4 100644
--- a/libbcache.c
+++ b/libbcache.c
@@ -14,74 +14,7 @@
#include "linux/bcache.h"
#include "libbcache.h"
#include "checksum.h"
-
-const char * const cache_state[] = {
- "active",
- "ro",
- "failed",
- "spare",
- NULL
-};
-
-const char * const replacement_policies[] = {
- "lru",
- "fifo",
- "random",
- NULL
-};
-
-const char * const csum_types[] = {
- "none",
- "crc32c",
- "crc64",
- NULL
-};
-
-const char * const compression_types[] = {
- "none",
- "lz4",
- "gzip",
- NULL
-};
-
-const char * const str_hash_types[] = {
- "crc32c",
- "crc64",
- "siphash",
- "sha1",
- NULL
-};
-
-const char * const error_actions[] = {
- "continue",
- "readonly",
- "panic",
- NULL
-};
-
-const char * const member_states[] = {
- "active",
- "ro",
- "failed",
- "spare",
- NULL
-};
-
-const char * const bdev_cache_mode[] = {
- "writethrough",
- "writeback",
- "writearound",
- "none",
- NULL
-};
-
-const char * const bdev_state[] = {
- "detached",
- "clean",
- "dirty",
- "inconsistent",
- NULL
-};
+#include "opts.h"
#define BCH_MIN_NR_NBUCKETS (1 << 10)
@@ -93,15 +26,10 @@ void __do_write_sb(int fd, void *sb, size_t bytes)
char zeroes[SB_SECTOR << 9] = {0};
/* Zero start of disk */
- if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) {
- perror("write error trying to zero start of disk\n");
- exit(EXIT_FAILURE);
- }
+ xpwrite(fd, zeroes, SB_SECTOR << 9, 0);
+
/* Write superblock */
- if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) {
- perror("write error trying to write superblock\n");
- exit(EXIT_FAILURE);
- }
+ xpwrite(fd, sb, bytes, SB_SECTOR << 9);
fsync(fd);
close(fd);
@@ -142,7 +70,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
/* calculate bucket sizes: */
for (i = devs; i < devs + nr_devs; i++) {
if (!i->size)
- i->size = get_size(i->path, i->fd);
+ i->size = get_size(i->path, i->fd) >> 9;
if (!i->bucket_size) {
if (i->size < min_size(block_size))
@@ -297,12 +225,12 @@ void bcache_super_print(struct cache_sb *sb, int units)
internal_uuid_str,
label,
le64_to_cpu(sb->version),
- pr_units(le16_to_cpu(sb->block_size), units).b,
- pr_units(CACHE_SET_BTREE_NODE_SIZE(sb), units).b,
- pr_units(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb), units).b,
+ pr_units(le16_to_cpu(sb->block_size), units),
+ pr_units(CACHE_SET_BTREE_NODE_SIZE(sb), units),
+ pr_units(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb), units),
CACHE_SET_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
- ? error_actions[CACHE_SET_ERROR_ACTION(sb)]
+ ? bch_error_actions[CACHE_SET_ERROR_ACTION(sb)]
: "unknown",
CACHE_SET_CLEAN(sb),
@@ -313,19 +241,19 @@ void bcache_super_print(struct cache_sb *sb, int units)
CACHE_SET_DATA_REPLICAS_WANT(sb),
CACHE_SET_META_PREFERRED_CSUM_TYPE(sb) < BCH_CSUM_NR
- ? csum_types[CACHE_SET_META_PREFERRED_CSUM_TYPE(sb)]
+ ? bch_csum_types[CACHE_SET_META_PREFERRED_CSUM_TYPE(sb)]
: "unknown",
CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb) < BCH_CSUM_NR
- ? csum_types[CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb)]
+ ? bch_csum_types[CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb)]
: "unknown",
CACHE_SET_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR
- ? compression_types[CACHE_SET_COMPRESSION_TYPE(sb)]
+ ? bch_compression_types[CACHE_SET_COMPRESSION_TYPE(sb)]
: "unknown",
CACHE_SET_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
- ? str_hash_types[CACHE_SET_STR_HASH_TYPE(sb)]
+ ? bch_str_hash_types[CACHE_SET_STR_HASH_TYPE(sb)]
: "unknown",
CACHE_INODE_32BIT(sb),
@@ -356,14 +284,14 @@ void bcache_super_print(struct cache_sb *sb, int units)
" Discard: %llu\n",
i, member_uuid_str,
pr_units(le16_to_cpu(m->bucket_size) *
- le64_to_cpu(m->nbuckets), units).b,
- pr_units(le16_to_cpu(m->bucket_size), units).b,
+ le64_to_cpu(m->nbuckets), units),
+ pr_units(le16_to_cpu(m->bucket_size), units),
le16_to_cpu(m->first_bucket),
le64_to_cpu(m->nbuckets),
last_mount ? ctime(&last_mount) : "(never)",
CACHE_STATE(m) < CACHE_STATE_NR
- ? member_states[CACHE_STATE(m)]
+ ? bch_cache_state[CACHE_STATE(m)]
: "unknown",
CACHE_TIER(m),
@@ -371,7 +299,7 @@ void bcache_super_print(struct cache_sb *sb, int units)
CACHE_HAS_DATA(m),
CACHE_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
- ? replacement_policies[CACHE_REPLACEMENT(m)]
+ ? bch_cache_replacement_policies[CACHE_REPLACEMENT(m)]
: "unknown",
CACHE_DISCARD(m));
@@ -387,8 +315,7 @@ struct cache_sb *bcache_super_read(const char *path)
if (fd < 0)
die("couldn't open %s", path);
- if (pread(fd, &sb, sizeof(sb), SB_SECTOR << 9) != sizeof(sb))
- die("error reading superblock");
+ xpread(fd, &sb, sizeof(sb), SB_SECTOR << 9);
if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
die("not a bcache superblock");
@@ -397,8 +324,7 @@ struct cache_sb *bcache_super_read(const char *path)
ret = calloc(1, bytes);
- if (pread(fd, ret, bytes, SB_SECTOR << 9) != bytes)
- die("error reading superblock");
+ xpread(fd, ret, bytes, SB_SECTOR << 9);
return ret;
}
diff --git a/libbcache.h b/libbcache.h
index e4600d3..07329cd 100644
--- a/libbcache.h
+++ b/libbcache.h
@@ -1,17 +1,20 @@
#ifndef _LIBBCACHE_H
#define _LIBBCACHE_H
-#include "tools-util.h"
+#include <linux/uuid.h>
#include "stdbool.h"
-extern const char * const cache_state[];
-extern const char * const replacement_policies[];
-extern const char * const csum_types[];
-extern const char * const compression_types[];
-extern const char * const str_hash_types[];
-extern const char * const error_actions[];
-extern const char * const bdev_cache_mode[];
-extern const char * const bdev_state[];
+#include "tools-util.h"
+
+struct cache_sb;
+
+enum fsck_err_opts {
+ FSCK_ERR_ASK,
+ FSCK_ERR_YES,
+ FSCK_ERR_NO,
+};
+
+extern enum fsck_err_opts fsck_err_opt;
struct dev_opts {
int fd;
diff --git a/libbcache/alloc.c b/libbcache/alloc.c
index cff750c..4fe08b5 100644
--- a/libbcache/alloc.c
+++ b/libbcache/alloc.c
@@ -254,6 +254,9 @@ static int bch_prio_write(struct cache *ca)
bool need_new_journal_entry;
int i, ret;
+ if (c->opts.nochanges)
+ return 0;
+
trace_bcache_prio_write_start(ca);
atomic64_add(ca->mi.bucket_size * prio_buckets(ca),
diff --git a/libbcache/bcache.h b/libbcache/bcache.h
index 9a43a69..309d372 100644
--- a/libbcache/bcache.h
+++ b/libbcache/bcache.h
@@ -210,8 +210,9 @@
#define bch_meta_write_fault(name) \
dynamic_fault("bcache:meta:write:" name)
-#define bch_fmt(_c, fmt) \
- "bcache (%s): " fmt "\n", ((_c)->name)
+#ifndef bch_fmt
+#define bch_fmt(_c, fmt) "bcache (%s): " fmt "\n", ((_c)->name)
+#endif
#define bch_info(c, fmt, ...) \
printk(KERN_INFO bch_fmt(c, fmt), ##__VA_ARGS__)
diff --git a/libbcache/bkey_methods.c b/libbcache/bkey_methods.c
index 3bcd0e0..90f7e5f 100644
--- a/libbcache/bkey_methods.c
+++ b/libbcache/bkey_methods.c
@@ -89,6 +89,16 @@ void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k)
ops->key_debugcheck(c, b, k);
}
+void bch_val_to_text(struct cache_set *c, enum bkey_type type,
+ char *buf, size_t size, struct bkey_s_c k)
+{
+ const struct bkey_ops *ops = bch_bkey_ops[type];
+
+ if (k.k->type >= KEY_TYPE_GENERIC_NR &&
+ ops->val_to_text)
+ ops->val_to_text(c, buf, size, k);
+}
+
void bch_bkey_val_to_text(struct cache_set *c, enum bkey_type type,
char *buf, size_t size, struct bkey_s_c k)
{
diff --git a/libbcache/bkey_methods.h b/libbcache/bkey_methods.h
index 0e305eb..c1f0dc5 100644
--- a/libbcache/bkey_methods.h
+++ b/libbcache/bkey_methods.h
@@ -67,6 +67,8 @@ const char *btree_bkey_invalid(struct cache_set *, struct btree *,
struct bkey_s_c);
void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c);
+void bch_val_to_text(struct cache_set *, enum bkey_type,
+ char *, size_t, struct bkey_s_c);
void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
char *, size_t, struct bkey_s_c);
diff --git a/libbcache/btree_cache.c b/libbcache/btree_cache.c
index 0994190..ca6064a 100644
--- a/libbcache/btree_cache.c
+++ b/libbcache/btree_cache.c
@@ -149,7 +149,8 @@ static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush)
if (!six_trylock_write(&b->lock))
goto out_unlock_intent;
- if (btree_node_write_error(b))
+ if (btree_node_write_error(b) ||
+ btree_node_noevict(b))
goto out_unlock;
if (!list_empty(&b->write_blocked))
@@ -699,3 +700,56 @@ retry:
return b;
}
+
+int bch_print_btree_node(struct cache_set *c, struct btree *b, /* format a summary of node b into buf; returns scnprintf()'s result */
+ char *buf, size_t len)
+{
+ const struct bkey_format *f = &b->format;
+ struct bset_stats stats;
+ char ptrs[100] = ""; /* bch_val_to_text() may write nothing; keep "%s" below well-defined */
+
+ memset(&stats, 0, sizeof(stats));
+
+ bch_val_to_text(c, BKEY_TYPE_BTREE, ptrs, sizeof(ptrs),
+ bkey_i_to_s_c(&b->key));
+ bch_btree_keys_stats(b, &stats);
+
+ return scnprintf(buf, len,
+ "l %u %llu:%llu - %llu:%llu:\n"
+ " ptrs: %s\n"
+ " format: u64s %u fields %u %u %u %u %u\n"
+ " unpack fn len: %u\n"
+ " bytes used %zu/%zu (%zu%% full)\n"
+ " sib u64s: %u, %u (merge threshold %zu)\n"
+ " nr packed keys %u\n"
+ " nr unpacked keys %u\n"
+ " floats %zu\n"
+ " failed unpacked %zu\n"
+ " failed prev %zu\n"
+ " failed overflow %zu\n",
+ b->level,
+ b->data->min_key.inode,
+ b->data->min_key.offset,
+ b->data->max_key.inode,
+ b->data->max_key.offset,
+ ptrs,
+ f->key_u64s,
+ f->bits_per_field[0],
+ f->bits_per_field[1],
+ f->bits_per_field[2],
+ f->bits_per_field[3],
+ f->bits_per_field[4],
+ b->unpack_fn_len,
+ b->nr.live_u64s * sizeof(u64),
+ btree_bytes(c) - sizeof(struct btree_node),
+ b->nr.live_u64s * 100 / btree_max_u64s(c),
+ b->sib_u64s[0],
+ b->sib_u64s[1],
+ BTREE_FOREGROUND_MERGE_THRESHOLD(c),
+ b->nr.packed_keys,
+ b->nr.unpacked_keys,
+ stats.floats,
+ stats.failed_unpacked,
+ stats.failed_prev,
+ stats.failed_overflow);
+}
diff --git a/libbcache/btree_cache.h b/libbcache/btree_cache.h
index e745abb..c26489d 100644
--- a/libbcache/btree_cache.h
+++ b/libbcache/btree_cache.h
@@ -56,6 +56,16 @@ static inline unsigned btree_blocks(struct cache_set *c)
return c->sb.btree_node_size >> c->block_bits;
}
+#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4)
+
+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
+
#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->btree_id].b)
+int bch_print_btree_node(struct cache_set *, struct btree *,
+ char *, size_t);
+
#endif /* _BCACHE_BTREE_CACHE_H */
diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c
index ff976b5..4c295af 100644
--- a/libbcache/btree_io.c
+++ b/libbcache/btree_io.c
@@ -200,7 +200,7 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
const struct bkey_format *f = &iter->b->format;
struct bkey_packed *in, *out = dst;
struct bkey_i l, r;
- bool prev = false, l_packed;
+ bool prev = false, l_packed = false;
u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE);
u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET);
u64 new_size;
@@ -1443,8 +1443,9 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
* Make sure to update b->written so bch_btree_init_next() doesn't
* break:
*/
- if (bch_journal_error(&c->journal)) {
- set_btree_node_write_error(b);
+ if (bch_journal_error(&c->journal) ||
+ c->opts.nochanges) {
+ set_btree_node_noevict(b);
b->written += sectors_to_write;
btree_bounce_free(c, order, used_mempool, data);
diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h
index 3632a04..176d42a 100644
--- a/libbcache/btree_types.h
+++ b/libbcache/btree_types.h
@@ -2,6 +2,7 @@
#define _BCACHE_BTREE_TYPES_H
#include <linux/bcache.h>
+#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rhashtable.h>
#include <linux/semaphore.h>
@@ -138,6 +139,7 @@ enum btree_flags {
BTREE_NODE_read_error,
BTREE_NODE_write_error,
BTREE_NODE_dirty,
+ BTREE_NODE_noevict,
BTREE_NODE_write_idx,
BTREE_NODE_accessed,
BTREE_NODE_write_in_flight,
@@ -147,6 +149,7 @@ enum btree_flags {
BTREE_FLAG(read_error);
BTREE_FLAG(write_error);
BTREE_FLAG(dirty);
+BTREE_FLAG(noevict);
BTREE_FLAG(write_idx);
BTREE_FLAG(accessed);
BTREE_FLAG(write_in_flight);
diff --git a/libbcache/btree_update.h b/libbcache/btree_update.h
index 0154441..5fc1b1a 100644
--- a/libbcache/btree_update.h
+++ b/libbcache/btree_update.h
@@ -11,13 +11,6 @@ struct bkey_format_state;
struct bkey_format;
struct btree;
-#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4)
-
-#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
-#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
- (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
- (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
-
static inline void btree_node_reset_sib_u64s(struct btree *b)
{
b->sib_u64s[0] = b->nr.live_u64s;
diff --git a/libbcache/debug.c b/libbcache/debug.c
index 1be2e60..39f5550 100644
--- a/libbcache/debug.c
+++ b/libbcache/debug.c
@@ -46,6 +46,9 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b)
struct bio *bio;
struct closure cl;
+ if (c->opts.nochanges)
+ return;
+
closure_init_stack(&cl);
btree_node_io_lock(b);
@@ -296,55 +299,6 @@ static const struct file_operations btree_debug_ops = {
.read = bch_read_btree,
};
-static int print_btree_node(struct dump_iter *i, struct btree *b)
-{
- const struct bkey_format *f = &b->format;
- struct bset_stats stats;
-
- memset(&stats, 0, sizeof(stats));
-
- bch_btree_keys_stats(b, &stats);
-
- i->bytes = scnprintf(i->buf, sizeof(i->buf),
- "l %u %llu:%llu - %llu:%llu:\n"
- " format: u64s %u fields %u %u %u %u %u\n"
- " unpack fn len: %u\n"
- " bytes used %zu/%zu (%zu%% full)\n"
- " sib u64s: %u, %u (merge threshold %zu)\n"
- " nr packed keys %u\n"
- " nr unpacked keys %u\n"
- " floats %zu\n"
- " failed unpacked %zu\n"
- " failed prev %zu\n"
- " failed overflow %zu\n",
- b->level,
- b->data->min_key.inode,
- b->data->min_key.offset,
- b->data->max_key.inode,
- b->data->max_key.offset,
- f->key_u64s,
- f->bits_per_field[0],
- f->bits_per_field[1],
- f->bits_per_field[2],
- f->bits_per_field[3],
- f->bits_per_field[4],
- b->unpack_fn_len,
- b->nr.live_u64s * sizeof(u64),
- btree_bytes(i->c) - sizeof(struct btree_node),
- b->nr.live_u64s * 100 / btree_max_u64s(i->c),
- b->sib_u64s[0],
- b->sib_u64s[1],
- BTREE_FOREGROUND_MERGE_THRESHOLD(i->c),
- b->nr.packed_keys,
- b->nr.unpacked_keys,
- stats.floats,
- stats.failed_unpacked,
- stats.failed_prev,
- stats.failed_overflow);
-
- return flush_buf(i);
-}
-
static ssize_t bch_read_btree_formats(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@@ -365,7 +319,9 @@ static ssize_t bch_read_btree_formats(struct file *file, char __user *buf,
return i->ret;
for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) {
- err = print_btree_node(i, b);
+ i->bytes = bch_print_btree_node(i->c, b, i->buf,
+ sizeof(i->buf));
+ err = flush_buf(i);
if (err)
break;
@@ -421,7 +377,9 @@ static ssize_t bch_read_bfloat_failed(struct file *file, char __user *buf,
struct bkey_packed *_k = bch_btree_node_iter_peek(node_iter, b);
if (iter.nodes[0] != prev_node) {
- err = print_btree_node(i, iter.nodes[0]);
+ i->bytes = bch_print_btree_node(i->c, b, i->buf,
+ sizeof(i->buf));
+ err = flush_buf(i);
if (err)
break;
}
diff --git a/libbcache/dirent.c b/libbcache/dirent.c
index 920ad2f..d97c3b2 100644
--- a/libbcache/dirent.c
+++ b/libbcache/dirent.c
@@ -10,7 +10,7 @@
#include <linux/dcache.h>
-static unsigned dirent_name_bytes(struct bkey_s_c_dirent d)
+unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent d)
{
unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent);
@@ -61,7 +61,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
- struct qstr name = QSTR_INIT(d.v->d_name, dirent_name_bytes(d));
+ struct qstr name = QSTR_INIT(d.v->d_name, bch_dirent_name_bytes(d));
return bch_dirent_hash(info, &name);
}
@@ -69,7 +69,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
{
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
- int len = dirent_name_bytes(l);
+ int len = bch_dirent_name_bytes(l);
const struct qstr *r = _r;
return len - r->len ?: memcmp(l.v->d_name, r->name, len);
@@ -79,8 +79,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
{
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
- int l_len = dirent_name_bytes(l);
- int r_len = dirent_name_bytes(r);
+ int l_len = bch_dirent_name_bytes(l);
+ int r_len = bch_dirent_name_bytes(r);
return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
}
@@ -125,7 +125,7 @@ static void bch_dirent_to_text(struct cache_set *c, char *buf,
if (size) {
unsigned n = min_t(unsigned, size,
- dirent_name_bytes(d));
+ bch_dirent_name_bytes(d));
memcpy(buf, d.v->d_name, n);
buf[size - 1] = '\0';
buf += n;
@@ -167,15 +167,16 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
bkey_val_bytes(&dirent->k) -
(sizeof(struct bch_dirent) + name->len));
- EBUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+ EBUG_ON(bch_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
return dirent;
}
-int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type,
- const struct qstr *name, u64 dst_inum)
+int bch_dirent_create(struct cache_set *c, u64 dir_inum,
+ const struct bch_hash_info *hash_info,
+ u8 type, const struct qstr *name, u64 dst_inum,
+ u64 *journal_seq, int flags)
{
- struct bch_inode_info *ei = to_bch_ei(dir);
struct bkey_i_dirent *dirent;
int ret;
@@ -183,9 +184,8 @@ int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type,
if (!dirent)
return -ENOMEM;
- ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &ei->journal_seq,
- &dirent->k_i, BCH_HASH_SET_MUST_CREATE);
+ ret = bch_hash_set(dirent_hash_desc, hash_info, c, dir_inum,
+ journal_seq, &dirent->k_i, flags);
kfree(dirent);
return ret;
@@ -346,26 +346,25 @@ err:
return ret;
}
-int bch_dirent_delete(struct cache_set *c, struct inode *dir,
- const struct qstr *name)
+int bch_dirent_delete(struct cache_set *c, u64 dir_inum,
+ const struct bch_hash_info *hash_info,
+ const struct qstr *name,
+ u64 *journal_seq)
{
- struct bch_inode_info *ei = to_bch_ei(dir);
-
- return bch_hash_delete(dirent_hash_desc, &ei->str_hash,
- c, ei->vfs_inode.i_ino,
- &ei->journal_seq, name);
+ return bch_hash_delete(dirent_hash_desc, hash_info,
+ c, dir_inum, journal_seq, name);
}
-u64 bch_dirent_lookup(struct cache_set *c, struct inode *dir,
+u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum,
+ const struct bch_hash_info *hash_info,
const struct qstr *name)
{
- struct bch_inode_info *ei = to_bch_ei(dir);
struct btree_iter iter;
struct bkey_s_c k;
u64 inum;
- k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &iter, name);
+ k = bch_hash_lookup(dirent_hash_desc, hash_info, c,
+ dir_inum, &iter, name);
if (IS_ERR(k.k)) {
bch_btree_iter_unlock(&iter);
return 0;
@@ -428,7 +427,7 @@ int bch_readdir(struct cache_set *c, struct file *file,
if (k.k->p.inode > inode->i_ino)
break;
- len = dirent_name_bytes(dirent);
+ len = bch_dirent_name_bytes(dirent);
pr_debug("emitting %s", dirent.v->d_name);
diff --git a/libbcache/dirent.h b/libbcache/dirent.h
index e18089b..cc67d55 100644
--- a/libbcache/dirent.h
+++ b/libbcache/dirent.h
@@ -7,10 +7,13 @@ struct qstr;
struct file;
struct dir_context;
struct cache_set;
+struct bch_hash_info;
-int bch_dirent_create(struct cache_set *c, struct inode *, u8,
- const struct qstr *, u64);
-int bch_dirent_delete(struct cache_set *c, struct inode *, const struct qstr *);
+unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent);
+int bch_dirent_create(struct cache_set *c, u64, const struct bch_hash_info *,
+ u8, const struct qstr *, u64, u64 *, int);
+int bch_dirent_delete(struct cache_set *, u64, const struct bch_hash_info *,
+ const struct qstr *, u64 *);
enum bch_rename_mode {
BCH_RENAME,
@@ -23,8 +26,9 @@ int bch_dirent_rename(struct cache_set *,
struct inode *, const struct qstr *,
u64 *, enum bch_rename_mode);
-u64 bch_dirent_lookup(struct cache_set *c, struct inode *,
+u64 bch_dirent_lookup(struct cache_set *, u64, const struct bch_hash_info *,
const struct qstr *);
+
int bch_empty_dir(struct cache_set *, u64);
int bch_readdir(struct cache_set *, struct file *, struct dir_context *);
diff --git a/libbcache/error.h b/libbcache/error.h
index 9eb9335..33a28c4 100644
--- a/libbcache/error.h
+++ b/libbcache/error.h
@@ -101,38 +101,51 @@ enum {
BCH_FSCK_UNKNOWN_VERSION = 4,
};
-#define unfixable_fsck_err(c, msg, ...) \
-do { \
- bch_err(c, msg " (repair unimplemented)", ##__VA_ARGS__); \
- ret = BCH_FSCK_REPAIR_UNIMPLEMENTED; \
- goto fsck_err; \
-} while (0)
+/* These macros return true if error should be fixed: */
-#define unfixable_fsck_err_on(cond, c, ...) \
-do { \
- if (cond) \
- unfixable_fsck_err(c, __VA_ARGS__); \
-} while (0)
+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
-#define fsck_err(c, msg, ...) \
-do { \
- if (!(c)->opts.fix_errors) { \
- bch_err(c, msg, ##__VA_ARGS__); \
+#ifndef __fsck_err
+#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \
+({ \
+ bool _fix = false; \
+ \
+ if (_can_fix && (c)->opts.fix_errors) { \
+ bch_err(c, msg ", fixing", ##__VA_ARGS__); \
+ set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \
+ _fix = true; \
+ } else if (_can_ignore && \
+ (c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \
+ bch_err(c, msg " (ignoring)", ##__VA_ARGS__); \
+ } else { \
+ bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
goto fsck_err; \
} \
- set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \
- bch_err(c, msg ", fixing", ##__VA_ARGS__); \
-} while (0)
-
-#define fsck_err_on(cond, c, ...) \
-({ \
- bool _ret = (cond); \
\
- if (_ret) \
- fsck_err(c, __VA_ARGS__); \
- _ret; \
+ BUG_ON(!_fix && !_can_ignore); \
+ _fix; \
})
+#endif
+
+#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \
+ ((cond) ? __fsck_err(c, _can_fix, _can_ignore, \
+ _nofix_msg, ##__VA_ARGS__) : false)
+
+#define unfixable_fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__)
+
+#define need_fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__)
+
+#define mustfix_fsck_err(c, ...) \
+ __fsck_err(c, true, false, "not fixing", ##__VA_ARGS__)
+
+#define mustfix_fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__)
+
+#define fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__)
/*
* Fatal errors: these don't indicate a bug, but we can't continue running in RW
diff --git a/libbcache/extents.c b/libbcache/extents.c
index 45fa220..c026d59 100644
--- a/libbcache/extents.c
+++ b/libbcache/extents.c
@@ -108,15 +108,16 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *dst,
/* Common among btree and extent ptrs */
-bool bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
+const struct bch_extent_ptr *
+bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
{
const struct bch_extent_ptr *ptr;
extent_for_each_ptr(e, ptr)
if (ptr->dev == dev)
- return true;
+ return ptr;
- return false;
+ return NULL;
}
unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent e,
diff --git a/libbcache/extents.h b/libbcache/extents.h
index 2dc6446..e1cb47a 100644
--- a/libbcache/extents.h
+++ b/libbcache/extents.h
@@ -1,15 +1,15 @@
#ifndef _BCACHE_EXTENTS_H
#define _BCACHE_EXTENTS_H
+#include "bcache.h"
#include "bkey.h"
#include <linux/bcache.h>
-struct bch_replace_info;
-union bch_extent_crc;
-struct btree_iter;
+struct btree_node_iter;
struct btree_insert;
struct btree_insert_entry;
+struct extent_insert_hook;
struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *,
struct btree *,
@@ -485,7 +485,8 @@ static inline void bch_extent_drop_ptr(struct bkey_s_extent e,
bch_extent_drop_redundant_crcs(e);
}
-bool bch_extent_has_device(struct bkey_s_c_extent, unsigned);
+const struct bch_extent_ptr *
+bch_extent_has_device(struct bkey_s_c_extent, unsigned);
bool bch_cut_front(struct bpos, struct bkey_i *);
bool bch_cut_back(struct bpos, struct bkey *);
diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c
index bd2a867..1dec230 100644
--- a/libbcache/fs-gc.c
+++ b/libbcache/fs-gc.c
@@ -11,6 +11,529 @@
#include <linux/generic-radix-tree.h>
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static int remove_dirent(struct cache_set *c, struct btree_iter *iter,
+ struct bkey_s_c_dirent dirent)
+{
+ struct qstr name;
+ struct bkey_i_inode dir_inode;
+ struct bch_hash_info dir_hash_info;
+ u64 dir_inum = dirent.k->p.inode;
+ int ret;
+ char *buf;
+
+ name.len = bch_dirent_name_bytes(dirent);
+ buf = kmalloc(name.len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, dirent.v->d_name, name.len);
+ buf[name.len] = '\0';
+ name.name = buf;
+
+ /* Unlock iter so we don't deadlock, after copying name: */
+ bch_btree_iter_unlock(iter);
+
+ ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode);
+ if (ret)
+ goto err;
+
+ dir_hash_info = bch_hash_info_init(&dir_inode.v);
+
+ ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
+err:
+ kfree(buf);
+ return ret;
+}
+
+static int reattach_inode(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode,
+ u64 inum)
+{
+ struct bch_hash_info lostfound_hash_info =
+ bch_hash_info_init(&lostfound_inode->v);
+ char name_buf[21]; /* U64_MAX is 20 decimal digits, plus the NUL */
+ struct qstr name;
+ int ret;
+
+ snprintf(name_buf, sizeof(name_buf), "%llu", inum);
+ name = (struct qstr) QSTR(name_buf);
+ /* Bump lost+found's link count and write it back before adding the entry: */
+ le32_add_cpu(&lostfound_inode->v.i_nlink, 1);
+
+ ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i,
+ NULL, NULL, NULL, 0);
+ if (ret)
+ return ret;
+
+ return bch_dirent_create(c, lostfound_inode->k.p.inode,
+ &lostfound_hash_info,
+ DT_DIR, &name, inum, NULL, 0);
+}
+
+struct inode_walker {
+ bool first_this_inode;
+ bool have_inode;
+ u16 i_mode;
+ u64 i_size;
+ u64 cur_inum;
+ struct bkey_i_inode inode;
+};
+
+static struct inode_walker inode_walker_init(void)
+{
+ return (struct inode_walker) {
+ .cur_inum = -1,
+ .have_inode = false,
+ };
+}
+
+static int walk_inode(struct cache_set *c, struct inode_walker *w, u64 inum)
+{
+ w->first_this_inode = inum != w->cur_inum;
+ w->cur_inum = inum;
+
+ if (w->first_this_inode) {
+ int ret = bch_inode_find_by_inum(c, inum, &w->inode);
+
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ w->have_inode = !ret;
+
+ if (w->have_inode) {
+ w->i_mode = le16_to_cpu(w->inode.v.i_mode);
+ w->i_size = le64_to_cpu(w->inode.v.i_size);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and
+ * that i_size and i_sectors are consistent
+ */
+noinline_for_stack
+static int check_extents(struct cache_set *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u64 i_sectors;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ if (k.k->type == KEY_TYPE_DISCARD)
+ continue;
+
+ ret = walk_inode(c, &w, k.k->p.inode);
+ if (ret)
+ break;
+
+ unfixable_fsck_err_on(!w.have_inode, c,
+ "extent type %u for missing inode %llu",
+ k.k->type, k.k->p.inode);
+
+ unfixable_fsck_err_on(w.first_this_inode && w.have_inode &&
+ le64_to_cpu(w.inode.v.i_sectors) !=
+ (i_sectors = bch_count_inode_sectors(c, w.cur_inum)),
+ c, "i_sectors wrong: got %llu, should be %llu",
+ le64_to_cpu(w.inode.v.i_sectors), i_sectors);
+
+ unfixable_fsck_err_on(w.have_inode &&
+ !S_ISREG(w.i_mode) && !S_ISLNK(w.i_mode), c,
+ "extent type %u for non regular file, inode %llu mode %o",
+ k.k->type, k.k->p.inode, w.i_mode);
+
+ unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
+ k.k->p.offset > round_up(w.i_size, PAGE_SIZE) >> 9, c,
+ "extent type %u offset %llu past end of inode %llu, i_size %llu",
+ k.k->type, k.k->p.offset, k.k->p.inode, w.i_size);
+ }
+fsck_err:
+ return bch_btree_iter_unlock(&iter) ?: ret;
+}
+
+/*
+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
+ * validate d_type
+ */
+noinline_for_stack
+static int check_dirents(struct cache_set *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ struct bkey_s_c_dirent d;
+ struct bkey_i_inode target;
+ bool have_target;
+ u64 d_inum;
+
+ ret = walk_inode(c, &w, k.k->p.inode);
+ if (ret)
+ break;
+
+ unfixable_fsck_err_on(!w.have_inode, c,
+ "dirent in nonexisting directory %llu",
+ k.k->p.inode);
+
+ unfixable_fsck_err_on(!S_ISDIR(w.i_mode), c,
+ "dirent in non directory inode %llu, type %u",
+ k.k->p.inode, mode_to_type(w.i_mode));
+
+ if (k.k->type != BCH_DIRENT)
+ continue;
+
+ d = bkey_s_c_to_dirent(k);
+ d_inum = le64_to_cpu(d.v->d_inum);
+
+ if (fsck_err_on(d_inum == d.k->p.inode, c,
+ "dirent points to own directory")) {
+ ret = remove_dirent(c, &iter, d);
+ if (ret)
+ goto err;
+ continue;
+ }
+
+ ret = bch_inode_find_by_inum(c, d_inum, &target);
+ if (ret && ret != -ENOENT)
+ break;
+
+ have_target = !ret;
+ ret = 0;
+
+ if (fsck_err_on(!have_target, c,
+ "dirent points to missing inode %llu, type %u filename %s",
+ d_inum, d.v->d_type, d.v->d_name)) {
+ ret = remove_dirent(c, &iter, d);
+ if (ret)
+ goto err;
+ continue;
+ }
+
+ if (fsck_err_on(have_target &&
+ d.v->d_type !=
+ mode_to_type(le16_to_cpu(target.v.i_mode)), c,
+ "incorrect d_type: got %u should be %u, filename %s",
+ d.v->d_type,
+ mode_to_type(le16_to_cpu(target.v.i_mode)),
+ d.v->d_name)) {
+ struct bkey_i_dirent *n;
+
+ n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bkey_reassemble(&n->k_i, d.s_c);
+ n->v.d_type = mode_to_type(le16_to_cpu(target.v.i_mode));
+
+ ret = bch_btree_insert_at(c, NULL, NULL, NULL,
+ BTREE_INSERT_NOFAIL,
+ BTREE_INSERT_ENTRY(&iter, &n->k_i));
+ kfree(n);
+ if (ret)
+ goto err;
+
+ }
+ }
+err:
+fsck_err:
+ return bch_btree_iter_unlock(&iter) ?: ret;
+}
+
+/*
+ * Walk xattrs: verify that they all have a corresponding inode
+ */
+noinline_for_stack
+static int check_xattrs(struct cache_set *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ ret = walk_inode(c, &w, k.k->p.inode);
+ if (ret)
+ break;
+
+ unfixable_fsck_err_on(!w.have_inode, c,
+ "xattr for missing inode %llu",
+ k.k->p.inode);
+ }
+fsck_err:
+ return bch_btree_iter_unlock(&iter) ?: ret;
+}
+
+/* Get root directory, create if it doesn't exist: */
+static int check_root(struct cache_set *c, struct bkey_i_inode *root_inode)
+{
+ int ret;
+
+ ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ if (fsck_err_on(ret, c, "root directory missing"))
+ goto create_root;
+
+ if (fsck_err_on(!S_ISDIR(le16_to_cpu(root_inode->v.i_mode)), c,
+ "root inode not a directory"))
+ goto create_root;
+
+ return 0;
+fsck_err:
+ return ret;
+create_root:
+ bch_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
+ root_inode->k.p.inode = BCACHE_ROOT_INO;
+
+ return bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i,
+ NULL, NULL, NULL, 0);
+}
+
+/* Get lost+found, create if it doesn't exist: */
+static int check_lostfound(struct cache_set *c,
+ struct bkey_i_inode *root_inode,
+ struct bkey_i_inode *lostfound_inode)
+{
+ struct qstr lostfound = QSTR("lost+found");
+ struct bch_hash_info root_hash_info = bch_hash_info_init(&root_inode->v);
+ u64 inum;
+ int ret;
+
+ inum = bch_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info,
+ &lostfound);
+ if (!inum) {
+ bch_notice(c, "creating lost+found");
+ goto create_lostfound;
+ }
+
+ ret = bch_inode_find_by_inum(c, inum, lostfound_inode);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ if (fsck_err_on(ret, c, "lost+found missing"))
+ goto create_lostfound;
+
+ if (fsck_err_on(!S_ISDIR(le16_to_cpu(lostfound_inode->v.i_mode)), c,
+ "lost+found inode not a directory"))
+ goto create_lostfound;
+
+ return 0;
+fsck_err:
+ return ret;
+create_lostfound:
+ le32_add_cpu(&root_inode->v.i_nlink, 1);
+
+ ret = bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i,
+ NULL, NULL, NULL, 0);
+ if (ret)
+ return ret;
+
+ bch_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
+
+ ret = bch_inode_create(c, &lostfound_inode->k_i, BLOCKDEV_INODE_MAX, 0,
+ &c->unused_inode_hint);
+ if (ret)
+ return ret;
+
+ ret = bch_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR,
+ &lostfound, lostfound_inode->k.p.inode, NULL, 0);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+struct inode_bitmap {
+ unsigned long *bits;
+ size_t size;
+};
+
+static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
+{
+ return nr < b->size ? test_bit(nr, b->bits) : false;
+}
+
+static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
+{
+ if (nr >= b->size) {
+ size_t new_size = max(max(PAGE_SIZE * 8,
+ b->size * 2),
+ nr + 1);
+ void *n;
+
+ new_size = roundup_pow_of_two(new_size);
+ n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
+ if (!n)
+ return -ENOMEM;
+
+ b->bits = n;
+ b->size = new_size;
+ }
+
+ __set_bit(nr, b->bits);
+ return 0;
+}
+
+struct pathbuf {
+ size_t nr;
+ size_t size;
+
+ struct pathbuf_entry {
+ u64 inum;
+ u64 offset;
+ } *entries;
+};
+
+static int path_down(struct pathbuf *p, u64 inum)
+{
+ if (p->nr == p->size) {
+ size_t new_size = max(256UL, p->size * 2);
+ void *n = krealloc(p->entries,
+ new_size * sizeof(p->entries[0]),
+ GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ p->entries = n;
+ p->size = new_size;
+ };
+
+ p->entries[p->nr++] = (struct pathbuf_entry) {
+ .inum = inum,
+ .offset = 0,
+ };
+ return 0;
+}
+
+noinline_for_stack
+static int check_directory_structure(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode)
+{
+ struct inode_bitmap dirs_done = { NULL, 0 };
+ struct pathbuf path = { 0, 0, NULL };
+ struct pathbuf_entry *e;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent dirent;
+ bool had_unreachable;
+ u64 d_inum;
+ int ret = 0;
+
+ /* DFS: */
+restart_dfs:
+ ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
+ if (ret)
+ goto err;
+
+ ret = path_down(&path, BCACHE_ROOT_INO);
+ if (ret)
+ return ret;
+
+ while (path.nr) {
+next:
+ e = &path.entries[path.nr - 1];
+
+ if (e->offset == U64_MAX)
+ goto up;
+
+ for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
+ POS(e->inum, e->offset + 1), k) {
+ if (k.k->p.inode != e->inum)
+ break;
+
+ e->offset = k.k->p.offset;
+
+ if (k.k->type != BCH_DIRENT)
+ continue;
+
+ dirent = bkey_s_c_to_dirent(k);
+
+ if (dirent.v->d_type != DT_DIR)
+ continue;
+
+ d_inum = le64_to_cpu(dirent.v->d_inum);
+
+ if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
+ "directory with multiple hardlinks")) {
+ ret = remove_dirent(c, &iter, dirent);
+ if (ret)
+ goto err;
+ continue;
+ }
+
+ ret = inode_bitmap_set(&dirs_done, d_inum);
+ if (ret)
+ goto err;
+
+ ret = path_down(&path, d_inum);
+ if (ret)
+ goto err;
+
+ bch_btree_iter_unlock(&iter);
+ goto next;
+ }
+ ret = bch_btree_iter_unlock(&iter);
+ if (ret)
+ goto err;
+up:
+ path.nr--;
+ }
+
+ had_unreachable = false;
+
+ for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
+ if (k.k->type != BCH_INODE_FS ||
+ !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
+ continue;
+
+ if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
+ "unreachable directory found (inum %llu)",
+ k.k->p.inode)) {
+ bch_btree_iter_unlock(&iter);
+
+ ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
+ if (ret)
+ goto err;
+
+ had_unreachable = true;
+ }
+ }
+ ret = bch_btree_iter_unlock(&iter);
+ if (ret)
+ goto err;
+
+ if (had_unreachable) {
+ bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
+ kfree(dirs_done.bits);
+ kfree(path.entries);
+ memset(&dirs_done, 0, sizeof(dirs_done));
+ memset(&path, 0, sizeof(path));
+ goto restart_dfs;
+ }
+
+out:
+ kfree(dirs_done.bits);
+ kfree(path.entries);
+ return ret;
+err:
+fsck_err:
+ ret = bch_btree_iter_unlock(&iter) ?: ret;
+ goto out;
+}
+
struct nlink {
u32 count;
u32 dir_count;
@@ -40,11 +563,6 @@ static void inc_link(struct cache_set *c, struct nlinks *links,
link->count++;
}
-/*
- * XXX: should do a DFS (via filesystem heirarchy), and make sure all dirents
- * are reachable
- */
-
noinline_for_stack
static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
u64 range_start, u64 *range_end)
@@ -99,7 +617,9 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum)
return bch_btree_iter_unlock(&iter) ?: sectors;
}
-static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
+static int bch_gc_do_inode(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode,
+ struct btree_iter *iter,
struct bkey_s_c_inode inode, struct nlink link)
{
u16 i_mode = le16_to_cpu(inode.v->i_mode);
@@ -115,14 +635,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
inode.k->p.inode, i_nlink,
link.count, mode_to_type(i_mode));
+ /* These should have been caught/fixed by earlier passes: */
if (S_ISDIR(i_mode)) {
- unfixable_fsck_err_on(link.count > 1, c,
+ need_fsck_err_on(link.count > 1, c,
"directory %llu with multiple hardlinks: %u",
inode.k->p.inode, link.count);
real_i_nlink = link.count * 2 + link.dir_count;
} else {
- unfixable_fsck_err_on(link.dir_count, c,
+ need_fsck_err_on(link.dir_count, c,
"found dirents for non directory %llu",
inode.k->p.inode);
@@ -135,11 +656,16 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
"but found orphaned inode %llu",
inode.k->p.inode);
- unfixable_fsck_err_on(S_ISDIR(i_mode) &&
- bch_empty_dir(c, inode.k->p.inode), c,
- "non empty directory with link count 0, "
- "inode nlink %u, dir links found %u",
- i_nlink, link.dir_count);
+ if (fsck_err_on(S_ISDIR(i_mode) &&
+ bch_empty_dir(c, inode.k->p.inode), c,
+ "non empty directory with link count 0, "
+ "inode nlink %u, dir links found %u",
+ i_nlink, link.dir_count)) {
+ ret = reattach_inode(c, lostfound_inode,
+ inode.k->p.inode);
+ if (ret)
+ return ret;
+ }
bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
@@ -235,7 +761,9 @@ fsck_err:
}
noinline_for_stack
-static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links,
+static int bch_gc_walk_inodes(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode,
+ struct nlinks *links,
u64 range_start, u64 range_end)
{
struct btree_iter iter;
@@ -257,7 +785,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
nlinks_pos = range_start + nlinks_iter.pos;
if (iter.pos.inode > nlinks_pos) {
- unfixable_fsck_err_on(link && link->count, c,
+ /* Should have been caught by dirents pass: */
+ need_fsck_err_on(link && link->count, c,
"missing inode %llu (nlink %u)",
nlinks_pos, link->count);
genradix_iter_advance(&nlinks_iter, links);
@@ -274,9 +803,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
*/
bch_btree_iter_unlock(&iter);
- ret = bch_gc_do_inode(c, &iter,
- bkey_s_c_to_inode(k),
- *link);
+ ret = bch_gc_do_inode(c, lostfound_inode, &iter,
+ bkey_s_c_to_inode(k), *link);
if (ret == -EINTR)
continue;
if (ret)
@@ -285,7 +813,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
if (link->count)
atomic_long_inc(&c->nr_inodes);
} else {
- unfixable_fsck_err_on(link->count, c,
+ /* Should have been caught by dirents pass: */
+ need_fsck_err_on(link->count, c,
"missing inode %llu (nlink %u)",
nlinks_pos, link->count);
}
@@ -304,7 +833,9 @@ fsck_err:
return ret ?: ret2;
}
-int bch_gc_inode_nlinks(struct cache_set *c)
+noinline_for_stack
+static int check_inode_nlinks(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode)
{
struct nlinks links;
u64 this_iter_range_start, next_iter_range_start = 0;
@@ -322,7 +853,7 @@ int bch_gc_inode_nlinks(struct cache_set *c)
if (ret)
break;
- ret = bch_gc_walk_inodes(c, &links,
+ ret = bch_gc_walk_inodes(c, lostfound_inode, &links,
this_iter_range_start,
next_iter_range_start);
if (ret)
@@ -336,140 +867,45 @@ int bch_gc_inode_nlinks(struct cache_set *c)
return ret;
}
-static void next_inode(struct cache_set *c, u64 inum, u64 *cur_inum,
- struct bkey_i_inode *inode,
- bool *first_this_inode, bool *have_inode,
- u64 *i_size, u16 *i_mode)
-{
- *first_this_inode = inum != *cur_inum;
- *cur_inum = inum;
-
- if (*first_this_inode) {
- *have_inode = !bch_inode_find_by_inum(c, inum, inode);
-
- if (*have_inode) {
- *i_mode = le16_to_cpu(inode->v.i_mode);
- *i_size = le64_to_cpu(inode->v.i_size);
- }
- }
-}
-
/*
* Checks for inconsistencies that shouldn't happen, unless we have a bug.
* Doesn't fix them yet, mainly because they haven't yet been observed:
*/
-int bch_fsck(struct cache_set *c)
+int bch_fsck(struct cache_set *c, bool full_fsck)
{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_i_inode inode;
- bool first_this_inode, have_inode;
- u64 cur_inum, i_sectors;
- u64 i_size = 0;
- u16 i_mode = 0;
- int ret = 0;
-
- cur_inum = -1;
- have_inode = false;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(BCACHE_ROOT_INO, 0), k) {
- if (k.k->type == KEY_TYPE_DISCARD)
- continue;
-
- next_inode(c, k.k->p.inode, &cur_inum, &inode,
- &first_this_inode, &have_inode,
- &i_size, &i_mode);
-
- unfixable_fsck_err_on(!have_inode, c,
- "extent type %u for missing inode %llu",
- k.k->type, k.k->p.inode);
-
- unfixable_fsck_err_on(first_this_inode && have_inode &&
- le64_to_cpu(inode.v.i_sectors) !=
- (i_sectors = bch_count_inode_sectors(c, cur_inum)),
- c, "i_sectors wrong: got %llu, should be %llu",
- le64_to_cpu(inode.v.i_sectors), i_sectors);
-
- unfixable_fsck_err_on(have_inode &&
- !S_ISREG(i_mode) && !S_ISLNK(i_mode), c,
- "extent type %u for non regular file, inode %llu mode %o",
- k.k->type, k.k->p.inode, i_mode);
+ struct bkey_i_inode root_inode, lostfound_inode;
+ int ret;
- unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
- k.k->p.offset > round_up(i_size, PAGE_SIZE) >> 9, c,
- "extent type %u offset %llu past end of inode %llu, i_size %llu",
- k.k->type, k.k->p.offset, k.k->p.inode, i_size);
- }
- ret = bch_btree_iter_unlock(&iter);
+ ret = check_root(c, &root_inode);
if (ret)
return ret;
- cur_inum = -1;
- have_inode = false;
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
- POS(BCACHE_ROOT_INO, 0), k) {
- struct bkey_s_c_dirent d;
- struct bkey_i_inode target;
- bool have_target;
- u64 d_inum;
-
- next_inode(c, k.k->p.inode, &cur_inum, &inode,
- &first_this_inode, &have_inode,
- &i_size, &i_mode);
-
- unfixable_fsck_err_on(!have_inode, c,
- "dirent in nonexisting directory %llu",
- k.k->p.inode);
-
- unfixable_fsck_err_on(!S_ISDIR(i_mode), c,
- "dirent in non directory inode %llu, type %u",
- k.k->p.inode, mode_to_type(i_mode));
-
- if (k.k->type != BCH_DIRENT)
- continue;
-
- d = bkey_s_c_to_dirent(k);
- d_inum = le64_to_cpu(d.v->d_inum);
-
- unfixable_fsck_err_on(d_inum == d.k->p.inode, c,
- "dirent points to own directory");
+ ret = check_lostfound(c, &root_inode, &lostfound_inode);
+ if (ret)
+ return ret;
- have_target = !bch_inode_find_by_inum(c, d_inum, &target);
+ if (!full_fsck)
+ goto check_nlinks;
- unfixable_fsck_err_on(!have_target, c,
- "dirent points to missing inode %llu, type %u filename %s",
- d_inum, d.v->d_type, d.v->d_name);
+ ret = check_extents(c);
+ if (ret)
+ return ret;
- unfixable_fsck_err_on(have_target &&
- d.v->d_type !=
- mode_to_type(le16_to_cpu(target.v.i_mode)), c,
- "incorrect d_type: got %u should be %u, filename %s",
- d.v->d_type,
- mode_to_type(le16_to_cpu(target.v.i_mode)),
- d.v->d_name);
- }
- ret = bch_btree_iter_unlock(&iter);
+ ret = check_dirents(c);
if (ret)
return ret;
- cur_inum = -1;
- have_inode = false;
- for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
- POS(BCACHE_ROOT_INO, 0), k) {
- next_inode(c, k.k->p.inode, &cur_inum, &inode,
- &first_this_inode, &have_inode,
- &i_size, &i_mode);
+ ret = check_xattrs(c);
+ if (ret)
+ return ret;
- unfixable_fsck_err_on(!have_inode, c,
- "xattr for missing inode %llu",
- k.k->p.inode);
- }
- ret = bch_btree_iter_unlock(&iter);
+ ret = check_directory_structure(c, &lostfound_inode);
+ if (ret)
+ return ret;
+check_nlinks:
+ ret = check_inode_nlinks(c, &lostfound_inode);
if (ret)
return ret;
return 0;
-fsck_err:
- bch_btree_iter_unlock(&iter);
- return ret;
}
diff --git a/libbcache/fs-gc.h b/libbcache/fs-gc.h
index c44086c..ca6571a 100644
--- a/libbcache/fs-gc.h
+++ b/libbcache/fs-gc.h
@@ -2,7 +2,6 @@
#define _BCACHE_FS_GC_H
s64 bch_count_inode_sectors(struct cache_set *, u64);
-int bch_gc_inode_nlinks(struct cache_set *);
-int bch_fsck(struct cache_set *);
+int bch_fsck(struct cache_set *, bool);
#endif /* _BCACHE_FS_GC_H */
diff --git a/libbcache/fs.c b/libbcache/fs.c
index 1f01e48..884a950 100644
--- a/libbcache/fs.c
+++ b/libbcache/fs.c
@@ -26,7 +26,7 @@
static struct kmem_cache *bch_inode_cache;
-static void bch_inode_init(struct bch_inode_info *, struct bkey_s_c_inode);
+static void bch_vfs_inode_init(struct bch_inode_info *, struct bkey_s_c_inode);
/*
* I_SIZE_DIRTY requires special handling:
@@ -175,7 +175,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
}
ei = to_bch_ei(inode);
- bch_inode_init(ei, bkey_s_c_to_inode(k));
+ bch_vfs_inode_init(ei, bkey_s_c_to_inode(k));
ei->journal_seq = bch_inode_journal_seq(&c->journal, inum);
@@ -193,10 +193,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
struct inode *inode;
struct posix_acl *default_acl = NULL, *acl = NULL;
struct bch_inode_info *ei;
- struct bch_inode *bi;
struct bkey_i_inode bkey_inode;
- struct timespec ts = CURRENT_TIME;
- s64 now = timespec_to_ns(&ts);
int ret;
inode = new_inode(parent->i_sb);
@@ -213,19 +210,8 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
ei = to_bch_ei(inode);
- bi = &bkey_inode_init(&bkey_inode.k_i)->v;
- bi->i_uid = cpu_to_le32(i_uid_read(inode));
- bi->i_gid = cpu_to_le32(i_gid_read(inode));
-
- bi->i_mode = cpu_to_le16(inode->i_mode);
- bi->i_dev = cpu_to_le32(rdev);
- bi->i_atime = cpu_to_le64(now);
- bi->i_mtime = cpu_to_le64(now);
- bi->i_ctime = cpu_to_le64(now);
- bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1);
-
- get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed));
- SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type);
+ bch_inode_init(c, &bkey_inode, i_uid_read(inode),
+ i_gid_read(inode), inode->i_mode, rdev);
ret = bch_inode_create(c, &bkey_inode.k_i,
BLOCKDEV_INODE_MAX, 0,
@@ -239,7 +225,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
goto err;
}
- bch_inode_init(ei, inode_i_to_s_c(&bkey_inode));
+ bch_vfs_inode_init(ei, inode_i_to_s_c(&bkey_inode));
if (default_acl) {
ret = bch_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
@@ -270,9 +256,13 @@ static int bch_vfs_dirent_create(struct cache_set *c, struct inode *dir,
u8 type, const struct qstr *name,
struct inode *dst)
{
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
int ret;
- ret = bch_dirent_create(c, dir, type, name, dst->i_ino);
+ ret = bch_dirent_create(c, dir->i_ino, &dir_ei->str_hash,
+ type, name, dst->i_ino,
+ &dir_ei->journal_seq,
+ BCH_HASH_SET_MUST_CREATE);
if (unlikely(ret))
return ret;
@@ -317,10 +307,13 @@ static struct dentry *bch_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct cache_set *c = dir->i_sb->s_fs_info;
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
struct inode *inode = NULL;
u64 inum;
- inum = bch_dirent_lookup(c, dir, &dentry->d_name);
+ inum = bch_dirent_lookup(c, dir->i_ino,
+ &dir_ei->str_hash,
+ &dentry->d_name);
if (inum)
inode = bch_vfs_inode_get(dir->i_sb, inum);
@@ -374,7 +367,8 @@ static int bch_unlink(struct inode *dir, struct dentry *dentry)
lockdep_assert_held(&inode->i_rwsem);
- ret = bch_dirent_delete(c, dir, &dentry->d_name);
+ ret = bch_dirent_delete(c, dir->i_ino, &dir_ei->str_hash,
+ &dentry->d_name, &dir_ei->journal_seq);
if (ret)
return ret;
@@ -1016,8 +1010,8 @@ static const struct address_space_operations bch_address_space_operations = {
.error_remove_page = generic_error_remove_page,
};
-static void bch_inode_init(struct bch_inode_info *ei,
- struct bkey_s_c_inode bkey_inode)
+static void bch_vfs_inode_init(struct bch_inode_info *ei,
+ struct bkey_s_c_inode bkey_inode)
{
struct inode *inode = &ei->vfs_inode;
const struct bch_inode *bi = bkey_inode.v;
@@ -1044,8 +1038,7 @@ static void bch_inode_init(struct bch_inode_info *ei,
inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime));
bch_inode_flags_to_vfs(inode);
- ei->str_hash.seed = le64_to_cpu(bi->i_hash_seed);
- ei->str_hash.type = INODE_STR_HASH_TYPE(bi);
+ ei->str_hash = bch_hash_info_init(bi);
inode->i_mapping->a_ops = &bch_address_space_operations;
diff --git a/libbcache/inode.c b/libbcache/inode.c
index d36de43..200deb0 100644
--- a/libbcache/inode.c
+++ b/libbcache/inode.c
@@ -7,6 +7,8 @@
#include "io.h"
#include "keylist.h"
+#include <linux/random.h>
+
ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k)
{
if (k->p.offset)
@@ -105,6 +107,28 @@ const struct bkey_ops bch_bkey_inode_ops = {
.val_to_text = bch_inode_to_text,
};
+void bch_inode_init(struct cache_set *c, struct bkey_i_inode *inode,
+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev)
+{
+ struct timespec ts = CURRENT_TIME;
+ s64 now = timespec_to_ns(&ts);
+ struct bch_inode *bi;
+
+ bi = &bkey_inode_init(&inode->k_i)->v;
+ bi->i_uid = cpu_to_le32(uid);
+ bi->i_gid = cpu_to_le32(gid);
+
+ bi->i_mode = cpu_to_le16(mode);
+ bi->i_dev = cpu_to_le32(rdev);
+ bi->i_atime = cpu_to_le64(now);
+ bi->i_mtime = cpu_to_le64(now);
+ bi->i_ctime = cpu_to_le64(now);
+ bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1);
+
+ get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed));
+ SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type);
+}
+
int bch_inode_create(struct cache_set *c, struct bkey_i *inode,
u64 min, u64 max, u64 *hint)
{
@@ -228,15 +252,14 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
{
struct btree_iter iter;
struct bkey_s_c k;
- int ret = -ENOENT;
for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES,
POS(inode_nr, 0), k) {
switch (k.k->type) {
case BCH_INODE_FS:
- ret = 0;
bkey_reassemble(&inode->k_i, k);
- break;
+ bch_btree_iter_unlock(&iter);
+ return 0;
default:
/* hole, not found */
break;
@@ -245,9 +268,8 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
break;
}
- bch_btree_iter_unlock(&iter);
- return ret;
+ return bch_btree_iter_unlock(&iter) ?: -ENOENT;
}
int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid,
diff --git a/libbcache/inode.h b/libbcache/inode.h
index d8b28c7..fa1a4cf 100644
--- a/libbcache/inode.h
+++ b/libbcache/inode.h
@@ -5,6 +5,8 @@ extern const struct bkey_ops bch_bkey_inode_ops;
ssize_t bch_inode_status(char *, size_t, const struct bkey *);
+void bch_inode_init(struct cache_set *, struct bkey_i_inode *,
+ uid_t, gid_t, umode_t, dev_t);
int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *);
int bch_inode_truncate(struct cache_set *, u64, u64,
struct extent_insert_hook *, u64 *);
diff --git a/libbcache/io.c b/libbcache/io.c
index 7219b65..4112ea5 100644
--- a/libbcache/io.c
+++ b/libbcache/io.c
@@ -140,6 +140,8 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c,
struct bch_write_bio *n;
struct cache *ca;
+ BUG_ON(c->opts.nochanges);
+
wbio->split = false;
wbio->c = c;
@@ -738,7 +740,8 @@ void bch_write(struct closure *cl)
!(op->flags & BCH_WRITE_CACHED),
op->flags & BCH_WRITE_DISCARD);
- if (!percpu_ref_tryget(&c->writes)) {
+ if (c->opts.nochanges ||
+ !percpu_ref_tryget(&c->writes)) {
__bcache_io_error(c, "read only");
op->error = -EROFS;
bch_disk_reservation_put(c, &op->res);
diff --git a/libbcache/journal.c b/libbcache/journal.c
index ffc9573..9e09b86 100644
--- a/libbcache/journal.c
+++ b/libbcache/journal.c
@@ -478,14 +478,14 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
char buf[160];
int ret = 0;
- if (fsck_err_on(!k->k.u64s, c,
+ if (mustfix_fsck_err_on(!k->k.u64s, c,
"invalid %s in journal: k->u64s 0", type)) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
journal_entry_null_range(jset_keys_next(entry), next);
return 0;
}
- if (fsck_err_on((void *) bkey_next(k) >
+ if (mustfix_fsck_err_on((void *) bkey_next(k) >
(void *) jset_keys_next(entry), c,
"invalid %s in journal: extends past end of journal entry",
type)) {
@@ -494,7 +494,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
return 0;
}
- if (fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
+ if (mustfix_fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
"invalid %s in journal: bad format %u",
type, k->k.format)) {
le16_add_cpu(&entry->u64s, -k->k.u64s);
@@ -510,7 +510,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
if (invalid) {
bch_bkey_val_to_text(c, key_type, buf, sizeof(buf),
bkey_i_to_s_c(k));
- fsck_err(c, "invalid %s in journal: %s", type, buf);
+ mustfix_fsck_err(c, "invalid %s in journal: %s", type, buf);
le16_add_cpu(&entry->u64s, -k->k.u64s);
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
@@ -543,7 +543,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
return BCH_FSCK_UNKNOWN_VERSION;
}
- if (fsck_err_on(bytes > bucket_sectors_left << 9 ||
+ if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 ||
bytes > c->journal.entry_size_max, c,
"journal entry too big (%zu bytes), sector %lluu",
bytes, sector)) {
@@ -556,7 +556,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
got = le64_to_cpu(j->csum);
expect = __csum_set(j, le32_to_cpu(j->u64s), JSET_CSUM_TYPE(j));
- if (fsck_err_on(got != expect, c,
+ if (mustfix_fsck_err_on(got != expect, c,
"journal checksum bad (got %llu expect %llu), sector %lluu",
got, expect, sector)) {
/* XXX: retry IO, when we start retrying checksum errors */
@@ -564,14 +564,14 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
return JOURNAL_ENTRY_BAD;
}
- if (fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
- "invalid journal entry: last_seq > seq"))
+ if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq),
+ c, "invalid journal entry: last_seq > seq"))
j->last_seq = j->seq;
for_each_jset_entry(entry, j) {
struct bkey_i *k;
- if (fsck_err_on(jset_keys_next(entry) >
+ if (mustfix_fsck_err_on(jset_keys_next(entry) >
bkey_idx(j, le32_to_cpu(j->u64s)), c,
"journal entry extents past end of jset")) {
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
@@ -595,7 +595,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
case JOURNAL_ENTRY_BTREE_ROOT:
k = entry->start;
- if (fsck_err_on(!entry->u64s ||
+ if (mustfix_fsck_err_on(!entry->u64s ||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
"invalid btree root journal entry: wrong number of keys")) {
journal_entry_null_range(entry,
@@ -613,7 +613,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
break;
case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED:
- if (fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
+ if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
"invalid journal seq blacklist entry: bad size")) {
journal_entry_null_range(entry,
jset_keys_next(entry));
@@ -621,7 +621,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
break;
default:
- fsck_err(c, "invalid journal entry type %llu",
+ mustfix_fsck_err(c, "invalid journal entry type %llu",
JOURNAL_ENTRY_TYPE(entry));
journal_entry_null_range(entry, jset_keys_next(entry));
break;
@@ -2065,6 +2065,13 @@ static void journal_write(struct closure *cl)
bch_check_mark_super(c, &j->key, true);
+ /*
+ * XXX: we really should just disable the entire journal in nochanges
+ * mode
+ */
+ if (c->opts.nochanges)
+ goto no_io;
+
extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
rcu_read_lock();
ca = PTR_CACHE(c, ptr);
@@ -2094,8 +2101,6 @@ static void journal_write(struct closure *cl)
trace_bcache_journal_write(bio);
closure_bio_submit_punt(bio, cl, c);
- ptr->offset += sectors;
-
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
}
@@ -2114,6 +2119,10 @@ static void journal_write(struct closure *cl)
closure_bio_submit_punt(bio, cl, c);
}
+no_io:
+ extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
+ ptr->offset += sectors;
+
closure_return_with_destructor(cl, journal_write_done);
}
diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c
index 3c85d49..cb4f165 100644
--- a/libbcache/movinggc.c
+++ b/libbcache/movinggc.c
@@ -26,14 +26,11 @@ static const struct bch_extent_ptr *moving_pred(struct cache *ca,
{
const struct bch_extent_ptr *ptr;
- if (bkey_extent_is_data(k.k)) {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-
- extent_for_each_ptr(e, ptr)
- if ((ca->sb.nr_this_dev == ptr->dev) &&
- PTR_BUCKET(ca, ptr)->mark.copygc)
- return ptr;
- }
+ if (bkey_extent_is_data(k.k) &&
+ (ptr = bch_extent_has_device(bkey_s_c_to_extent(k),
+ ca->sb.nr_this_dev)) &&
+ PTR_BUCKET(ca, ptr)->mark.copygc)
+ return ptr;
return NULL;
}
@@ -274,6 +271,9 @@ int bch_moving_gc_thread_start(struct cache *ca)
/* The moving gc read thread must be stopped */
BUG_ON(ca->moving_gc_read != NULL);
+ if (ca->set->opts.nochanges)
+ return 0;
+
if (cache_set_init_fault("moving_gc_start"))
return -ENOMEM;
diff --git a/libbcache/opts.c b/libbcache/opts.c
index 249dd5d..60a2a4d 100644
--- a/libbcache/opts.c
+++ b/libbcache/opts.c
@@ -4,16 +4,6 @@
#include "opts.h"
#include "util.h"
-const char * const bch_bool_opt[] = {
- "0",
- "1",
- NULL
-};
-
-const char * const bch_uint_opt[] = {
- NULL
-};
-
const char * const bch_error_actions[] = {
"continue",
"remount-ro",
@@ -43,6 +33,42 @@ const char * const bch_str_hash_types[] = {
NULL
};
+const char * const bch_cache_replacement_policies[] = { /* NULL-terminated name list; sysfs.c passes it to bch_snprint_string_list() and bch_read_string_list() */
+ "lru",
+ "fifo",
+ "random",
+ NULL
+};
+
+/* Default is -1; we skip past it for struct cached_dev's cache mode */
+const char * const bch_cache_modes[] = {
+ "default",
+ "writethrough",
+ "writeback",
+ "writearound",
+ "none",
+ NULL
+};
+
+const char * const bch_cache_state[] = { /* NULL-terminated list of state names; presumably indexed by a device state value - TODO confirm against users */
+ "active",
+ "readonly",
+ "failed",
+ "spare",
+ NULL
+};
+
+
+const char * const bch_bool_opt[] = { /* NULL-terminated choice list; opts.h names it as the choices argument of boolean CACHE_SET_OPT() entries */
+ "0",
+ "1",
+ NULL
+};
+
+const char * const bch_uint_opt[] = {
+ NULL
+};
+
enum bch_opts {
#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
Opt_##_name,
diff --git a/libbcache/opts.h b/libbcache/opts.h
index 1d19ac6..70df232 100644
--- a/libbcache/opts.h
+++ b/libbcache/opts.h
@@ -6,6 +6,14 @@
#include <linux/log2.h>
#include <linux/string.h>
+extern const char * const bch_error_actions[];
+extern const char * const bch_csum_types[];
+extern const char * const bch_compression_types[];
+extern const char * const bch_str_hash_types[];
+extern const char * const bch_cache_replacement_policies[];
+extern const char * const bch_cache_modes[];
+extern const char * const bch_cache_state[];
+
/*
* Mount options; we also store defaults in the superblock.
*
@@ -20,10 +28,6 @@
extern const char * const bch_bool_opt[];
extern const char * const bch_uint_opt[];
-extern const char * const bch_error_actions[];
-extern const char * const bch_csum_types[];
-extern const char * const bch_compression_types[];
-extern const char * const bch_str_hash_types[];
/* dummy option, for options that aren't stored in the superblock */
LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0);
@@ -44,6 +48,15 @@ LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0);
CACHE_SET_OPT(fix_errors, \
bch_bool_opt, 0, 2, \
NO_SB_OPT, true) \
+ CACHE_SET_OPT(nochanges, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ CACHE_SET_OPT(noreplay, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ CACHE_SET_OPT(norecovery, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
CACHE_SET_SB_OPTS()
#define CACHE_SET_OPTS() \
diff --git a/libbcache/str_hash.h b/libbcache/str_hash.h
index 9a718a8..a489304 100644
--- a/libbcache/str_hash.h
+++ b/libbcache/str_hash.h
@@ -79,6 +79,14 @@ struct bch_hash_info {
u8 type;
};
+/*
+ * Build the in-memory string hash parameters for an inode: the seed is
+ * converted from its little-endian on-disk form and the hash type is read
+ * with INODE_STR_HASH_TYPE().  Returned by value.
+ */
+static inline struct bch_hash_info bch_hash_info_init(const struct bch_inode *bi)
+{
+	struct bch_hash_info info = {
+		.seed = le64_to_cpu(bi->i_hash_seed),
+		.type = INODE_STR_HASH_TYPE(bi),
+	};
+
+	return info;
+}
+
struct bch_hash_desc {
enum btree_id btree_id;
u8 key_type;
diff --git a/libbcache/super.c b/libbcache/super.c
index 5f6a85e..296700b 100644
--- a/libbcache/super.c
+++ b/libbcache/super.c
@@ -99,14 +99,17 @@ static bool bch_is_open(struct block_device *bdev)
}
static const char *bch_blkdev_open(const char *path, void *holder,
+ struct cache_set_opts opts,
struct block_device **ret)
{
struct block_device *bdev;
+ fmode_t mode = opts.nochanges > 0
+ ? FMODE_READ
+ : FMODE_READ|FMODE_WRITE|FMODE_EXCL;
const char *err;
*ret = NULL;
- bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- holder);
+ bdev = blkdev_get_by_path(path, mode, holder);
if (bdev == ERR_PTR(-EBUSY)) {
bdev = lookup_bdev(path);
@@ -369,6 +372,7 @@ int bch_super_realloc(struct bcache_superblock *sb, unsigned u64s)
}
static const char *read_super(struct bcache_superblock *sb,
+ struct cache_set_opts opts,
const char *path)
{
const char *err;
@@ -378,7 +382,7 @@ static const char *read_super(struct bcache_superblock *sb,
memset(sb, 0, sizeof(*sb));
- err = bch_blkdev_open(path, &sb, &sb->bdev);
+ err = bch_blkdev_open(path, &sb, opts, &sb->bdev);
if (err)
return err;
retry:
@@ -614,6 +618,9 @@ static void __bcache_write_super(struct cache_set *c)
closure_init(cl, &c->cl);
+ if (c->opts.nochanges)
+ goto no_io;
+
le64_add_cpu(&c->disk_sb.seq, 1);
for_each_cache(ca, c, i) {
@@ -636,7 +643,7 @@ static void __bcache_write_super(struct cache_set *c)
percpu_ref_get(&ca->ref);
__write_super(c, &ca->disk_sb);
}
-
+no_io:
closure_return_with_destructor(cl, bcache_write_super_unlock);
}
@@ -1147,6 +1154,9 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
c->opts = cache_superblock_opts(sb);
cache_set_opts_apply(&c->opts, opts);
+ c->opts.nochanges |= c->opts.noreplay;
+ c->opts.read_only |= c->opts.nochanges;
+
c->block_bits = ilog2(c->sb.block_size);
if (cache_set_init_fault("cache_set_alloc"))
@@ -1339,6 +1349,9 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_initial_gc(c, &journal))
goto err;
+ if (c->opts.noreplay)
+ goto recovery_done;
+
bch_verbose(c, "mark and sweep done");
/*
@@ -1365,6 +1378,9 @@ static const char *run_cache_set(struct cache_set *c)
bch_verbose(c, "journal replay done");
+ if (c->opts.norecovery)
+ goto recovery_done;
+
/*
* Write a new journal entry _before_ we start journalling new
* data - otherwise, we could end up with btree node bsets with
@@ -1376,21 +1392,12 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_journal_meta(&c->journal))
goto err;
- bch_verbose(c, "starting fs gc:");
- err = "error in fs gc";
- ret = bch_gc_inode_nlinks(c);
+ bch_verbose(c, "starting fsck:");
+ err = "error in fsck";
+ ret = bch_fsck(c, !c->opts.nofsck);
if (ret)
goto err;
- bch_verbose(c, "fs gc done");
-
- if (!c->opts.nofsck) {
- bch_verbose(c, "starting fsck:");
- err = "error in fsck";
- ret = bch_fsck(c);
- if (ret)
- goto err;
- bch_verbose(c, "fsck done");
- }
+ bch_verbose(c, "fsck done");
} else {
struct bkey_i_inode inode;
struct closure cl;
@@ -1433,12 +1440,9 @@ static const char *run_cache_set(struct cache_set *c)
/* Wait for new btree roots to be written: */
closure_sync(&cl);
- bkey_inode_init(&inode.k_i);
+ bch_inode_init(c, &inode, 0, 0,
+ S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
inode.k.p.inode = BCACHE_ROOT_INO;
- inode.v.i_mode = cpu_to_le16(S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO);
- inode.v.i_nlink = cpu_to_le32(2);
- get_random_bytes(&inode.v.i_hash_seed, sizeof(inode.v.i_hash_seed));
- SET_INODE_STR_HASH_TYPE(&inode.v, c->sb.str_hash_type);
err = "error creating root directory";
if (bch_btree_insert(c, BTREE_ID_INODES, &inode.k_i,
@@ -1449,7 +1453,7 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_journal_meta(&c->journal))
goto err;
}
-
+recovery_done:
if (c->opts.read_only) {
bch_cache_set_read_only_sync(c);
} else {
@@ -1485,12 +1489,12 @@ static const char *run_cache_set(struct cache_set *c)
set_bit(CACHE_SET_RUNNING, &c->flags);
bch_attach_backing_devs(c);
- closure_put(&c->caching);
-
bch_notify_cache_set_read_write(c);
-
- BUG_ON(!list_empty(&journal));
- return NULL;
+ err = NULL;
+out:
+ bch_journal_entries_free(&journal);
+ closure_put(&c->caching);
+ return err;
err:
switch (ret) {
case BCH_FSCK_ERRORS_NOT_FIXED:
@@ -1519,12 +1523,8 @@ err:
}
BUG_ON(!err);
-
- bch_journal_entries_free(&journal);
set_bit(CACHE_SET_ERROR, &c->flags);
- bch_cache_set_unregister(c);
- closure_put(&c->caching);
- return err;
+ goto out;
}
static const char *can_add_cache(struct cache_sb *sb,
@@ -2056,8 +2056,9 @@ static const char *register_cache(struct bcache_superblock *sb,
struct cache_set_opts opts)
{
char name[BDEVNAME_SIZE];
- const char *err = "cannot allocate memory";
+ const char *err;
struct cache_set *c;
+ bool allocated_cache_set = false;
err = validate_cache_super(sb);
if (err)
@@ -2067,41 +2068,36 @@ static const char *register_cache(struct bcache_superblock *sb,
c = cache_set_lookup(sb->sb->set_uuid);
if (c) {
- if ((err = (can_attach_cache(sb->sb, c) ?:
- cache_alloc(sb, c, NULL))))
+ err = can_attach_cache(sb->sb, c);
+ if (err)
return err;
+ } else {
+ c = bch_cache_set_alloc(sb->sb, opts);
+ if (!c)
+ return "cannot allocate memory";
- if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) {
- err = run_cache_set(c);
- if (err)
- return err;
- }
- goto out;
+ allocated_cache_set = true;
}
- c = bch_cache_set_alloc(sb->sb, opts);
- if (!c)
- return err;
-
err = cache_alloc(sb, c, NULL);
if (err)
- goto err_stop;
+ goto err;
if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) {
err = run_cache_set(c);
if (err)
- goto err_stop;
+ goto err;
+ } else {
+ err = "error creating kobject";
+ if (bch_cache_set_online(c))
+ goto err;
}
- err = "error creating kobject";
- if (bch_cache_set_online(c))
- goto err_stop;
-out:
-
bch_info(c, "started");
return NULL;
-err_stop:
- bch_cache_set_stop(c);
+err:
+ if (allocated_cache_set)
+ bch_cache_set_stop(c);
return err;
}
@@ -2117,7 +2113,7 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path)
mutex_lock(&bch_register_lock);
- err = read_super(&sb, path);
+ err = read_super(&sb, c->opts, path);
if (err)
goto err_unlock;
@@ -2261,7 +2257,7 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
mutex_lock(&bch_register_lock);
for (i = 0; i < nr_devices; i++) {
- err = read_super(&sb[i], devices[i]);
+ err = read_super(&sb[i], opts, devices[i]);
if (err)
goto err_unlock;
@@ -2312,6 +2308,8 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
out:
kfree(sb);
module_put(THIS_MODULE);
+ if (err)
+ c = NULL;
return err;
err_unlock:
if (c)
@@ -2326,18 +2324,19 @@ err:
const char *bch_register_one(const char *path)
{
struct bcache_superblock sb;
+ struct cache_set_opts opts = cache_set_opts_empty();
const char *err;
mutex_lock(&bch_register_lock);
- err = read_super(&sb, path);
+ err = read_super(&sb, opts, path);
if (err)
goto err;
if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version)))
err = bch_backing_dev_register(&sb);
else
- err = register_cache(&sb, cache_set_opts_empty());
+ err = register_cache(&sb, opts);
free_super(&sb);
err:
diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c
index 40d006b..58a7125 100644
--- a/libbcache/sysfs.c
+++ b/libbcache/sysfs.c
@@ -24,31 +24,6 @@
#include <linux/blkdev.h>
#include <linux/sort.h>
-static const char * const cache_replacement_policies[] = {
- "lru",
- "fifo",
- "random",
- NULL
-};
-
-/* Default is -1; we skip past it for struct cached_dev's cache mode */
-static const char * const bch_cache_modes[] = {
- "default",
- "writethrough",
- "writeback",
- "writearound",
- "none",
- NULL
-};
-
-static const char * const bch_cache_state[] = {
- "active",
- "readonly",
- "failed",
- "spare",
- NULL
-};
-
write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
@@ -1237,7 +1212,7 @@ SHOW(bch_cache)
if (attr == &sysfs_cache_replacement_policy)
return bch_snprint_string_list(buf, PAGE_SIZE,
- cache_replacement_policies,
+ bch_cache_replacement_policies,
ca->mi.replacement);
sysfs_print(tier, ca->mi.tier);
@@ -1281,7 +1256,7 @@ STORE(__bch_cache)
}
if (attr == &sysfs_cache_replacement_policy) {
- ssize_t v = bch_read_string_list(buf, cache_replacement_policies);
+ ssize_t v = bch_read_string_list(buf, bch_cache_replacement_policies);
if (v < 0)
return v;
diff --git a/libbcache/tier.c b/libbcache/tier.c
index 2b568e1..39b04f7 100644
--- a/libbcache/tier.c
+++ b/libbcache/tier.c
@@ -224,6 +224,9 @@ int bch_tiering_read_start(struct cache_set *c)
{
struct task_struct *t;
+ if (c->opts.nochanges)
+ return 0;
+
t = kthread_create(bch_tiering_thread, c, "bch_tier_read");
if (IS_ERR(t))
return PTR_ERR(t);
diff --git a/tools-util.c b/tools-util.c
index 68a42a9..c6e8855 100644
--- a/tools-util.c
+++ b/tools-util.c
@@ -18,11 +18,13 @@
#include "ccan/crc/crc.h"
+#include "linux/bcache-ioctl.h"
#include "tools-util.h"
+#include "util.h"
/* Integer stuff: */
-struct units_buf pr_units(u64 v, enum units units)
+struct units_buf __pr_units(u64 v, enum units units)
{
struct units_buf ret;
@@ -53,65 +55,6 @@ struct units_buf pr_units(u64 v, enum units units)
/* Argument parsing stuff: */
-long strtoul_or_die(const char *p, size_t max, const char *msg)
-{
- errno = 0;
- long v = strtol(p, NULL, 10);
- if (errno || v < 0 || v >= max)
- die("Invalid %s %zi", msg, v);
-
- return v;
-}
-
-u64 hatoi(const char *s)
-{
- char *e;
- long long i = strtoll(s, &e, 10);
- switch (*e) {
- case 't':
- case 'T':
- i *= 1024;
- case 'g':
- case 'G':
- i *= 1024;
- case 'm':
- case 'M':
- i *= 1024;
- case 'k':
- case 'K':
- i *= 1024;
- }
- return i;
-}
-
-unsigned hatoi_validate(const char *s, const char *msg)
-{
- u64 v = hatoi(s);
-
- if (v & (v - 1))
- die("%s must be a power of two", msg);
-
- v /= 512;
-
- if (v > USHRT_MAX)
- die("%s too large\n", msg);
-
- if (!v)
- die("%s too small\n", msg);
-
- return v;
-}
-
-unsigned nr_args(char * const *args)
-{
- unsigned i;
-
- for (i = 0; args[i]; i++)
- ;
-
- return i;
-}
-
/* File parsing (i.e. sysfs) */
char *read_file_str(int dirfd, const char *path)
@@ -151,48 +94,16 @@ u64 read_file_u64(int dirfd, const char *path)
/* String list options: */
-ssize_t read_string_list(const char *buf, const char * const list[])
-{
- size_t i;
- char *s, *d = strdup(buf);
- if (!d)
- return -ENOMEM;
-
- s = strim(d);
-
- for (i = 0; list[i]; i++)
- if (!strcmp(list[i], s))
- break;
-
- free(d);
-
- if (!list[i])
- return -EINVAL;
-
- return i;
-}
-
ssize_t read_string_list_or_die(const char *opt, const char * const list[],
const char *msg)
{
- ssize_t v = read_string_list(opt, list);
+ ssize_t v = bch_read_string_list(opt, list);
if (v < 0)
die("Bad %s %s", msg, opt);
return v;
}
-void print_string_list(const char * const list[], size_t selected)
-{
- size_t i;
-
- for (i = 0; list[i]; i++) {
- if (i)
- putchar(' ');
- printf(i == selected ? "[%s] ": "%s", list[i]);
- }
-}
-
/* Returns size of file or block device, in units of 512 byte sectors: */
u64 get_size(const char *path, int fd)
{
@@ -296,14 +207,15 @@ struct bcache_handle bcache_fs_open(const char *path)
return ret;
}
-bool ask_proceed(void)
+bool ask_yn(void)
{
const char *short_yes = "yY";
char *buf = NULL;
size_t buflen = 0;
bool ret;
- fputs("Proceed anyway? (y,n) ", stdout);
+ fputs(" (y,n) ", stdout);
+ fflush(stdout);
if (getline(&buf, &buflen, stdin) < 0)
die("error reading from standard input");
diff --git a/tools-util.h b/tools-util.h
index 5c8ea13..09f00ef 100644
--- a/tools-util.h
+++ b/tools-util.h
@@ -1,10 +1,12 @@
#ifndef _TOOLS_UTIL_H
#define _TOOLS_UTIL_H
+#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
+#include <unistd.h>
#include <linux/byteorder.h>
#include <linux/kernel.h>
@@ -18,38 +20,66 @@ do { \
exit(EXIT_FAILURE); \
} while (0)
+/*
+ * calloc() wrapper: returns zeroed storage for @count objects of @size bytes,
+ * or reports "insufficient memory" through die() when allocation fails.
+ */
+static inline void *xcalloc(size_t count, size_t size)
+{
+	void *mem = calloc(count, size);
+
+	if (mem == NULL)
+		die("insufficient memory");
+
+	return mem;
+}
+
+/*
+ * malloc() wrapper: returns @size bytes of zero-filled storage, or reports
+ * "insufficient memory" through die() when allocation fails.
+ */
+static inline void *xmalloc(size_t size)
+{
+	void *mem = malloc(size);
+
+	if (mem == NULL)
+		die("insufficient memory");
+
+	/* memset() hands back its destination pointer */
+	return memset(mem, 0, size);
+}
+
+/*
+ * pread() wrapper that only returns when exactly @count bytes were read from
+ * @fd at @offset into @buf.
+ *
+ * A failed call (negative return) is reported together with strerror(errno),
+ * matching xpwrite().  A short read is reported separately with the byte
+ * counts, because errno is not meaningful in that case.
+ */
+static inline void xpread(int fd, void *buf, size_t count, off_t offset)
+{
+	ssize_t r = pread(fd, buf, count, offset);
+
+	if (r < 0)
+		die("read error (ret %zi err %s)", r, strerror(errno));
+
+	/* r is non-negative here, so the cast cannot change its value */
+	if ((size_t) r != count)
+		die("read error (ret %zi, expected %zu)", r, count);
+}
+
+/*
+ * pwrite() wrapper that only returns when exactly @count bytes from @buf were
+ * written to @fd at @offset; anything else is reported through die() along
+ * with the return value and strerror(errno).
+ */
+static inline void xpwrite(int fd, const void *buf, size_t count, off_t offset)
+{
+	ssize_t written = pwrite(fd, buf, count, offset);
+
+	if ((size_t) written == count)
+		return;
+
+	die("write error (ret %zi err %s)", written, strerror(errno));
+}
+
enum units {
BYTES,
SECTORS,
HUMAN_READABLE,
};
-struct units_buf pr_units(u64, enum units);
+struct units_buf __pr_units(u64, enum units);
struct units_buf {
char b[20];
};
-long strtoul_or_die(const char *, size_t, const char *);
-
-u64 hatoi(const char *);
-unsigned hatoi_validate(const char *, const char *);
-unsigned nr_args(char * const *);
+#define pr_units(_v, _u) __pr_units(_v, _u).b
char *read_file_str(int, const char *);
u64 read_file_u64(int, const char *);
-ssize_t read_string_list(const char *, const char * const[]);
ssize_t read_string_list_or_die(const char *, const char * const[],
const char *);
-void print_string_list(const char * const[], size_t);
u64 get_size(const char *, int);
unsigned get_blocksize(const char *, int);
-#include "linux/bcache.h"
-#include "linux/bcache-ioctl.h"
-
int bcachectl_open(void);
struct bcache_handle {
@@ -59,6 +89,6 @@ struct bcache_handle {
struct bcache_handle bcache_fs_open(const char *);
-bool ask_proceed(void);
+bool ask_yn(void);
#endif /* _TOOLS_UTIL_H */