diff options
Diffstat (limited to 'c_src')
-rw-r--r-- | c_src/bcachefs.c | 3 | ||||
-rw-r--r-- | c_src/cmd_image.c | 584 | ||||
-rw-r--r-- | c_src/cmds.h | 2 |
3 files changed, 589 insertions, 0 deletions
diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c
index 46c524c4..f4712597 100644
--- a/c_src/bcachefs.c
+++ b/c_src/bcachefs.c
@@ -38,6 +38,9 @@ void bcachefs_usage(void)
 	" reset-counters Reset all counters on an unmounted device\n"
 	" strip-alloc Strip alloc info on a filesystem to be used read-only\n"
 	"\n"
+	"Commands for managing images:\n"
+	" image create Create a new compact disk image\n"
+	"\n"
 	"Mount:\n"
 	" mount Mount a filesystem\n"
 	"\n"
diff --git a/c_src/cmd_image.c b/c_src/cmd_image.c
new file mode 100644
index 00000000..12478b1f
--- /dev/null
+++ b/c_src/cmd_image.c
@@ -0,0 +1,584 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * GPLv2
+ */
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "cmd_strip_alloc.h"
+#include "posix_to_bcachefs.h"
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/alloc_background.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/data_update.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/move.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/util.h"
+
+#include "libbcachefs/darray.h"
+
+/*
+ * Recursively sum the allocated size (st_blocks * 512) of every entry below
+ * the directory open at @dirfd, not following symlinks and skipping
+ * "lost+found".
+ *
+ * @dirfd remains owned by the caller. The directory stream is opened on a
+ * duplicate because fdopendir() takes over the descriptor it is given, and
+ * POSIX leaves closing that descriptor behind the stream's back undefined.
+ * The duplicate shares its file offset with @dirfd, so a caller that goes on
+ * to read the directory through @dirfd must rewind it first.
+ *
+ * Errors from dup(), fdopendir() and readdir() are fatal.
+ */
+static u64 count_input_size(int dirfd)
+{
+	int stream_fd = dup(dirfd);
+	if (stream_fd < 0)
+		die("dup error: %m");
+
+	DIR *dir = fdopendir(stream_fd);
+	if (!dir)
+		die("fdopendir error: %m");
+
+	struct dirent *d;
+	u64 bytes = 0;
+
+	while ((errno = 0), (d = readdir(dir))) {
+		if (!strcmp(d->d_name, ".") ||
+		    !strcmp(d->d_name, "..") ||
+		    !strcmp(d->d_name, "lost+found"))
+			continue;
+
+		struct stat stat =
+			xfstatat(dirfd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+		/* st_blocks is signed; widen before shifting */
+		bytes += (u64) stat.st_blocks << 9;
+
+		if (mode_to_type(stat.st_mode) == DT_DIR) {
+			int fd = xopenat(dirfd, d->d_name, O_RDONLY|O_NOATIME);
+			bytes += count_input_size(fd);
+			xclose(fd);
+		}
+	}
+
+	/* errno was cleared before the final readdir(), so nonzero means it failed */
+	if (errno)
+		die("readdir error: %m");
+
+	closedir(dir);
+	return bytes;
+}
+
+/* Arguments handed to move_btree_pred() through bch2_move_data_btree() */
+struct move_btree_args {
+	bool		move_alloc;
+	unsigned	target;
+};
+
+/*
+ * Predicate passed to bch2_move_data_btree(): accept KEY_TYPE_btree_ptr_v2
+ * keys, excluding those in alloc btrees unless args->move_alloc is set, and
+ * point the move at args->target.
+ */
+static bool move_btree_pred(struct bch_fs *c, void *_arg,
+			    enum btree_id btree, struct bkey_s_c k,
+			    struct bch_io_opts *io_opts,
+			    struct data_update_opts *data_opts)
+{
+	struct move_btree_args *args = _arg;
+
+	data_opts->target = args->target;
+
+	if (k.k->type != KEY_TYPE_btree_ptr_v2)
+		return false;
+
+	if (!args->move_alloc && btree_id_is_alloc(btree))
+		return false;
+
+	return true;
+}
+
+/*
+ * Run bch2_move_data_btree() with move_btree_pred() over levels
+ * 1..BTREE_MAX_DEPTH-1 of every btree, targeting device @target_dev. Alloc
+ * btrees are skipped unless @move_alloc is set.
+ *
+ * Returns 0 on success, or the first error from bch2_move_data_btree().
+ */
+static int move_btree(struct bch_fs *c, bool move_alloc, unsigned target_dev)
+{
+	struct move_btree_args args = {
+		.move_alloc	= move_alloc,
+		.target		= dev_to_target(target_dev),
+	};
+
+	struct moving_context ctxt;
+	bch2_moving_ctxt_init(&ctxt, c, NULL, NULL, writepoint_hashed(1), false);
+	int ret = 0;
+
+	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+		if (!move_alloc && btree_id_is_alloc(btree))
+			continue;
+
+		for (unsigned level = 1; level < BTREE_MAX_DEPTH; level++) {
+			ret = bch2_move_data_btree(&ctxt,
+						   POS_MIN, SPOS_MAX,
+						   move_btree_pred, &args,
+						   btree, level);
+			if (ret)
+				goto err;
+		}
+	}
+err:
+	bch2_moving_ctxt_exit(&ctxt);
+	return ret;
+}
+
+/*
+ * Debugging aid: walk the alloc_v4 keys up to POS(0, U64_MAX) and log each one
+ * whose offset does not directly follow the previous key's, or whose
+ * dirty_sectors differs from device 0's bucket size.
+ */
+static void check_gaps(struct bch_fs *c)
+{
+	/* Check for gaps, make sure the allocator is behaving correctly */
+	u64 prev_bucket = 0;
+	bch2_trans_run(c,
+		for_each_btree_key_max(trans, iter, BTREE_ID_alloc, POS_MIN, POS(0, U64_MAX), 0, k, ({
+			if (k.k->type == KEY_TYPE_alloc_v4) {
+				struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
+
+				if ((prev_bucket && prev_bucket + 1 != k.k->p.offset) ||
+				    a.v->dirty_sectors != c->devs[0]->mi.bucket_size)
+					pr_info("%llu %llu %s %u", prev_bucket, k.k->p.offset,
+						__bch2_data_types[a.v->data_type],
+						a.v->dirty_sectors);
+				prev_bucket = k.k->p.offset;
+			}
+
+			0;
+		})));
+}
+
+/*
+ * Store in @nbuckets one more than the offset of the alloc btree key that
+ * bch2_btree_iter_peek_prev() finds starting from POS(0, U64_MAX).
+ *
+ * Returns 0 on success, -ENOENT if no key is found or it is not an alloc_v4
+ * key, or the error from the btree lookup. Failures are also reported on
+ * stderr.
+ */
+static int get_nbuckets_used(struct bch_fs *c, u64 *nbuckets)
+{
+	struct btree_trans *trans = bch2_trans_get(c);
+	struct btree_iter iter;
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(0, U64_MAX), 0);
+	struct bkey_s_c k;
+	int ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_prev(trans, &iter)));
+	/*
+	 * NOTE(review): a lookup that finds nothing presumably yields a NULL
+	 * key rather than an error, so check k.k before dereferencing it.
+	 */
+	if (!ret && (!k.k || k.k->type != KEY_TYPE_alloc_v4))
+		ret = -ENOENT;
+	if (ret) {
+		fprintf(stderr, "error looking up last alloc key: %s\n", bch2_err_str(ret));
+		goto err;
+	}
+
+	*nbuckets = (k.k->p.offset + 1);
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	bch2_trans_put(trans);
+	return ret;
+}
+
+/* Print every member device's usage, via bch2_dev_usage_to_text(), to stdout */
+static void print_dev_usage_all(struct bch_fs *c)
+{
+	struct printbuf buf = PRINTBUF;
+
+	for_each_member_device(c, ca) {
+		struct bch_dev_usage_full stats = bch2_dev_usage_full_read(ca);
+		bch2_dev_usage_to_text(&buf, ca, &stats);
+	}
+
+	printf("%s", buf.buf);
+	printbuf_exit(&buf);
+}
+
+/*
+ * Build an image file:
+ *
+ * Use a temporary second device for metadata, so that we can write out data
+ * reproducibly, sequentially from the start of the device.
+ *
+ * After data is written out, the metadata that we want to keep is moved to the
+ * real image file. By default, alloc info is left out: it will be recreated on
+ * first RW mount.
+ *
+ * After migrating metadata, the image file is trimmed and the temporary
+ * metadata device is dropped.
+ *
+ * @src_path must name a directory. The image is written to dev_opts.path, the
+ * temporary device to "<dev_opts.path>.metadata", which must not already
+ * exist. Only returns on success: failures either die() or unlink both files
+ * and exit with EXIT_FAILURE.
+ */
+static void image_create(struct bch_opt_strs fs_opt_strs,
+			 struct bch_opts fs_opts,
+			 struct format_opts format_opts,
+			 struct dev_opts dev_opts,
+			 const char *src_path,
+			 bool keep_alloc,
+			 unsigned verbosity)
+{
+	int src_fd = xopen(src_path, O_RDONLY);
+
+	if (!S_ISDIR(xfstat(src_fd).st_mode))
+		die("%s is not a directory", src_path);
+
+	u64 input_bytes = count_input_size(src_fd);
+	/* Counting consumed the directory stream; rewind before the copy */
+	lseek(src_fd, 0, SEEK_SET);
+
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts);
+
+	dev_opts.path = mprintf("%s.metadata", devs.data[0].path);
+	darray_push(&devs, dev_opts);
+
+	if (!access(devs.data[1].path, F_OK))
+		die("temporary metadata device %s already exists", devs.data[1].path);
+
+	opt_set(devs.data[0].opts, data_allowed, BIT(BCH_DATA_user));
+	opt_set(devs.data[1].opts, data_allowed, BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree));
+
+	darray_for_each(devs, dev) {
+		int ret = open_for_format(dev, BLK_OPEN_CREAT, false);
+		if (ret) {
+			fprintf(stderr, "Error opening %s: %s\n", dev->path, strerror(-ret));
+			goto err;
+		}
+
+		/* Start at twice the input size; the image is trimmed once written */
+		if (ftruncate(dev->bdev->bd_fd, input_bytes * 2)) {
+			fprintf(stderr, "ftruncate error: %m\n");
+			goto err;
+		}
+	}
+
+	format_opts.no_sb_at_end = true;
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+	if (verbosity > 1) {
+		struct printbuf buf = PRINTBUF;
+		buf.human_readable_units = true;
+
+		bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members_v2);
+		printf("%s", buf.buf);
+		printbuf_exit(&buf);
+	}
+
+	darray_const_str device_paths = {};
+	darray_for_each(devs, dev)
+		darray_push(&device_paths, dev->path);
+
+	struct bch_opts opts = bch2_opts_empty();
+	opt_set(opts, copygc_enabled, false);
+	opt_set(opts, rebalance_enabled, false);
+
+	struct bch_fs *c = bch2_fs_open(&device_paths, &opts);
+
+	/* Unlink the temporary metadata device now, whether or not the open succeeded */
+	unlink(device_paths.data[1]);
+
+	int ret = PTR_ERR_OR_ZERO(c);
+	if (ret) {
+		fprintf(stderr, "error opening %s: %s\n",
+			device_paths.data[0], bch2_err_str(ret));
+		goto err;
+	}
+
+	struct copy_fs_state s = {};
+	copy_fs(c, src_fd, src_path, &s, 0);
+
+	printf("moving non-alloc btree to primary device\n");
+
+	/* Let device 0 hold btree nodes before migrating them onto it */
+	mutex_lock(&c->sb_lock);
+	struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, 0);
+	SET_BCH_MEMBER_DATA_ALLOWED(m, BCH_MEMBER_DATA_ALLOWED(m)|BIT(BCH_DATA_btree));
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
+	bch2_dev_allocator_set_rw(c, c->devs[0], true);
+
+	ret = move_btree(c, keep_alloc, 0);
+	if (ret) {
+		fprintf(stderr, "error migrating btree from temporary device: %s\n",
+			bch2_err_str(ret));
+		goto err;
+	}
+
+	bch2_fs_read_only(c);
+
+	if (verbosity > 1)
+		print_dev_usage_all(c);
+
+	if (0)
+		check_gaps(c);
+
+	u64 nbuckets;
+	ret = get_nbuckets_used(c, &nbuckets);
+	if (ret)
+		goto err;
+
+	/* Trim the image down to the buckets get_nbuckets_used() reported */
+	if (ftruncate(c->devs[0]->disk_sb.bdev->bd_fd, nbuckets * bucket_bytes(c->devs[0]))) {
+		fprintf(stderr, "truncate error: %m\n");
+		goto err;
+	}
+
+	mutex_lock(&c->sb_lock);
+	if (!keep_alloc) {
+		printf("Stripping alloc info\n");
+		strip_fs_alloc(c);
+	}
+
+	/* Drop the temporary metadata device from the filesystem */
+	rcu_assign_pointer(c->devs[1], NULL);
+
+	m = bch2_members_v2_get_mut(c->disk_sb.sb, 0);
+	SET_BCH_MEMBER_DATA_ALLOWED(m, BCH_MEMBER_DATA_ALLOWED(m)|BIT(BCH_DATA_journal));
+	m->nbuckets = cpu_to_le64(nbuckets);
+
+	for_each_online_member(c, ca, 0) {
+		struct bch_member *ca_m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+		SET_BCH_MEMBER_RESIZE_ON_MOUNT(ca_m, true);
+	}
+
+	c->disk_sb.sb->features[0] |= cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image));
+
+	/*
+	 * sb->nr_devices must be 1 so that it can be mounted without UUID
+	 * conflicts
+	 */
+	unsigned u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
+				     sizeof(struct bch_member), sizeof(u64));
+	bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
+	c->disk_sb.sb->nr_devices = 1;
+	SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, false);
+
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
+	bch2_fs_stop(c);
+	darray_exit(&device_paths);
+	xclose(src_fd);
+	return;
+err:
+	darray_for_each(devs, d)
+		unlink(d->path);
+	exit(EXIT_FAILURE);
+}
+
+/* Print the help text for "bcachefs image create" to stdout */
+static void image_create_usage(void)
+{
+	puts("bcachefs image create - create a minimum size, reproducible filesystem image\n"
+	     "Usage: bcachefs image create [OPTION]... <file>\n"
+	     "\n"
+	     "Options:\n"
+	     " --source=path Source directory to be used as content for the new image\n"
+	     " -a, --keep-alloc Include allocation info in the filesystem\n"
+	     " 6.16+ regenerates alloc info on first rw mount\n"
+	     " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
+	     " -L, --fs_label=label\n"
+	     " -U, --uuid=uuid\n"
+	     " --superblock_size=size\n"
+	     " --bucket_size=size\n"
+	     " --fs_size=size Expected size of device image will be used on, hint for bucket size\n"
+	     " -f, --force\n"
+	     " -q, --quiet Only print errors\n"
+	     " -v, --verbose Verbose filesystem initialization\n"
+	     " -h, --help Display this help and exit\n"
+	     "\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/*
+ * "bcachefs image create": parse the command line and build the image.
+ *
+ * Exactly one positional argument, the image file to create, is required,
+ * along with --source. Invalid usage is fatal; returns 0 on success.
+ */
+static int cmd_image_create(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "source",		required_argument,	NULL, 's' },
+		{ "keep-alloc",		no_argument,		NULL, 'a' },
+		{ "encrypted",		no_argument,		NULL, 'e' },
+		{ "fs_label",		required_argument,	NULL, 'L' },
+		{ "uuid",		required_argument,	NULL, 'U' },
+		{ "superblock_size",	required_argument,	NULL, 'S' },
+		{ "force",		no_argument,		NULL, 'f' },
+		{ "quiet",		no_argument,		NULL, 'q' },
+		{ "verbose",		no_argument,		NULL, 'v' },
+		{ "help",		no_argument,		NULL, 'h' },
+		{ NULL }
+	};
+	struct format_opts opts = format_opts_default();
+	struct dev_opts dev_opts = dev_opts_default();
+	bool keep_alloc = false, force = false;
+	unsigned verbosity = 1;
+	struct bch_opt_strs fs_opt_strs = {};
+	struct bch_opts fs_opts = bch2_opts_empty();
+
+	opts.superblock_size = 128; /* 64k */
+
+	while (true) {
+		const struct bch_option *opt =
+			bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_FS|OPT_DEVICE);
+		if (opt) {
+			unsigned id = opt - bch2_opt_table;
+			u64 v;
+			struct printbuf err = PRINTBUF;
+			int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+			if (ret == -BCH_ERR_option_needs_open_fs) {
+				fs_opt_strs.by_id[id] = strdup(optarg);
+				printbuf_exit(&err);
+				continue;
+			}
+			if (ret)
+				die("invalid option: %s", err.buf);
+			printbuf_exit(&err);
+
+			if (opt->flags & OPT_DEVICE)
+				bch2_opt_set_by_id(&dev_opts.opts, id, v);
+			else if (opt->flags & OPT_FS)
+				bch2_opt_set_by_id(&fs_opts, id, v);
+			else
+				die("got bch_opt of wrong type %s", opt->attr.name);
+
+			continue;
+		}
+
+		int optid = getopt_long(argc, argv,
+					"s:aeL:U:S:fqvh",
+					longopts, NULL);
+		if (optid == -1)
+			break;
+
+		switch (optid) {
+		case 's':
+			opts.source = optarg;
+			break;
+		case 'a':
+			keep_alloc = true;
+			break;
+		case 'e':
+			/* NOTE(review): nothing below sets up encryption; fail clearly */
+			die("--encrypted is not implemented for image create");
+			break;
+		case 'L':
+			opts.label = optarg;
+			break;
+		case 'U':
+			if (uuid_parse(optarg, opts.uuid.b))
+				die("Bad uuid");
+			break;
+		case 'S':
+			if (bch2_strtouint_h(optarg, &opts.superblock_size))
+				die("invalid superblock size");
+
+			opts.superblock_size >>= 9;
+			break;
+		case 'f':
+			force = true;
+			break;
+		case 'q':
+			verbosity = 0;
+			break;
+		case 'v':
+			verbosity++;
+			break;
+		case 'h':
+			image_create_usage();
+			exit(EXIT_SUCCESS);
+			break;
+		case '?':
+			exit(EXIT_FAILURE);
+			break;
+		default:
+			die("getopt ret %i %c", optid, optid);
+		}
+	}
+	args_shift(optind);
+
+	if (argc != 1)
+		die("Please supply a filename for the new image");
+
+	if (!opts.source)
+		die("Please supply a source directory with --source");
+
+	/* NOTE(review): --force is accepted but nothing consumes it yet */
+	(void) force;
+
+	dev_opts.path = argv[0];
+
+	image_create(fs_opt_strs, fs_opts, opts, dev_opts, opts.source, keep_alloc, verbosity);
+	bch2_opt_strs_free(&fs_opt_strs);
+	return 0;
+}
+
+/* Print the help text for "bcachefs image" to stdout; always returns 0 */
+static int image_usage(void)
+{
+	puts("bcachefs image - commands for creating and updating image files\n"
+	     "Usage: bcachefs image <CMD> [OPTION]...\n"
+	     "\n"
+	     "Commands:\n"
+	     " create Create a minimally-sized disk image\n"
+	     "\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	return 0;
+}
+
+/*
+ * Entry point for "bcachefs image <CMD>": pop the subcommand name off argv and
+ * dispatch on it.
+ *
+ * Returns the subcommand's result, 0 after printing usage when no subcommand
+ * was given, or -EINVAL after printing usage for an unrecognised one.
+ */
+int image_cmds(int argc, char *argv[])
+{
+	char *cmd = pop_cmd(&argc, argv);
+
+	if (argc < 1)
+		return image_usage();
+	if (!strcmp(cmd, "create"))
+		return cmd_image_create(argc, argv);
+
+	image_usage();
+	return -EINVAL;
+}
diff --git a/c_src/cmds.h b/c_src/cmds.h
index 13451ef1..4454f33f 100644
--- a/c_src/cmds.h
+++ b/c_src/cmds.h
@@ -16,6 +16,8 @@ int cmd_reset_counters(int argc, char *argv[]);
 int cmd_strip_alloc(int argc, char *argv[]);
 int cmd_set_option(int argc, char *argv[]);
 
+int image_cmds(int argc, char *argv[]);
+
 int fs_usage(void);
 int cmd_fs_usage(int argc, char *argv[]);
 int cmd_fs_top(int argc, char *argv[]);