summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kmo@daterainc.com> 2014-08-25 19:11:59 -0700
committer: Kent Overstreet <kmo@daterainc.com> 2014-08-29 21:48:17 +0000
commit: 02c47b0373ae9b96d9511f2381f40e48c4426145 (patch)
tree: fef61b171a0a03213140cdf2707584ef96b11f01
parent: 219c3966c87188169d44b00e0014e3e106544796 (diff)
Superblock version 6
Change-Id: Iba976c8c5a4a91dcac06145471fc70baa6a6393e
-rw-r--r-- .gitignore 2
-rw-r--r-- Makefile 2
-rw-r--r-- bcache-super-show.c 282
-rw-r--r-- bcache.c 139
-rw-r--r-- bcache.h 137
-rw-r--r-- make-bcache.c 461
-rw-r--r-- probe-bcache.c 4
7 files changed, 559 insertions, 468 deletions
diff --git a/.gitignore b/.gitignore
index fd7b2a7..63af840 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
bcache-super-show
make-bcache
probe-bcache
+bcachectl
.*
*.o
tags
@@ -12,6 +13,7 @@ compile
config.guess
config.h.in
config.sub
+config.type
configure
depcomp
install-sh
diff --git a/Makefile b/Makefile
index e7d94e6..5b95af1 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ PREFIX=/usr
UDEVLIBDIR=/lib/udev
DRACUTLIBDIR=/lib/dracut
INSTALL=install
-CFLAGS+=-O2 -Wall -g
+CFLAGS+=-O2 -Wall -Werror -g
all: make-bcache probe-bcache bcache-super-show bcachectl
diff --git a/bcache-super-show.c b/bcache-super-show.c
index c65c672..76d8b0a 100644
--- a/bcache-super-show.c
+++ b/bcache-super-show.c
@@ -9,6 +9,7 @@
#define __USE_FILE_OFFSET64
#define _XOPEN_SOURCE 500
+#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
@@ -26,77 +27,30 @@
#include "bcache.h"
+static bool force_csum = false;
static void usage()
{
fprintf(stderr, "Usage: bcache-super-show [-f] <device>\n");
}
-
-static bool accepted_char(char c)
-{
- if ('0' <= c && c <= '9')
- return true;
- if ('A' <= c && c <= 'Z')
- return true;
- if ('a' <= c && c <= 'z')
- return true;
- if (strchr(".-_", c))
- return true;
- return false;
-}
-
-static void print_encode(char* in)
+static void print_encode(char *in)
{
- for (char* pos = in; *pos; pos++)
- if (accepted_char(*pos))
+ for (char *pos = in; *pos; pos++)
+ if (isalnum(*pos) || strchr(".-_", *pos))
putchar(*pos);
else
printf("%%%x", *pos);
}
-
-int main(int argc, char **argv)
+static void show_super_common(struct cache_sb *sb)
{
- bool force_csum = false;
- int o;
- extern char *optarg;
- struct cache_sb sb;
char uuid[40];
+ char label[SB_LABEL_SIZE + 1];
uint64_t expected_csum;
- while ((o = getopt(argc, argv, "f")) != EOF)
- switch (o) {
- case 'f':
- force_csum = 1;
- break;
-
- default:
- usage();
- exit(1);
- }
-
- argv += optind;
- argc -= optind;
-
- if (argc != 1) {
- usage();
- exit(1);
- }
-
- int fd = open(argv[0], O_RDONLY);
- if (fd < 0) {
- printf("Can't open dev %s: %s\n", argv[0], strerror(errno));
- exit(2);
- }
-
- if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) {
- fprintf(stderr, "Couldn't read\n");
- exit(2);
- }
-
printf("sb.magic\t\t");
- if (!memcmp(sb.magic, bcache_magic, 16)) {
+ if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) {
printf("ok\n");
} else {
printf("bad magic\n");
@@ -104,8 +58,8 @@ int main(int argc, char **argv)
exit(2);
}
- printf("sb.first_sector\t\t%" PRIu64, sb.offset);
- if (sb.offset == SB_SECTOR) {
+ printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset);
+ if (sb->offset == SB_SECTOR) {
printf(" [match]\n");
} else {
printf(" [expected %ds]\n", SB_SECTOR);
@@ -113,9 +67,12 @@ int main(int argc, char **argv)
exit(2);
}
- printf("sb.csum\t\t\t%" PRIX64, sb.csum);
- expected_csum = csum_set(&sb);
- if (sb.csum == expected_csum) {
+ printf("sb.csum\t\t\t%ju", (uint64_t) sb->csum);
+ expected_csum = csum_set(sb,
+ sb->version < BCACHE_SB_VERSION_CDEV_V3
+ ? BCH_CSUM_CRC64
+ : CACHE_SB_CSUM_TYPE(sb));
+ if (sb->csum == expected_csum) {
printf(" [match]\n");
} else {
printf(" [expected %" PRIX64 "]\n", expected_csum);
@@ -125,8 +82,8 @@ int main(int argc, char **argv)
}
}
- printf("sb.version\t\t%" PRIu64, sb.version);
- switch (sb.version) {
+ printf("sb.version\t\t%ju", (uint64_t) sb->version);
+ switch (sb->version) {
// These are handled the same by the kernel
case BCACHE_SB_VERSION_CDEV:
case BCACHE_SB_VERSION_CDEV_WITH_UUID:
@@ -142,13 +99,12 @@ int main(int argc, char **argv)
default:
printf(" [unknown]\n");
// exit code?
- return 0;
+ exit(EXIT_SUCCESS);
}
putchar('\n');
- char label[SB_LABEL_SIZE + 1];
- strncpy(label, (char*)sb.label, SB_LABEL_SIZE);
+ strncpy(label, (char *) sb->label, SB_LABEL_SIZE);
label[SB_LABEL_SIZE] = '\0';
printf("dev.label\t\t");
if (*label)
@@ -157,103 +113,133 @@ int main(int argc, char **argv)
printf("(empty)");
putchar('\n');
- uuid_unparse(sb.uuid, uuid);
+ uuid_unparse(sb->uuid.b, uuid);
printf("dev.uuid\t\t%s\n", uuid);
- printf("dev.sectors_per_block\t%u\n"
- "dev.sectors_per_bucket\t%u\n",
- sb.block_size,
- sb.bucket_size);
-
- if (!SB_IS_BDEV(&sb)) {
- // total_sectors includes the superblock;
- printf("dev.cache.first_sector\t%u\n"
- "dev.cache.cache_sectors\t%ju\n"
- "dev.cache.total_sectors\t%ju\n"
- "dev.cache.ordered\t%s\n"
- "dev.cache.discard\t%s\n"
- "dev.cache.pos\t\t%u\n"
- "dev.cache.setsize\t\t%u\n"
- "dev.cache.replacement\t%ju",
- sb.bucket_size * sb.first_bucket,
- sb.bucket_size * (sb.nbuckets - sb.first_bucket),
- sb.bucket_size * sb.nbuckets,
- CACHE_SYNC(&sb) ? "yes" : "no",
- CACHE_DISCARD(&sb) ? "yes" : "no",
- sb.nr_this_dev,
- sb.nr_in_set,
- CACHE_REPLACEMENT(&sb));
- switch (CACHE_REPLACEMENT(&sb)) {
- case CACHE_REPLACEMENT_LRU:
- printf(" [lru]\n");
- break;
- case CACHE_REPLACEMENT_FIFO:
- printf(" [fifo]\n");
- break;
- case CACHE_REPLACEMENT_RANDOM:
- printf(" [random]\n");
- break;
- default:
- putchar('\n');
- }
+ uuid_unparse(sb->set_uuid.b, uuid);
+ printf("cset.uuid\t\t%s\n", uuid);
+}
+static void show_super_backingdev(struct cache_sb *sb)
+{
+ uint64_t first_sector;
+
+ show_super_common(sb);
+
+ if (sb->version == BCACHE_SB_VERSION_BDEV) {
+ first_sector = BDEV_DATA_START_DEFAULT;
} else {
- uint64_t first_sector;
- if (sb.version == BCACHE_SB_VERSION_BDEV) {
- first_sector = BDEV_DATA_START_DEFAULT;
- } else {
- if (sb.keys == 1 || sb.d[0]) {
- fprintf(stderr,
- "Possible experimental format detected, bailing\n");
- exit(3);
- }
- first_sector = sb.data_offset;
+ if (sb->keys == 1 || sb->d[0]) {
+ fprintf(stderr,
+ "Possible experimental format detected, bailing\n");
+ exit(3);
}
+ first_sector = sb->data_offset;
+ }
- printf("dev.data.first_sector\t%ju\n"
- "dev.data.cache_mode\t%ju",
- first_sector,
- BDEV_CACHE_MODE(&sb));
- switch (BDEV_CACHE_MODE(&sb)) {
- case CACHE_MODE_WRITETHROUGH:
- printf(" [writethrough]\n");
- break;
- case CACHE_MODE_WRITEBACK:
- printf(" [writeback]\n");
- break;
- case CACHE_MODE_WRITEAROUND:
- printf(" [writearound]\n");
- break;
- case CACHE_MODE_NONE:
- printf(" [no caching]\n");
+ printf("dev.data.first_sector\t%ju\n"
+ "dev.data.cache_mode\t%s"
+ "dev.data.cache_state\t%s\n",
+ first_sector,
+ bdev_cache_mode[BDEV_CACHE_MODE(sb)],
+ bdev_state[BDEV_STATE(sb)]);
+}
+
+static void show_cache_member(struct cache_sb *sb, unsigned i)
+{
+ struct cache_member *m = ((struct cache_member *) sb->d) + i;
+
+ printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]);
+ printf("cache.tier\t%llu\n", CACHE_TIER(m));
+
+ printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m));
+ printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m));
+ printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m));
+
+ printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m));
+ printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m));
+
+ printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]);
+ printf("cache.discard\t%llu\n", CACHE_DISCARD(m));
+}
+
+static void show_super_cache(struct cache_sb *sb)
+{
+ show_super_common(sb);
+
+ printf("dev.sectors_per_block\t%u\n"
+ "dev.sectors_per_bucket\t%u\n",
+ sb->block_size,
+ sb->bucket_size);
+
+ // total_sectors includes the superblock;
+ printf("dev.cache.first_sector\t%u\n"
+ "dev.cache.cache_sectors\t%llu\n"
+ "dev.cache.total_sectors\t%llu\n"
+ "dev.cache.ordered\t%s\n"
+ "dev.cache.pos\t\t%u\n"
+ "dev.cache.setsize\t\t%u\n",
+ sb->bucket_size * sb->first_bucket,
+ sb->bucket_size * (sb->nbuckets - sb->first_bucket),
+ sb->bucket_size * sb->nbuckets,
+ CACHE_SYNC(sb) ? "yes" : "no",
+ sb->nr_this_dev,
+ sb->nr_in_set);
+
+ show_cache_member(sb, sb->nr_this_dev);
+}
+
+int main(int argc, char **argv)
+{
+ int o;
+ extern char *optarg;
+ struct cache_sb sb_stack, *sb = &sb_stack;
+ size_t bytes = sizeof(*sb);
+
+ while ((o = getopt(argc, argv, "f")) != EOF)
+ switch (o) {
+ case 'f':
+ force_csum = 1;
break;
+
default:
- putchar('\n');
+ usage();
+ exit(1);
}
- printf("dev.data.cache_state\t%ju",
- BDEV_STATE(&sb));
- switch (BDEV_STATE(&sb)) {
- case BDEV_STATE_NONE:
- printf(" [detached]\n");
- break;
- case BDEV_STATE_CLEAN:
- printf(" [clean]\n");
- break;
- case BDEV_STATE_DIRTY:
- printf(" [dirty]\n");
- break;
- case BDEV_STATE_STALE:
- printf(" [inconsistent]\n");
- break;
- default:
- putchar('\n');
+ argv += optind;
+ argc -= optind;
+
+ if (argc != 1) {
+ usage();
+ exit(1);
+ }
+
+ int fd = open(argv[0], O_RDONLY);
+ if (fd < 0) {
+ printf("Can't open dev %s: %s\n", argv[0], strerror(errno));
+ exit(2);
+ }
+
+ if (pread(fd, sb, bytes, SB_START) != bytes) {
+ fprintf(stderr, "Couldn't read\n");
+ exit(2);
+ }
+
+ if (sb->keys) {
+ bytes = sizeof(*sb) + sb->keys * sizeof(uint64_t);
+ sb = malloc(bytes);
+
+ if (pread(fd, sb, bytes, SB_START) != bytes) {
+ fprintf(stderr, "Couldn't read\n");
+ exit(2);
}
}
- putchar('\n');
- uuid_unparse(sb.set_uuid, uuid);
- printf("cset.uuid\t\t%s\n", uuid);
+ if (!SB_IS_BDEV(sb))
+ show_super_cache(sb);
+ else
+ show_super_backingdev(sb);
return 0;
}
diff --git a/bcache.c b/bcache.c
index 8b4b986..3ec0689 100644
--- a/bcache.c
+++ b/bcache.c
@@ -1,10 +1,122 @@
#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
+#include "bcache.h"
+
+const char * const cache_state[] = {
+ "active",
+ "ro",
+ "failed",
+ "spare",
+ NULL
+};
+
+const char * const replacement_policies[] = {
+ "lru",
+ "fifo",
+ "random",
+ NULL
+};
+
+const char * const csum_types[] = {
+ "none",
+ "crc32c",
+ "crc64",
+ NULL
+};
+
+const char * const bdev_cache_mode[] = {
+ "writethrough",
+ "writeback",
+ "writearound",
+ "none",
+ NULL
+};
+
+const char * const bdev_state[] = {
+ "detached",
+ "clean",
+ "dirty",
+ "inconsistent",
+ NULL
+};
+
+char *skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
+
+char *strim(char *s)
+{
+ size_t size;
+ char *end;
+
+ s = skip_spaces(s);
+ size = strlen(s);
+ if (!size)
+ return s;
+
+ end = s + size - 1;
+ while (end >= s && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+
+ return s;
+}
+
+ssize_t read_string_list(const char *buf, const char * const list[])
+{
+ size_t i;
+ char *s, *d = strdup(buf);
+ if (!d)
+ return -ENOMEM;
+
+ s = strim(d);
+
+ for (i = 0; list[i]; i++)
+ if (!strcmp(list[i], s))
+ break;
+
+ free(d);
+
+ if (!list[i])
+ return -EINVAL;
+
+ return i;
+}
+
+ssize_t read_string_list_or_die(const char *opt, const char * const list[],
+ const char *msg)
+{
+ ssize_t v = read_string_list(opt, list);
+ if (v < 0) {
+ fprintf(stderr, "Bad %s %s\n", msg, opt);
+ exit(EXIT_FAILURE);
+
+ }
+
+ return v;
+}
+
+void print_string_list(const char * const list[], size_t selected)
+{
+ size_t i;
+
+ for (i = 0; list[i]; i++) {
+ if (i)
+ putchar(' ');
+ printf(i == selected ? "[%s] ": "%s", list[i]);
+ }
+}
+
/*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
* use permitted, subject to terms of PostgreSQL license; see.)
@@ -115,9 +227,8 @@ static const uint64_t crc_table[256] = {
0x9AFCE626CE85B507ULL
};
-uint64_t crc64(const void *_data, size_t len)
+static uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len)
{
- uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
const unsigned char *data = _data;
while (len--) {
@@ -125,5 +236,27 @@ uint64_t crc64(const void *_data, size_t len)
crc = crc_table[i] ^ (crc << 8);
}
- return crc ^ 0xFFFFFFFFFFFFFFFFULL;
+ return crc;
+}
+
+static uint64_t bch_checksum_update(unsigned type, uint64_t crc, const void *data, size_t len)
+{
+ switch (type) {
+ case BCH_CSUM_NONE:
+ return 0;
+ case BCH_CSUM_CRC64:
+ return bch_crc64_update(crc, data, len);
+ default:
+ fprintf(stderr, "Unknown checksum type %u\n", type);
+ exit(EXIT_FAILURE);
+ }
+}
+
+uint64_t bch_checksum(unsigned type, const void *data, size_t len)
+{
+ uint64_t crc = 0xffffffffffffffffULL;
+
+ crc = bch_checksum_update(type, crc, data, len);
+
+ return crc ^ 0xffffffffffffffffULL;
}
diff --git a/bcache.h b/bcache.h
index bd61d76..c5ba186 100644
--- a/bcache.h
+++ b/bcache.h
@@ -7,124 +7,29 @@
#ifndef _BCACHE_H
#define _BCACHE_H
-#define BITMASK(name, type, field, offset, size) \
-static inline uint64_t name(const type *k) \
-{ return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \
- \
-static inline void SET_##name(type *k, uint64_t v) \
-{ \
- k->field &= ~(~((uint64_t) ~0 << size) << offset); \
- k->field |= v << offset; \
-}
+#include <linux/bcache.h>
-static const char bcache_magic[] = {
- 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
- 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
-
-/*
- * Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- */
-#define BCACHE_SB_VERSION_CDEV_V0 0
-#define BCACHE_SB_VERSION_BDEV 1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
-#define BCACHE_SB_VERSION_CDEV 5
-#define BCACHE_SB_MAX_VERSION 5
-
-#define SB_SECTOR 8
-#define SB_LABEL_SIZE 32
-#define SB_JOURNAL_BUCKETS 256U
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
#define SB_START (SB_SECTOR * 512)
-struct cache_sb {
- uint64_t csum;
- uint64_t offset; /* sector where this sb was written */
- uint64_t version;
-
- uint8_t magic[16];
-
- uint8_t uuid[16];
- union {
- uint8_t set_uuid[16];
- uint64_t set_magic;
- };
- uint8_t label[SB_LABEL_SIZE];
-
- uint64_t flags;
- uint64_t seq;
- uint64_t pad[8];
-
- union {
- struct {
- /* Cache devices */
- uint64_t nbuckets; /* device size */
-
- uint16_t block_size; /* sectors */
- uint16_t bucket_size; /* sectors */
-
- uint16_t nr_in_set;
- uint16_t nr_this_dev;
- };
- struct {
- /* Backing devices */
- uint64_t data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- uint32_t last_mount; /* time_t */
-
- uint16_t first_bucket;
- union {
- uint16_t njournal_buckets;
- uint16_t keys;
- };
- uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */
-};
-
-static inline bool SB_IS_BDEV(const struct cache_sb *sb)
-{
- return sb->version == BCACHE_SB_VERSION_BDEV
- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
-}
-
-BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
-BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
-BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
-#define CACHE_REPLACEMENT_LRU 0U
-#define CACHE_REPLACEMENT_FIFO 1U
-#define CACHE_REPLACEMENT_RANDOM 2U
-
-BITMASK(CACHE_TIER, struct cache_sb, flags, 2, 5);
-#define CACHE_TIERS 4U
-
-BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH 0U
-#define CACHE_MODE_WRITEBACK 1U
-#define CACHE_MODE_WRITEAROUND 2U
-#define CACHE_MODE_NONE 3U
-BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
-#define BDEV_STATE_NONE 0U
-#define BDEV_STATE_CLEAN 1U
-#define BDEV_STATE_DIRTY 2U
-#define BDEV_STATE_STALE 3U
-
-uint64_t crc64(const void *_data, size_t len);
-
-#define node(i, j) ((void *) ((i)->d + (j)))
-#define end(i) node(i, (i)->keys)
-
-#define csum_set(i) \
- crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
+extern const char * const cache_state[];
+extern const char * const replacement_policies[];
+extern const char * const csum_types[];
+extern const char * const bdev_cache_mode[];
+extern const char * const bdev_state[];
+
+ssize_t read_string_list(const char *, const char * const[]);
+ssize_t read_string_list_or_die(const char *, const char * const[],
+ const char *);
+void print_string_list(const char * const[], size_t);
+
+uint64_t bch_checksum(unsigned, const void *, size_t);
+
+#define csum_set(i, type) \
+({ \
+ void *start = ((void *) (i)) + sizeof(uint64_t); \
+ void *end = bset_bkey_last(i); \
+ \
+ bch_checksum(type, start, end - start); \
+})
#endif
diff --git a/make-bcache.c b/make-bcache.c
index 75df4d3..450ba99 100644
--- a/make-bcache.c
+++ b/make-bcache.c
@@ -96,91 +96,105 @@ unsigned hatoi_validate(const char *s, const char *msg)
return v;
}
-char *skip_spaces(const char *str)
+void usage()
{
- while (isspace(*str))
- ++str;
- return (char *)str;
+ fprintf(stderr,
+ "Usage: make-bcache [options] device\n"
+ " -C, --cache Format a cache device\n"
+ " -B, --bdev Format a backing device\n"
+ " --wipe-bcache destroy existing bcache data if present\n"
+ " -l, --label label\n"
+ " --cset-uuid UUID for the cache set\n"
+ " --csum-type One of (none|crc32c|crc64)\n"
+
+ " -b, --bucket bucket size\n"
+ " -w, --block block size (hard sector size of SSD, often 2k)\n"
+
+ " --replication-set replication set of subsequent devices\n"
+ " --meta-replicas number of metadata replicas\n"
+ " --data-replicas number of data replicas\n"
+ " --tier tier of subsequent devices\n"
+ " --cache_replacement_policy one of (lru|fifo|random)\n"
+ " --discard enable discards\n"
+
+ " --writeback enable writeback\n"
+ " -o, --data-offset data offset in sectors\n"
+ " -h, --help display this help and exit\n");
+ exit(EXIT_FAILURE);
}
-char *strim(char *s)
+static void do_write_sb(int fd, struct cache_sb *sb)
{
- size_t size;
- char *end;
+ char zeroes[SB_START] = {0};
+ size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb;
- s = skip_spaces(s);
- size = strlen(s);
- if (!size)
- return s;
-
- end = s + size - 1;
- while (end >= s && isspace(*end))
- end--;
- *(end + 1) = '\0';
+ /* Zero start of disk */
+ if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
+ perror("write error\n");
+ exit(EXIT_FAILURE);
+ }
+ /* Write superblock */
+ if (pwrite(fd, sb, bytes, SB_START) != bytes) {
+ perror("write error\n");
+ exit(EXIT_FAILURE);
+ }
- return s;
+ fsync(fd);
+ close(fd);
}
-ssize_t read_string_list(const char *buf, const char * const list[])
+static void write_backingdev_sb(int fd, unsigned block_size, unsigned bucket_size,
+ bool writeback, uint64_t data_offset,
+ const char *label,
+ uuid_le set_uuid)
{
- size_t i;
- char *s, *d = strdup(buf);
- if (!d)
- return -ENOMEM;
+ char uuid_str[40], set_uuid_str[40];
+ struct cache_sb sb;
- s = strim(d);
+ memset(&sb, 0, sizeof(struct cache_sb));
- for (i = 0; list[i]; i++)
- if (!strcmp(list[i], s))
- break;
+ sb.offset = SB_SECTOR;
+ sb.version = BCACHE_SB_VERSION_BDEV;
+ sb.magic = BCACHE_MAGIC;
+ uuid_generate(sb.uuid.b);
+ sb.set_uuid = set_uuid;
+ sb.bucket_size = bucket_size;
+ sb.block_size = block_size;
- free(d);
+ uuid_unparse(sb.uuid.b, uuid_str);
+ uuid_unparse(sb.set_uuid.b, set_uuid_str);
+ if (label)
+ memcpy(sb.label, label, SB_LABEL_SIZE);
- if (!list[i])
- return -EINVAL;
+ SET_BDEV_CACHE_MODE(&sb, writeback
+ ? CACHE_MODE_WRITEBACK
+ : CACHE_MODE_WRITETHROUGH);
- return i;
-}
+ if (data_offset != BDEV_DATA_START_DEFAULT) {
+ sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
+ sb.data_offset = data_offset;
+ }
-void usage()
-{
- fprintf(stderr,
- "Usage: make-bcache [options] device\n"
- " -C, --cache Format a cache device\n"
- " -B, --bdev Format a backing device\n"
- " -b, --bucket bucket size\n"
- " -w, --block block size (hard sector size of SSD, often 2k)\n"
- " -o, --data-offset data offset in sectors\n"
- " --cset-uuid UUID for the cache set\n"
-// " -U UUID\n"
- " --writeback enable writeback\n"
- " --discard enable discards\n"
- " --wipe-bcache destroy existing bcache data if present\n"
- " --tier set tier of subsequent cache devices\n"
- " --cache_replacement_policy=(lru|fifo|random)\n"
- " -l, --label label\n"
- " -h, --help display this help and exit\n");
- exit(EXIT_FAILURE);
+ sb.csum = csum_set(&sb, BCH_CSUM_CRC64);
+
+ printf("UUID: %s\n"
+ "Set UUID: %s\n"
+ "version: %u\n"
+ "block_size: %u\n"
+ "data_offset: %ju\n",
+ uuid_str, set_uuid_str,
+ (unsigned) sb.version,
+ sb.block_size,
+ data_offset);
+
+ do_write_sb(fd, &sb);
}
-const char * const cache_replacement_policies[] = {
- "lru",
- "fifo",
- "random",
- NULL
-};
-
-static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
- bool writeback, bool discard, bool wipe_bcache,
- unsigned cache_replacement_policy, uint64_t data_offset,
- uuid_t set_uuid, unsigned tier, bool bdev,
- uint16_t nr_in_set, uint16_t nr_this_dev,
- char *label)
+static int dev_open(const char *dev, bool wipe_bcache)
{
- int fd;
- char uuid_str[40], set_uuid_str[40], zeroes[SB_START] = {0};
struct cache_sb sb;
blkid_probe pr;
+ int fd;
if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) {
fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno));
@@ -190,7 +204,7 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb))
exit(EXIT_FAILURE);
- if (!memcmp(sb.magic, bcache_magic, 16) && !wipe_bcache) {
+ if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) {
fprintf(stderr, "Already a bcache device on %s, "
"overwrite with --wipe-bcache\n", dev);
exit(EXIT_FAILURE);
@@ -210,94 +224,84 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
exit(EXIT_FAILURE);
}
- memset(&sb, 0, sizeof(struct cache_sb));
+ return fd;
+}
- sb.offset = SB_SECTOR;
- sb.version = bdev
- ? BCACHE_SB_VERSION_BDEV
- : BCACHE_SB_VERSION_CDEV;
+static void write_cache_sbs(int *fds, struct cache_sb *sb,
+ unsigned block_size, unsigned bucket_size)
+{
+ char uuid_str[40], set_uuid_str[40];
+ size_t i;
- memcpy(sb.magic, bcache_magic, 16);
- uuid_generate(sb.uuid);
- memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid));
+ sb->offset = SB_SECTOR;
+ sb->version = BCACHE_SB_VERSION_CDEV_V3;
+ sb->magic = BCACHE_MAGIC;
+ sb->bucket_size = bucket_size;
+ sb->block_size = block_size;
+ sb->keys = bch_journal_buckets_offset(sb);
- sb.bucket_size = bucket_size;
- sb.block_size = block_size;
+ /*
+ * don't have a userspace crc32c implementation handy, just always use
+ * crc64
+ */
+ SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
- uuid_unparse(sb.uuid, uuid_str);
- uuid_unparse(sb.set_uuid, set_uuid_str);
- if (label) {
- memcpy(sb.label, label, SB_LABEL_SIZE);
- }
+ for (i = 0; i < sb->nr_in_set; i++) {
+ struct cache_member *m = sb->members + i;
- if (SB_IS_BDEV(&sb)) {
- SET_BDEV_CACHE_MODE(
- &sb, writeback ? CACHE_MODE_WRITEBACK : CACHE_MODE_WRITETHROUGH);
+ sb->uuid = m->uuid;
- if (data_offset != BDEV_DATA_START_DEFAULT) {
- sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
- sb.data_offset = data_offset;
- }
+ sb->nbuckets = getblocks(fds[i]) / sb->bucket_size;
+ sb->nr_this_dev = i;
+ sb->first_bucket = (23 / sb->bucket_size) + 1;
- printf("UUID: %s\n"
- "Set UUID: %s\n"
- "version: %u\n"
- "block_size: %u\n"
- "data_offset: %ju\n",
- uuid_str, set_uuid_str,
- (unsigned) sb.version,
- sb.block_size,
- data_offset);
- } else {
- sb.nbuckets = getblocks(fd) / sb.bucket_size;
- sb.nr_in_set = nr_in_set;
- sb.nr_this_dev = nr_this_dev;
- sb.first_bucket = (23 / sb.bucket_size) + 1;
-
- if (sb.nbuckets < 1 << 7) {
- fprintf(stderr, "Not enough buckets: %ju, need %u\n",
- sb.nbuckets, 1 << 7);
+ if (sb->nbuckets < 1 << 7) {
+ fprintf(stderr, "Not enough buckets: %llu, need %u\n",
+ sb->nbuckets, 1 << 7);
exit(EXIT_FAILURE);
}
- SET_CACHE_DISCARD(&sb, discard);
- SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy);
- SET_CACHE_TIER(&sb, tier);
+ sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb));
+ uuid_unparse(sb->uuid.b, uuid_str);
+ uuid_unparse(sb->set_uuid.b, set_uuid_str);
printf("UUID: %s\n"
"Set UUID: %s\n"
"version: %u\n"
- "nbuckets: %ju\n"
+ "nbuckets: %llu\n"
"block_size: %u\n"
"bucket_size: %u\n"
"nr_in_set: %u\n"
"nr_this_dev: %u\n"
"first_bucket: %u\n",
uuid_str, set_uuid_str,
- (unsigned) sb.version,
- sb.nbuckets,
- sb.block_size,
- sb.bucket_size,
- sb.nr_in_set,
- sb.nr_this_dev,
- sb.first_bucket);
+ (unsigned) sb->version,
+ sb->nbuckets,
+ sb->block_size,
+ sb->bucket_size,
+ sb->nr_in_set,
+ sb->nr_this_dev,
+ sb->first_bucket);
+
+ do_write_sb(fds[i], sb);
}
+}
- sb.csum = csum_set(&sb);
+static void next_cache_device(struct cache_sb *sb,
+ unsigned replication_set,
+ unsigned tier,
+ unsigned replacement_policy,
+ bool discard)
+{
+ struct cache_member *m = sb->members + sb->nr_in_set;
- /* Zero start of disk */
- if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
- perror("write error\n");
- exit(EXIT_FAILURE);
- }
- /* Write superblock */
- if (pwrite(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) {
- perror("write error\n");
- exit(EXIT_FAILURE);
- }
+ SET_CACHE_REPLICATION_SET(m, replication_set);
+ SET_CACHE_TIER(m, tier);
+ SET_CACHE_REPLACEMENT(m, replacement_policy);
+ SET_CACHE_DISCARD(m, discard);
+ uuid_generate(m->uuid.b);
- fsync(fd);
- close(fd);
+ sb->nr_in_set++;
}
static unsigned get_blocksize(const char *path)
@@ -344,44 +348,81 @@ static unsigned get_blocksize(const char *path)
return statbuf.st_blksize / 512;
}
+static long strtoul_or_die(const char *p, size_t max, const char *msg)
+{
+ errno = 0;
+ long v = strtol(optarg, NULL, 10);
+ if (errno || v < 0 || v >= max) {
+ fprintf(stderr, "Invalid %s %zi\n", msg, v);
+ exit(EXIT_FAILURE);
+ }
+
+ return v;
+}
+
int main(int argc, char **argv)
{
int c, bdev = -1;
- unsigned i, ncache_devices = 0, nbacking_devices = 0;
- unsigned long tier = 0;
- unsigned cache_device_tier[argc];
- char *cache_devices[argc];
- char *backing_devices[argc];
+ size_t i, nr_backing_devices = 0;
unsigned block_size = 0, bucket_size = 1024;
int writeback = 0, discard = 0, wipe_bcache = 0;
- unsigned cache_replacement_policy = 0;
+ unsigned replication_set = 0, tier = 0, replacement_policy = 0;
uint64_t data_offset = BDEV_DATA_START_DEFAULT;
- uuid_t set_uuid;
char *label = NULL;
- uuid_generate(set_uuid);
-
- struct option opts[] = {
- { "cache", 0, NULL, 'C' },
- { "bdev", 0, NULL, 'B' },
- { "bucket", 1, NULL, 'b' },
- { "block", 1, NULL, 'w' },
- { "writeback", 0, &writeback, 1 },
- { "wipe-bcache", 0, &wipe_bcache, 1 },
- { "discard", 0, &discard, 1 },
- { "cache_replacement_policy", 1, NULL, 'p' },
- { "data_offset", 1, NULL, 'o' },
- { "cset-uuid", 1, NULL, 'u' },
- { "tier", 1, NULL, 't' },
- { "label", 1, NULL, 'l' },
- { "help", 0, NULL, 'h' },
- { NULL, 0, NULL, 0 },
+ const char *cache_devices[argc];
+ int cache_dev_fd[argc];
+
+ const char *backing_devices[argc];
+ int backing_dev_fd[argc];
+ const char *backing_dev_labels[argc];
+
+ enum long_opts {
+ CACHE_SET_UUID = 256,
+ CSUM_TYPE,
+ REPLICATION_SET,
+ META_REPLICAS,
+ DATA_REPLICAS,
};
+ const struct option opts[] = {
+ { "cache", 0, NULL, 'C' },
+ { "bdev", 0, NULL, 'B' },
+ { "wipe-bcache", 0, &wipe_bcache, 1 },
+ { "label", 1, NULL, 'l' },
+ { "cset-uuid", 1, NULL, CACHE_SET_UUID },
+ { "csum-type", 1, NULL, CSUM_TYPE },
+
+ { "bucket", 1, NULL, 'b' },
+ { "block", 1, NULL, 'w' },
+
+ { "replication-set", 1, NULL, REPLICATION_SET },
+ { "meta-replicas", 1, NULL, META_REPLICAS},
+ { "data-replicas", 1, NULL, DATA_REPLICAS },
+ { "tier", 1, NULL, 't' },
+ { "cache_replacement_policy", 1, NULL, 'p' },
+ { "discard", 0, &discard, 1 },
+
+ { "writeback", 0, &writeback, 1 },
+ { "data_offset", 1, NULL, 'o' },
+
+ { "help", 0, NULL, 'h' },
+ { NULL, 0, NULL, 0 },
+ };
+
+ struct cache_sb *cache_set_sb = calloc(1, sizeof(*cache_set_sb) +
+ sizeof(struct cache_member) * argc);
+
+ uuid_generate(cache_set_sb->set_uuid.b);
+ SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC32C);
+ SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, 1);
+ SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, 1);
+
while ((c = getopt_long(argc, argv,
"-hCBU:w:b:l:",
- opts, NULL)) != -1)
+ opts, NULL)) != -1) {
+
switch (c) {
case 'C':
bdev = 0;
@@ -389,24 +430,56 @@ int main(int argc, char **argv)
case 'B':
bdev = 1;
break;
+ case 'l':
+ label = optarg;
+ memcpy(cache_set_sb->label, label,
+ sizeof(cache_set_sb->label));
+ break;
+ case CACHE_SET_UUID:
+ if (uuid_parse(optarg, cache_set_sb->set_uuid.b)) {
+ fprintf(stderr, "Bad uuid\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
+ case CSUM_TYPE:
+ SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb,
+ read_string_list_or_die(optarg, csum_types,
+ "csum type"));
+ break;
+
case 'b':
bucket_size = hatoi_validate(optarg, "bucket size");
break;
case 'w':
block_size = hatoi_validate(optarg, "block size");
break;
-#if 0
- case 'U':
- if (uuid_parse(optarg, sb.uuid)) {
- fprintf(stderr, "Bad uuid\n");
- exit(EXIT_FAILURE);
- }
+
+ case REPLICATION_SET:
+ replication_set = strtoul_or_die(optarg,
+ CACHE_REPLICATION_SET_MAX,
+ "replication set");
+ break;
+ case META_REPLICAS:
+ SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb,
+ strtoul_or_die(optarg,
+ CACHE_SET_META_REPLICAS_WANT_MAX,
+ "meta replicas"));
+ break;
+ case DATA_REPLICAS:
+ SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb,
+ strtoul_or_die(optarg,
+ CACHE_SET_DATA_REPLICAS_WANT_MAX,
+ "data replicas"));
+ break;
+ case 't':
+ tier = strtoul_or_die(optarg, CACHE_TIERS, "tier");
break;
-#endif
case 'p':
- cache_replacement_policy = read_string_list(optarg,
- cache_replacement_policies);
+ replacement_policy = read_string_list_or_die(optarg,
+ replacement_policies,
+ "cache replacement policy");
break;
+
case 'o':
data_offset = atoll(optarg);
if (data_offset < BDEV_DATA_START_DEFAULT) {
@@ -415,22 +488,6 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
break;
- case 'u':
- if (uuid_parse(optarg, set_uuid)) {
- fprintf(stderr, "Bad uuid\n");
- exit(EXIT_FAILURE);
- }
- break;
- case 'l':
- label = optarg;
- break;
- case 't':
- tier = strtoul(optarg, NULL, 10);
- if (tier >= CACHE_TIERS) {
- fprintf(stderr, "Invalid tier %lu\n", tier);
- exit(EXIT_FAILURE);
- }
- break;
case 'h':
usage();
break;
@@ -441,16 +498,22 @@ int main(int argc, char **argv)
}
if (bdev) {
- backing_devices[nbacking_devices++] = optarg;
+ backing_dev_labels[nr_backing_devices] = label;
+ backing_devices[nr_backing_devices++] = optarg;
} else {
- cache_device_tier[ncache_devices] = tier;
- cache_devices[ncache_devices++] = optarg;
+ cache_devices[cache_set_sb->nr_in_set] = optarg;
+ next_cache_device(cache_set_sb,
+ replication_set,
+ tier,
+ replacement_policy,
+ discard);
}
break;
}
+ }
- if (!ncache_devices && !nbacking_devices) {
+ if (!cache_set_sb->nr_in_set && !nr_backing_devices) {
fprintf(stderr, "Please supply a device\n");
usage();
}
@@ -461,27 +524,29 @@ int main(int argc, char **argv)
}
if (!block_size) {
- for (i = 0; i < ncache_devices; i++)
+ for (i = 0; i < cache_set_sb->nr_in_set; i++)
block_size = max(block_size,
get_blocksize(cache_devices[i]));
- for (i = 0; i < nbacking_devices; i++)
+ for (i = 0; i < nr_backing_devices; i++)
block_size = max(block_size,
get_blocksize(backing_devices[i]));
}
- for (i = 0; i < ncache_devices; i++)
- write_sb(cache_devices[i], block_size, bucket_size,
- writeback, discard, wipe_bcache,
- cache_replacement_policy, data_offset,
- set_uuid, cache_device_tier[i], false,
- ncache_devices, i, label);
-
- for (i = 0; i < nbacking_devices; i++)
- write_sb(backing_devices[i], block_size, bucket_size,
- writeback, discard, wipe_bcache,
- cache_replacement_policy, data_offset,
- set_uuid, 0, true, nbacking_devices, i, label);
+ for (i = 0; i < cache_set_sb->nr_in_set; i++)
+ cache_dev_fd[i] = dev_open(cache_devices[i], wipe_bcache);
+
+ for (i = 0; i < nr_backing_devices; i++)
+ backing_dev_fd[i] = dev_open(backing_devices[i], wipe_bcache);
+
+ write_cache_sbs(cache_dev_fd, cache_set_sb, block_size, bucket_size);
+
+ for (i = 0; i < nr_backing_devices; i++)
+ write_backingdev_sb(backing_dev_fd[i],
+ block_size, bucket_size,
+ writeback, data_offset,
+ backing_dev_labels[i],
+ cache_set_sb->set_uuid);
return 0;
}
diff --git a/probe-bcache.c b/probe-bcache.c
index c94c972..5decb9b 100644
--- a/probe-bcache.c
+++ b/probe-bcache.c
@@ -69,10 +69,10 @@ int main(int argc, char **argv)
if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb))
continue;
- if (memcmp(sb.magic, bcache_magic, 16))
+ if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
continue;
- uuid_unparse(sb.uuid, uuid);
+ uuid_unparse(sb.uuid.b, uuid);
if (udev)
printf("ID_FS_UUID=%s\n"