summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev> 2025-03-23 11:52:54 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2025-03-23 11:52:54 -0400
commit: a55c6551589f859b116be81936f789ec0a16e388 (patch)
tree: e1841d1eaaf52ac77cfb58304198d4f1919ae091
parent: b8435c569398fbc8a4a209dcd55cb660b6577d3d (diff)
format: Redo bucket size calculations
Bucket size now takes into account system memory and the amount of memory required to fsck: on very large filesystems it will automatically scale up the bucket size to ensure we can fsck.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- c_src/cmd_migrate.c 37
-rw-r--r-- c_src/libbcachefs.c 64
-rw-r--r-- c_src/libbcachefs.h 6
-rw-r--r-- include/linux/mm.h 1
4 files changed, 64 insertions, 44 deletions
diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
index 109416a8..924f874d 100644
--- a/c_src/cmd_migrate.c
+++ b/c_src/cmd_migrate.c
@@ -209,38 +209,39 @@ static int migrate_fs(const char *fs_path,
if (!S_ISDIR(stat.st_mode))
die("%s is not a directory", fs_path);
- struct dev_opts dev = dev_opts_default();
+ dev_opts_list devs = {};
+ darray_push(&devs, dev_opts_default());
+
+ struct dev_opts *dev = &devs.data[0];
- dev.path = dev_t_to_path(stat.st_dev);
- dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+ dev->path = dev_t_to_path(stat.st_dev);
+ dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
- int ret = PTR_ERR_OR_ZERO(dev.file);
+ int ret = PTR_ERR_OR_ZERO(dev->file);
if (ret < 0)
- die("Error opening device to format %s: %s", dev.path, strerror(-ret));
- dev.bdev = file_bdev(dev.file);
+ die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+ dev->bdev = file_bdev(dev->file);
- opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+ opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
char *file_path = mprintf("%s/bcachefs", fs_path);
printf("Creating new filesystem on %s in space reserved at %s\n",
- dev.path, file_path);
+ dev->path, file_path);
- dev.opts.fs_size = get_size(dev.bdev->bd_fd);
- dev.opts.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
- dev.nbuckets = dev.opts.fs_size / dev.opts.bucket_size;
+ dev->opts.fs_size = get_size(dev->bdev->bd_fd);
+ dev->opts.bucket_size = bch2_pick_bucket_size(fs_opts, devs);
+ dev->nbuckets = dev->opts.fs_size / dev->opts.bucket_size;
- bch2_check_bucket_size(fs_opts, &dev);
+ bch2_check_bucket_size(fs_opts, dev);
u64 bcachefs_inum;
ranges extents = reserve_new_fs_space(file_path,
fs_opts.block_size >> 9,
- get_size(dev.bdev->bd_fd) / 5,
+ get_size(dev->bdev->bd_fd) / 5,
&bcachefs_inum, stat.st_dev, force);
- find_superblock_space(extents, format_opts, &dev);
+ find_superblock_space(extents, format_opts, dev);
- dev_opts_list devs = {};
- darray_push(&devs, dev);
struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
darray_exit(&devs);
@@ -253,7 +254,7 @@ static int migrate_fs(const char *fs_path,
struct bch_opts opts = bch2_opts_empty();
struct bch_fs *c = NULL;
- char *path[1] = { dev.path };
+ char *path[1] = { dev->path };
opt_set(opts, sb, sb_offset);
opt_set(opts, nostart, true);
@@ -313,7 +314,7 @@ static int migrate_fs(const char *fs_path,
"filesystem. That file can be deleted once the old filesystem is\n"
"no longer needed (and should be deleted prior to running\n"
"bcachefs migrate-superblock)\n",
- sb_offset, dev.path, dev.path, sb_offset);
+ sb_offset, dev->path, dev->path, sb_offset);
return 0;
}
diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c
index 50ac96a8..358fff09 100644
--- a/c_src/libbcachefs.c
+++ b/c_src/libbcachefs.c
@@ -16,6 +16,8 @@
#include <uuid/uuid.h>
+#include <linux/mm.h>
+
#include "libbcachefs.h"
#include "crypto.h"
#include "libbcachefs/bcachefs_format.h"
@@ -66,35 +68,53 @@ static u64 min_size(unsigned bucket_size)
return BCH_MIN_NR_NBUCKETS * bucket_size;
}
-u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+static u64 dev_max_bucket_size(u64 dev_size)
{
- if (dev->opts.fs_size < min_size(opts.block_size))
- die("cannot format %s, too small (%llu bytes, min %llu)",
- dev->path, dev->opts.fs_size, min_size(opts.block_size));
+ return dev_size / BCH_MIN_NR_NBUCKETS;
+}
+u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
+{
/* Bucket size must be >= block size: */
u64 bucket_size = opts.block_size;
/* Bucket size must be >= btree node size: */
if (opt_defined(opts, btree_node_size))
- bucket_size = max_t(unsigned, bucket_size, opts.btree_node_size);
+ bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
- /* Want a bucket size of at least 128k, if possible: */
- bucket_size = max(bucket_size, 128ULL << 10);
+ u64 total_fs_size = 0;
+ darray_for_each(devs, i) {
+ if (i->opts.fs_size < min_size(opts.block_size))
+ die("cannot format %s, too small (%llu bytes, min %llu)",
+ i->path, i->opts.fs_size, min_size(opts.block_size));
- if (dev->opts.fs_size >= min_size(bucket_size)) {
- unsigned scale = max(1,
- ilog2(dev->opts.fs_size / min_size(bucket_size)) / 4);
+ total_fs_size += i->opts.fs_size;
+ }
- scale = rounddown_pow_of_two(scale);
+ struct sysinfo info;
+ si_meminfo(&info);
- /* max bucket size 1 mb */
- bucket_size = min(bucket_size * scale, 1ULL << 20);
- } else {
- do {
- bucket_size /= 2;
- } while (dev->opts.fs_size < min_size(bucket_size));
- }
+ /*
+ * Large fudge factor to allow for other fsck processes and devices
+ * being added after creation
+ */
+ u64 mem_available_for_fsck = info.totalram / 8;
+ u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
+ u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
+
+ /*
+ * Lower bound to avoid fragmenting encoded (checksummed, compressed)
+ * extents too much as they're moved:
+ */
+ bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
+
+ /* Lower bound to ensure we can fsck: */
+ bucket_size = max(bucket_size, mem_lower_bound);
+
+ u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
+
+ /* We also prefer larger buckets for performance, up to 2MB at 2T */
+ bucket_size = max(bucket_size, perf_lower_bound);
return bucket_size;
}
@@ -152,7 +172,6 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
{
struct bch_sb_handle sb = { NULL };
unsigned max_dev_block_size = 0;
- u64 min_bucket_size = U64_MAX;
darray_for_each(devs, i)
max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
@@ -171,13 +190,12 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd));
/* calculate bucket sizes: */
- darray_for_each(devs, i)
- min_bucket_size = min(min_bucket_size,
- i->opts.bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+ u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
darray_for_each(devs, i)
if (!opt_defined(i->opts, bucket_size))
- opt_set(i->opts, bucket_size, min_bucket_size);
+ opt_set(i->opts, bucket_size,
+ min(fs_bucket_size, dev_max_bucket_size(i->opts.fs_size)));
darray_for_each(devs, i) {
i->nbuckets = i->opts.fs_size / i->opts.bucket_size;
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
index 4185cebb..141a6e8f 100644
--- a/c_src/libbcachefs.h
+++ b/c_src/libbcachefs.h
@@ -72,6 +72,8 @@ struct dev_opts {
struct bch_opts opts;
};
+typedef DARRAY(struct dev_opts) dev_opts_list;
+
static inline struct dev_opts dev_opts_default()
{
return (struct dev_opts) { .opts = bch2_opts_empty() };
@@ -80,11 +82,9 @@ static inline struct dev_opts dev_opts_default()
void bch2_sb_layout_init(struct bch_sb_layout *,
unsigned, unsigned, u64, u64);
-u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
-typedef DARRAY(struct dev_opts) dev_opts_list;
-
struct bch_sb *bch2_format(struct bch_opt_strs,
struct bch_opts,
struct format_opts,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7e83af0..d0fad5ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4,6 +4,7 @@
#include <sys/syscall.h>
#include <unistd.h>
+#include <linux/bug.h>
#include <linux/types.h>
struct sysinfo {