From c8bec83e307f28751c433ba1d3f648429fb5a34c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Jul 2023 20:31:34 -0400 Subject: Update bcachefs sources to e14d7c7195 bcachefs: Compression levels Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- cmd_dump.c | 2 +- cmd_fsck.c | 8 +- cmd_list_journal.c | 2 +- include/linux/blkdev.h | 1 + include/linux/lz4.h | 5 ++ include/linux/rcupdate.h | 1 + include/linux/slab.h | 3 +- libbcachefs.c | 3 +- libbcachefs/bcachefs.h | 5 +- libbcachefs/bcachefs_format.h | 51 +++++++++---- libbcachefs/btree_gc.c | 2 +- libbcachefs/btree_update_leaf.c | 4 +- libbcachefs/btree_write_buffer.c | 3 + libbcachefs/checksum.h | 6 -- libbcachefs/compress.c | 157 ++++++++++++++++++++++++++++++--------- libbcachefs/compress.h | 37 +++++++++ libbcachefs/data_update.c | 4 +- libbcachefs/disk_groups.c | 21 ++++-- libbcachefs/disk_groups.h | 7 +- libbcachefs/error.c | 12 +-- libbcachefs/error.h | 7 -- libbcachefs/fs-io.c | 85 +++++++++++++++------ libbcachefs/fsck.c | 4 +- libbcachefs/io.c | 14 ++-- libbcachefs/io.h | 2 +- libbcachefs/io_types.h | 2 +- libbcachefs/opts.c | 115 ++++++++++++++++++---------- libbcachefs/opts.h | 32 ++++++-- libbcachefs/rebalance.c | 3 +- libbcachefs/recovery.c | 11 +-- libbcachefs/subvolume.c | 130 +++++++++++++++++++++++++------- libbcachefs/subvolume.h | 86 +++++++++++++++++---- libbcachefs/subvolume_types.h | 4 + libbcachefs/super-io.c | 17 ++++- libbcachefs/super.c | 19 +++-- libbcachefs/util.h | 2 +- linux/blkdev.c | 11 ++- rust-src/src/cmd_list.rs | 2 +- 39 files changed, 645 insertions(+), 237 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 9071b137..06166c86 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -04f2d2ae5bd69eecd0b4bb700efdc665b09745a1 +e14d7c7195b974bbaf400f9c3f2bdaa94fc8d372 diff --git a/cmd_dump.c b/cmd_dump.c index 59d73c3c..cc25a6a3 100644 --- a/cmd_dump.c +++ b/cmd_dump.c @@ -117,7 +117,7 @@ int cmd_dump(int argc, char *argv[]) 
opt_set(opts, norecovery, true); opt_set(opts, degraded, true); opt_set(opts, errors, BCH_ON_ERROR_continue); - opt_set(opts, fix_errors, FSCK_OPT_NO); + opt_set(opts, fix_errors, FSCK_FIX_no); while ((opt = getopt(argc, argv, "o:fjvh")) != -1) switch (opt) { diff --git a/cmd_fsck.c b/cmd_fsck.c index 54ace957..00134971 100644 --- a/cmd_fsck.c +++ b/cmd_fsck.c @@ -37,7 +37,7 @@ int cmd_fsck(int argc, char *argv[]) opt_set(opts, degraded, true); opt_set(opts, fsck, true); - opt_set(opts, fix_errors, FSCK_OPT_ASK); + opt_set(opts, fix_errors, FSCK_FIX_ask); while ((opt = getopt_long(argc, argv, "apynfo:rvh", @@ -45,14 +45,14 @@ int cmd_fsck(int argc, char *argv[]) switch (opt) { case 'a': /* outdated alias for -p */ case 'p': - opt_set(opts, fix_errors, FSCK_OPT_YES); + opt_set(opts, fix_errors, FSCK_FIX_yes); break; case 'y': - opt_set(opts, fix_errors, FSCK_OPT_YES); + opt_set(opts, fix_errors, FSCK_FIX_yes); break; case 'n': opt_set(opts, nochanges, true); - opt_set(opts, fix_errors, FSCK_OPT_NO); + opt_set(opts, fix_errors, FSCK_FIX_no); break; case 'f': /* force check, even if filesystem marked clean: */ diff --git a/cmd_list_journal.c b/cmd_list_journal.c index 17124225..ce4caad4 100644 --- a/cmd_list_journal.c +++ b/cmd_list_journal.c @@ -234,7 +234,7 @@ int cmd_list_journal(int argc, char *argv[]) opt_set(opts, norecovery, true); opt_set(opts, degraded, true); opt_set(opts, errors, BCH_ON_ERROR_continue); - opt_set(opts, fix_errors, FSCK_OPT_YES); + opt_set(opts, fix_errors, FSCK_FIX_yes); opt_set(opts, keep_journal, true); opt_set(opts, read_journal_only,true); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9e020775..7d378ab2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -44,6 +44,7 @@ struct user_namespace; #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ #define FMODE_64BITHASH ((__force fmode_t)0x400) +#define FMODE_BUFFERED ((__force fmode_t)0x800) struct 
inode { unsigned long i_ino; diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 86e1dde3..f574964a 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -2,4 +2,9 @@ #define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \ LZ4_compress_destSize(src, dst, srclen, dstlen) + +#define LZ4_compress_HC(src, dst, srclen, dstlen, level, workspace) -1 + #define LZ4_MEM_COMPRESS 0 +#define LZ4HC_MEM_COMPRESS 0 +#define LZ4HC_MIN_CLEVEL 0 diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3db40cb4..ef032531 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -12,6 +12,7 @@ #define rcu_access_pointer(p) READ_ONCE(p) #define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */ +#define kvfree_rcu(ptr) kfree(ptr) /* XXX */ #define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v) diff --git a/include/linux/slab.h b/include/linux/slab.h index a36f6f43..78f906a8 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -213,7 +213,7 @@ static inline struct kmem_cache *kmem_cache_create(size_t obj_size) #define vfree(p) free(p) -static inline void *__vmalloc(unsigned long size, gfp_t flags) +static inline void *__vmalloc_noprof(unsigned long size, gfp_t flags) { unsigned i; void *p; @@ -234,6 +234,7 @@ static inline void *__vmalloc(unsigned long size, gfp_t flags) return p; } +#define __vmalloc __vmalloc_noprof static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) { diff --git a/libbcachefs.c b/libbcachefs.c index 092a54a6..bac772b2 100644 --- a/libbcachefs.c +++ b/libbcachefs.c @@ -604,8 +604,7 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs) u64 v; for (i = 0; i < bch2_opts_nr; i++) { - if (!strs.by_id[i] || - bch2_opt_table[i].type == BCH_OPT_FN) + if (!strs.by_id[i]) continue; ret = bch2_opt_parse(NULL, diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index d8c02064..445d010c 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -774,9 +774,10 @@ struct bch_fs { 
struct mutex sb_lock; /* snapshot.c: */ - GENRADIX(struct snapshot_t) snapshots; - struct bch_snapshot_table __rcu *snapshot_table; + struct snapshot_table __rcu *snapshots; + size_t snapshot_table_size; struct mutex snapshot_table_lock; + struct work_struct snapshot_delete_work; struct work_struct snapshot_wait_for_pagecache_and_delete_work; snapshot_id_list snapshots_unlinked; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 6d693e4d..5c308f84 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -695,7 +695,7 @@ struct bch_reservation { /* Maximum size (in u64s) a single pointer could be: */ #define BKEY_EXTENT_PTR_U64s_MAX\ ((sizeof(struct bch_extent_crc128) + \ - sizeof(struct bch_extent_ptr)) / sizeof(u64)) + sizeof(struct bch_extent_ptr)) / sizeof(__u64)) /* Maximum possible size of an entire extent value: */ #define BKEY_EXTENT_VAL_U64s_MAX \ @@ -707,7 +707,7 @@ struct bch_reservation { /* Btree pointers don't carry around checksums: */ #define BKEY_BTREE_PTR_VAL_U64s_MAX \ ((sizeof(struct bch_btree_ptr_v2) + \ - sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64)) + sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) #define BKEY_BTREE_PTR_U64s_MAX \ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) @@ -749,7 +749,7 @@ struct bch_inode_v3 { } __packed __aligned(8); #define INODEv3_FIELDS_START_INITIAL 6 -#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64)) +#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64)) struct bch_inode_generation { struct bch_val v; @@ -916,7 +916,7 @@ struct bch_dirent { #define DT_SUBVOL 16 #define BCH_DT_MAX 17 -#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(u64) - \ +#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(__u64) - \ sizeof(struct bkey) - \ offsetof(struct bch_dirent, d_name))) @@ -1009,7 +1009,7 @@ struct bch_alloc_v4 { } __packed __aligned(8); #define 
BCH_ALLOC_V4_U64s_V0 6 -#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64)) +#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64)) BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) @@ -1289,10 +1289,10 @@ struct bch_key { }; #define BCH_KEY_MAGIC \ - (((u64) 'b' << 0)|((u64) 'c' << 8)| \ - ((u64) 'h' << 16)|((u64) '*' << 24)| \ - ((u64) '*' << 32)|((u64) 'k' << 40)| \ - ((u64) 'e' << 48)|((u64) 'y' << 56)) + (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \ + ((__u64) 'h' << 16)|((__u64) '*' << 24)| \ + ((__u64) '*' << 32)|((__u64) 'k' << 40)| \ + ((__u64) 'e' << 48)|((__u64) 'y' << 56)) struct bch_encrypted_key { __le64 magic; @@ -1747,7 +1747,7 @@ LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62); LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); -LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); +LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8); LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); @@ -1767,7 +1767,7 @@ LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); -LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE, +LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO, struct bch_sb, flags[2], 0, 4); LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); @@ -1783,11 +1783,36 @@ LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34); LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54); LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56); -/* flags[4] 56-64 unused: */ 
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60); +LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, + struct bch_sb, flags[4], 60, 64); LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) +{ + return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4); +} + +static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) +{ + SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v); + SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4); +} + +static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb) +{ + return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) | + (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4); +} + +static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) +{ + SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v); + SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4); +} + /* * Features: * @@ -2272,7 +2297,7 @@ static inline __u64 BTREE_NODE_ID(struct btree_node *n) return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4); } -static inline void SET_BTREE_NODE_ID(struct btree_node *n, u64 v) +static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v) { SET_BTREE_NODE_ID_LO(n, v); SET_BTREE_NODE_ID_HI(n, v >> 4); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index c47d5d8c..be537b23 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1811,7 +1811,7 @@ again: if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations && - c->opts.fix_errors != FSCK_OPT_NO)) { + c->opts.fix_errors != FSCK_FIX_no)) { bch_info(c, "Starting topology repair pass"); ret = bch2_repair_topology(c); if (ret) diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 53219fdc..3638cef2 100644 --- a/libbcachefs/btree_update_leaf.c +++ 
b/libbcachefs/btree_update_leaf.c @@ -311,7 +311,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && - bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot)); + bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); } static noinline int @@ -1229,7 +1229,7 @@ static inline int check_pos_snapshot_overwritten(struct btree_trans *trans, struct bpos pos) { if (!btree_type_has_snapshots(id) || - !snapshot_t(trans->c, pos.snapshot)->children[0]) + bch2_snapshot_is_leaf(trans->c, pos.snapshot)) return 0; return __check_pos_snapshot_overwritten(trans, id, pos); diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c index b5022631..6c30a72e 100644 --- a/libbcachefs/btree_write_buffer.c +++ b/libbcachefs/btree_write_buffer.c @@ -129,6 +129,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f keys = wb->keys[s.idx]; nr = s.nr; + if (race_fault()) + goto slowpath; + /* * We first sort so that we can detect and skip redundant updates, and * then we attempt to flush in sorted btree order, as this is most diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h index 409ad534..1ad1d5f0 100644 --- a/libbcachefs/checksum.h +++ b/libbcachefs/checksum.h @@ -120,12 +120,6 @@ static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c) return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); } -static const unsigned bch2_compression_opt_to_type[] = { -#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, - BCH_COMPRESSION_OPTS() -#undef x -}; - static inline bool bch2_checksum_type_valid(const struct bch_fs *c, unsigned type) { diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 48427a27..560214c1 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -296,21 +296,32 @@ static int 
attempt_compress(struct bch_fs *c, void *workspace, void *dst, size_t dst_len, void *src, size_t src_len, - enum bch_compression_type compression_type) + struct bch_compression_opt compression) { - switch (compression_type) { - case BCH_COMPRESSION_TYPE_lz4: { - int len = src_len; - int ret = LZ4_compress_destSize( - src, dst, - &len, dst_len, - workspace); - - if (len < src_len) - return -len; + enum bch_compression_type compression_type = + __bch2_compression_opt_to_type[compression.type]; - return ret; - } + switch (compression_type) { + case BCH_COMPRESSION_TYPE_lz4: + if (compression.level < LZ4HC_MIN_CLEVEL) { + int len = src_len; + int ret = LZ4_compress_destSize( + src, dst, + &len, dst_len, + workspace); + if (len < src_len) + return -len; + + return ret; + } else { + int ret = LZ4_compress_HC( + src, dst, + src_len, dst_len, + compression.level, + workspace); + + return ret ?: -1; + } case BCH_COMPRESSION_TYPE_gzip: { z_stream strm = { .next_in = src, @@ -320,7 +331,11 @@ static int attempt_compress(struct bch_fs *c, }; zlib_set_workspace(&strm, workspace); - zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, + zlib_deflateInit2(&strm, + compression.level + ? 
clamp_t(unsigned, compression.level, + Z_BEST_SPEED, Z_BEST_COMPRESSION) + : Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); @@ -333,8 +348,14 @@ static int attempt_compress(struct bch_fs *c, return strm.total_out; } case BCH_COMPRESSION_TYPE_zstd: { + /* + * rescale: + * zstd max compression level is 22, our max level is 15 + */ + unsigned level = min((compression.level * 3) / 2, zstd_max_clevel()); + ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max); ZSTD_CCtx *ctx = zstd_init_cctx(workspace, - zstd_cctx_workspace_bound(&c->zstd_params.cParams)); + zstd_cctx_workspace_bound(¶ms.cParams)); /* * ZSTD requires that when we decompress we pass in the exact @@ -365,10 +386,12 @@ static int attempt_compress(struct bch_fs *c, static unsigned __bio_compress(struct bch_fs *c, struct bio *dst, size_t *dst_len, struct bio *src, size_t *src_len, - enum bch_compression_type compression_type) + struct bch_compression_opt compression) { struct bbuf src_data = { NULL }, dst_data = { NULL }; void *workspace; + enum bch_compression_type compression_type = + __bch2_compression_opt_to_type[compression.type]; unsigned pad; int ret = 0; @@ -400,7 +423,7 @@ static unsigned __bio_compress(struct bch_fs *c, ret = attempt_compress(c, workspace, dst_data.b, *dst_len, src_data.b, *src_len, - compression_type); + compression); if (ret > 0) { *dst_len = ret; ret = 0; @@ -447,22 +470,24 @@ static unsigned __bio_compress(struct bch_fs *c, BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size); BUG_ON(*dst_len & (block_bytes(c) - 1)); BUG_ON(*src_len & (block_bytes(c) - 1)); + ret = compression_type; out: bio_unmap_or_unbounce(c, src_data); bio_unmap_or_unbounce(c, dst_data); - return compression_type; + return ret; err: - compression_type = BCH_COMPRESSION_TYPE_incompressible; + ret = BCH_COMPRESSION_TYPE_incompressible; goto out; } unsigned bch2_bio_compress(struct bch_fs *c, struct bio *dst, size_t *dst_len, struct bio *src, size_t 
*src_len, - unsigned compression_type) + unsigned compression_opt) { unsigned orig_dst = dst->bi_iter.bi_size; unsigned orig_src = src->bi_iter.bi_size; + unsigned compression_type; /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size, @@ -470,11 +495,9 @@ unsigned bch2_bio_compress(struct bch_fs *c, /* Don't generate a bigger output than input: */ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); - if (compression_type == BCH_COMPRESSION_TYPE_lz4_old) - compression_type = BCH_COMPRESSION_TYPE_lz4; - compression_type = - __bio_compress(c, dst, dst_len, src, src_len, compression_type); + __bio_compress(c, dst, dst_len, src, src_len, + bch2_compression_decode(compression_opt)); dst->bi_iter.bi_size = orig_dst; src->bi_iter.bi_size = orig_src; @@ -521,8 +544,10 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) } int bch2_check_set_has_compressed_data(struct bch_fs *c, - unsigned compression_type) + unsigned compression_opt) { + unsigned compression_type = bch2_compression_decode(compression_opt).type; + BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); return compression_type @@ -546,14 +571,16 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) { size_t decompress_workspace_size = 0; bool decompress_workspace_needed; - ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max); + ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), + c->opts.encoded_extent_max); struct { - unsigned feature; - unsigned type; - size_t compress_workspace; - size_t decompress_workspace; + unsigned feature; + enum bch_compression_type type; + size_t compress_workspace; + size_t decompress_workspace; } compression_types[] = { - { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 }, + { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, + max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) }, { 
BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), zlib_inflate_workspacesize(), }, @@ -612,16 +639,74 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) return 0; } +static u64 compression_opt_to_feature(unsigned v) +{ + unsigned type = bch2_compression_decode(v).type; + return 1ULL << bch2_compression_opt_to_feature[type]; +} + int bch2_fs_compress_init(struct bch_fs *c) { u64 f = c->sb.features; - if (c->opts.compression) - f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression]; - - if (c->opts.background_compression) - f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression]; + f |= compression_opt_to_feature(c->opts.compression); + f |= compression_opt_to_feature(c->opts.background_compression); return __bch2_fs_compress_init(c, f); +} + +int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, + struct printbuf *err) +{ + char *val = kstrdup(_val, GFP_KERNEL); + char *p = val, *type_str, *level_str; + struct bch_compression_opt opt = { 0 }; + int ret; + + if (!val) + return -ENOMEM; + + type_str = strsep(&p, ":"); + level_str = p; + + ret = match_string(bch2_compression_opts, -1, type_str); + if (ret < 0 && err) + prt_str(err, "invalid compression type"); + if (ret < 0) + goto err; + + opt.type = ret; + + if (level_str) { + unsigned level; + + ret = kstrtouint(level_str, 10, &level); + if (!ret && !opt.type && level) + ret = -EINVAL; + if (!ret && level > 15) + ret = -EINVAL; + if (ret < 0 && err) + prt_str(err, "invalid compression level"); + if (ret < 0) + goto err; + + opt.level = level; + } + + *res = bch2_compression_encode(opt); +err: + kfree(val); + return ret; +} + +void bch2_opt_compression_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) +{ + struct bch_compression_opt opt = bch2_compression_decode(v); + prt_str(out, bch2_compression_opts[opt.type]); + if (opt.level) + prt_printf(out, ":%u", opt.level); 
} diff --git a/libbcachefs/compress.h b/libbcachefs/compress.h index 4bab1f61..052ea303 100644 --- a/libbcachefs/compress.h +++ b/libbcachefs/compress.h @@ -4,6 +4,35 @@ #include "extents_types.h" +struct bch_compression_opt { + u8 type:4, + level:4; +}; + +static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +{ + return (struct bch_compression_opt) { + .type = v & 15, + .level = v >> 4, + }; +} + +static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) +{ + return opt.type|(opt.level << 4); +} + +static const unsigned __bch2_compression_opt_to_type[] = { +#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, + BCH_COMPRESSION_OPTS() +#undef x +}; + +static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) +{ + return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; +} + int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, struct bch_extent_crc_unpacked *); int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, @@ -15,4 +44,12 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); void bch2_fs_compress_exit(struct bch_fs *); int bch2_fs_compress_init(struct bch_fs *); +int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); +void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); + +#define bch2_opt_compression (struct bch_opt_fn) { \ + .parse = bch2_opt_compression_parse, \ + .to_text = bch2_opt_compression_to_text, \ +} + #endif /* _BCACHEFS_COMPRESS_H */ diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 3c918368..cfc62446 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -455,9 +455,7 @@ int bch2_data_update_init(struct btree_trans *trans, BCH_WRITE_DATA_ENCODED| BCH_WRITE_MOVE| m->data_opts.write_flags; - m->op.compression_type = - bch2_compression_opt_to_type[io_opts.background_compression ?: - 
io_opts.compression]; + m->op.compression_opt = io_opts.background_compression ?: io_opts.compression; m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; bkey_for_each_ptr(ptrs, ptr) diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c index 52b64007..de14ca3a 100644 --- a/libbcachefs/disk_groups.c +++ b/libbcachefs/disk_groups.c @@ -460,30 +460,37 @@ int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) return ret; } -int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) +int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, + struct printbuf *err) { struct bch_dev *ca; int g; - if (!strlen(buf) || !strcmp(buf, "none")) { - *v = 0; + if (!val) + return -EINVAL; + + if (!c) + return 0; + + if (!strlen(val) || !strcmp(val, "none")) { + *res = 0; return 0; } /* Is it a device? */ - ca = bch2_dev_lookup(c, buf); + ca = bch2_dev_lookup(c, val); if (!IS_ERR(ca)) { - *v = dev_to_target(ca->dev_idx); + *res = dev_to_target(ca->dev_idx); percpu_ref_put(&ca->ref); return 0; } mutex_lock(&c->sb_lock); - g = bch2_disk_path_find(&c->disk_sb, buf); + g = bch2_disk_path_find(&c->disk_sb, val); mutex_unlock(&c->sb_lock); if (g >= 0) { - *v = group_to_target(g); + *res = group_to_target(g); return 0; } diff --git a/libbcachefs/disk_groups.h b/libbcachefs/disk_groups.h index ec12584c..bd771176 100644 --- a/libbcachefs/disk_groups.h +++ b/libbcachefs/disk_groups.h @@ -85,9 +85,14 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); -int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); +int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); +#define bch2_opt_target (struct bch_opt_fn) { \ + .parse = bch2_opt_target_parse, \ + .to_text = bch2_opt_target_to_text, \ +} + 
int bch2_sb_disk_groups_to_cpu(struct bch_fs *); int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); diff --git a/libbcachefs/error.c b/libbcachefs/error.c index b08cd23d..685464b8 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -204,7 +204,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) prt_str(out, ", continuing"); ret = -BCH_ERR_fsck_ignore; } - } else if (c->opts.fix_errors == FSCK_OPT_EXIT) { + } else if (c->opts.fix_errors == FSCK_FIX_exit) { prt_str(out, ", exiting"); ret = -BCH_ERR_fsck_errors_not_fixed; } else if (flags & FSCK_CAN_FIX) { @@ -212,7 +212,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) ? s->fix : c->opts.fix_errors; - if (fix == FSCK_OPT_ASK) { + if (fix == FSCK_FIX_ask) { int ask; prt_str(out, ": fix?"); @@ -223,13 +223,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) if (ask >= YN_ALLNO && s) s->fix = ask == YN_ALLNO - ? FSCK_OPT_NO - : FSCK_OPT_YES; + ? FSCK_FIX_no + : FSCK_FIX_yes; ret = ask & 1 ? -BCH_ERR_fsck_fix : -BCH_ERR_fsck_ignore; - } else if (fix == FSCK_OPT_YES || + } else if (fix == FSCK_FIX_yes || (c->opts.nochanges && !(flags & FSCK_CAN_IGNORE))) { prt_str(out, ", fixing"); @@ -244,7 +244,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) 
} if (ret == -BCH_ERR_fsck_ignore && - (c->opts.fix_errors == FSCK_OPT_EXIT || + (c->opts.fix_errors == FSCK_FIX_exit || !(flags & FSCK_CAN_IGNORE))) ret = -BCH_ERR_fsck_errors_not_fixed; diff --git a/libbcachefs/error.h b/libbcachefs/error.h index edf12443..7ce95400 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -91,13 +91,6 @@ do { \ * be able to repair: */ -enum fsck_err_opts { - FSCK_OPT_EXIT, - FSCK_OPT_YES, - FSCK_OPT_NO, - FSCK_OPT_ASK, -}; - struct fsck_err_state { struct list_head list; const char *fmt; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index a8060052..6b691b2b 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -35,6 +35,8 @@ #include +static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned); + struct folio_vec { struct folio *fv_folio; size_t fv_offset; @@ -1972,7 +1974,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, darray_for_each(folios, fi) { struct folio *f = *fi; u64 f_len = min(end, folio_end_pos(f)) - f_pos; - unsigned f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter); + unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); if (!f_copied) { folios_trunc(&folios, fi); @@ -3373,6 +3375,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, struct quota_res quota_res = { 0 }; struct bkey_s_c k; unsigned sectors; + bool is_allocation; + u64 hole_start, hole_end; u32 snapshot; bch2_trans_begin(&trans); @@ -3388,6 +3392,10 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, if ((ret = bkey_err(k))) goto bkey_err; + hole_start = iter.pos.offset; + hole_end = bpos_min(k.k->p, end_pos).offset; + is_allocation = bkey_extent_is_allocation(k.k); + /* already reserved */ if (bkey_extent_is_reservation(k) && bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { @@ -3401,17 +3409,26 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, continue; } - /* - * XXX: for nocow 
mode, we should promote shared extents to - * unshared here - */ + if (!(mode & FALLOC_FL_ZERO_RANGE)) { + ret = drop_locks_do(&trans, + (bch2_clamp_data_hole(&inode->v, + &hole_start, + &hole_end, + opts.data_replicas), 0)); + bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); + + if (ret) + goto bkey_err; + + if (hole_start == hole_end) + continue; + } - sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset; + sectors = hole_end - hole_start; - if (!bkey_extent_is_allocation(k.k)) { + if (!is_allocation) { ret = bch2_quota_reservation_add(c, inode, - "a_res, - sectors, true); + "a_res, sectors, true); if (unlikely(ret)) goto bkey_err; } @@ -3423,15 +3440,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, goto bkey_err; i_sectors_acct(c, inode, "a_res, i_sectors_delta); + + drop_locks_do(&trans, + (mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); bkey_err: bch2_quota_reservation_put(c, inode, "a_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; } - bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */ - mark_pagecache_reserved(inode, start_sector, iter.pos.offset); - if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { struct quota_res quota_res = { 0 }; s64 i_sectors_delta = 0; @@ -3679,14 +3696,16 @@ err: /* fseek: */ -static int folio_data_offset(struct folio *folio, loff_t pos) +static int folio_data_offset(struct folio *folio, loff_t pos, + unsigned min_replicas) { struct bch_folio *s = bch2_folio(folio); unsigned i, sectors = folio_sectors(folio); if (s) for (i = folio_pos_to_s(folio, pos); i < sectors; i++) - if (s->s[i].state >= SECTOR_dirty) + if (s->s[i].state >= SECTOR_dirty && + s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas) return i << SECTOR_SHIFT; return -1; @@ -3694,7 +3713,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos) static loff_t bch2_seek_pagecache_data(struct inode *vinode, loff_t 
start_offset, - loff_t end_offset) + loff_t end_offset, + unsigned min_replicas) { struct folio_batch fbatch; pgoff_t start_index = start_offset >> PAGE_SHIFT; @@ -3713,7 +3733,8 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode, folio_lock(folio); offset = folio_data_offset(folio, - max(folio_pos(folio), start_offset)); + max(folio_pos(folio), start_offset), + min_replicas); if (offset >= 0) { ret = clamp(folio_pos(folio) + offset, start_offset, end_offset); @@ -3775,7 +3796,7 @@ err: if (next_data > offset) next_data = bch2_seek_pagecache_data(&inode->v, - offset, next_data); + offset, next_data, 0); if (next_data >= isize) return -ENXIO; @@ -3783,7 +3804,8 @@ err: return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); } -static bool folio_hole_offset(struct address_space *mapping, loff_t *offset) +static bool folio_hole_offset(struct address_space *mapping, loff_t *offset, + unsigned min_replicas) { struct folio *folio; struct bch_folio *s; @@ -3800,7 +3822,8 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset) sectors = folio_sectors(folio); for (i = folio_pos_to_s(folio, *offset); i < sectors; i++) - if (s->s[i].state < SECTOR_dirty) { + if (s->s[i].state < SECTOR_dirty || + s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) { *offset = max(*offset, folio_pos(folio) + (i << SECTOR_SHIFT)); goto unlock; @@ -3815,18 +3838,34 @@ unlock: static loff_t bch2_seek_pagecache_hole(struct inode *vinode, loff_t start_offset, - loff_t end_offset) + loff_t end_offset, + unsigned min_replicas) { struct address_space *mapping = vinode->i_mapping; loff_t offset = start_offset; while (offset < end_offset && - !folio_hole_offset(mapping, &offset)) + !folio_hole_offset(mapping, &offset, min_replicas)) ; return min(offset, end_offset); } +static void bch2_clamp_data_hole(struct inode *inode, + u64 *hole_start, + u64 *hole_end, + unsigned min_replicas) +{ + *hole_start = bch2_seek_pagecache_hole(inode, + *hole_start << 9, 
*hole_end << 9, min_replicas) >> 9; + + if (*hole_start == *hole_end) + return; + + *hole_end = bch2_seek_pagecache_data(inode, + *hole_start << 9, *hole_end << 9, min_replicas) >> 9; +} + static loff_t bch2_seek_hole(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); @@ -3856,12 +3895,12 @@ retry: BTREE_ITER_SLOTS, k, ret) { if (k.k->p.inode != inode->v.i_ino) { next_hole = bch2_seek_pagecache_hole(&inode->v, - offset, MAX_LFS_FILESIZE); + offset, MAX_LFS_FILESIZE, 0); break; } else if (!bkey_extent_is_data(k.k)) { next_hole = bch2_seek_pagecache_hole(&inode->v, max(offset, bkey_start_offset(k.k) << 9), - k.k->p.offset << 9); + k.k->p.offset << 9, 0); if (next_hole < k.k->p.offset << 9) break; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index ddc2782f..7edd4632 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -894,7 +894,7 @@ static int check_inode(struct btree_trans *trans, * particular is not atomic, so on the internal snapshot nodes * we can see inodes marked for deletion after a clean shutdown */ - if (bch2_snapshot_internal_node(c, k.k->p.snapshot)) + if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot)) return 0; if (!bkey_is_inode(k.k)) @@ -2122,6 +2122,8 @@ int bch2_check_directory_structure(struct bch_fs *c) return ret; } +/* check_nlink pass: */ + struct nlink_table { size_t nr; size_t size; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 33762e4a..8604df80 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1078,7 +1078,7 @@ static enum prep_encoded_ret { /* Can we just write the entire extent as is? 
*/ if (op->crc.uncompressed_size == op->crc.live_size && op->crc.compressed_size <= wp->sectors_free && - (op->crc.compression_type == op->compression_type || + (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || op->incompressible)) { if (!crc_is_compressed(op->crc) && op->csum_type != op->crc.csum_type && @@ -1126,7 +1126,7 @@ static enum prep_encoded_ret { /* * If we want to compress the data, it has to be decrypted: */ - if ((op->compression_type || + if ((op->compression_opt || bch2_csum_type_is_encryption(op->crc.csum_type) != bch2_csum_type_is_encryption(op->csum_type)) && bch2_write_decrypt(op)) @@ -1173,7 +1173,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, } if (ec_buf || - op->compression_type || + op->compression_opt || (op->csum_type && !(op->flags & BCH_WRITE_PAGES_STABLE)) || (bch2_csum_type_is_encryption(op->csum_type) && @@ -1196,16 +1196,16 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, dst->bi_iter.bi_size < c->opts.encoded_extent_max) break; - BUG_ON(op->compression_type && + BUG_ON(op->compression_opt && (op->flags & BCH_WRITE_DATA_ENCODED) && bch2_csum_type_is_encryption(op->crc.csum_type)); - BUG_ON(op->compression_type && !bounce); + BUG_ON(op->compression_opt && !bounce); crc.compression_type = op->incompressible ? BCH_COMPRESSION_TYPE_incompressible - : op->compression_type + : op->compression_opt ? 
bch2_bio_compress(c, dst, &dst_len, src, &src_len, - op->compression_type) + op->compression_opt) : 0; if (!crc_is_compressed(crc)) { dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); diff --git a/libbcachefs/io.h b/libbcachefs/io.h index 7a243a5f..1476380d 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -86,7 +86,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, op->written = 0; op->error = 0; op->csum_type = bch2_data_checksum_type(c, opts); - op->compression_type = bch2_compression_opt_to_type[opts.compression]; + op->compression_opt = opts.compression; op->nr_replicas = 0; op->nr_replicas_required = c->opts.data_replicas_required; op->watermark = BCH_WATERMARK_normal; diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h index 0fbdfbf9..737f16d7 100644 --- a/libbcachefs/io_types.h +++ b/libbcachefs/io_types.h @@ -115,8 +115,8 @@ struct bch_write_op { u16 flags; s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ + unsigned compression_opt:8; unsigned csum_type:4; - unsigned compression_type:4; unsigned nr_replicas:4; unsigned nr_replicas_required:4; unsigned watermark:3; diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 0c0c83fa..9dcc61ee 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -5,6 +5,7 @@ #include "bcachefs.h" #include "compress.h" #include "disk_groups.h" +#include "error.h" #include "opts.h" #include "super-io.h" #include "util.h" @@ -16,6 +17,11 @@ const char * const bch2_error_actions[] = { NULL }; +const char * const bch2_fsck_fix_opts[] = { + BCH_FIX_ERRORS_OPTS() + NULL +}; + const char * const bch2_version_upgrade_opts[] = { BCH_VERSION_UPGRADE_OPTS() NULL @@ -89,6 +95,37 @@ const char * const bch2_fs_usage_types[] = { #undef x +int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res, + struct printbuf *err) +{ + if (!val) { + *res = FSCK_FIX_yes; + } else { + int ret = match_string(bch2_fsck_fix_opts, -1, val); + + if (ret < 0 && err) + prt_str(err, "fix_errors: invalid selection"); + if (ret < 0) + return ret; + *res = ret; + } + + return 0; +} + +void bch2_opt_fix_errors_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) +{ + prt_str(out, bch2_fsck_fix_opts[v]); +} + +static const struct bch_opt_fn bch2_opt_fix_errors = { + .parse = bch2_opt_fix_errors_parse, + .to_text = bch2_opt_fix_errors_to_text, +}; + const char * const bch2_d_types[BCH_DT_MAX] = { [DT_UNKNOWN] = "unknown", [DT_FIFO] = "fifo", @@ -167,11 +204,9 @@ const struct bch_option bch2_opt_table[] = { #define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \ .min = _min, .max = _max #define OPT_STR(_choices) .type = BCH_OPT_STR, \ - .min = 0, .max = ARRAY_SIZE(_choices),\ + .min = 0, .max = ARRAY_SIZE(_choices), \ .choices = _choices -#define OPT_FN(_fn) .type = BCH_OPT_FN, \ - .parse = _fn##_parse, \ - .to_text = _fn##_to_text +#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn #define x(_name, _bits, _flags, _type, _sb_opt, 
_default, _hint, _help) \ [Opt_##_name] = { \ @@ -267,15 +302,26 @@ int bch2_opt_parse(struct bch_fs *c, switch (opt->type) { case BCH_OPT_BOOL: - ret = kstrtou64(val, 10, res); + if (val) { + ret = kstrtou64(val, 10, res); + } else { + ret = 0; + *res = 1; + } + if (ret < 0 || (*res != 0 && *res != 1)) { if (err) - prt_printf(err, "%s: must be bool", - opt->attr.name); + prt_printf(err, "%s: must be bool", opt->attr.name); return ret; } break; case BCH_OPT_UINT: + if (!val) { + prt_printf(err, "%s: required value", + opt->attr.name); + return -EINVAL; + } + ret = opt->flags & OPT_HUMAN_READABLE ? bch2_strtou64_h(val, res) : kstrtou64(val, 10, res); @@ -287,6 +333,12 @@ int bch2_opt_parse(struct bch_fs *c, } break; case BCH_OPT_STR: + if (!val) { + prt_printf(err, "%s: required value", + opt->attr.name); + return -EINVAL; + } + ret = match_string(opt->choices, -1, val); if (ret < 0) { if (err) @@ -298,10 +350,7 @@ int bch2_opt_parse(struct bch_fs *c, *res = ret; break; case BCH_OPT_FN: - if (!c) - return 0; - - ret = opt->parse(c, val, res); + ret = opt->fn.parse(c, val, res, err); if (ret < 0) { if (err) prt_printf(err, "%s: parse error", @@ -341,10 +390,10 @@ void bch2_opt_to_text(struct printbuf *out, if (flags & OPT_SHOW_FULL_LIST) prt_string_option(out, opt->choices, v); else - prt_printf(out, "%s", opt->choices[v]); + prt_str(out, opt->choices[v]); break; case BCH_OPT_FN: - opt->to_text(out, c, sb, v); + opt->fn.to_text(out, c, sb, v); break; default: BUG(); @@ -405,31 +454,19 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, name = strsep(&opt, "="); val = opt; - if (val) { - id = bch2_mount_opt_lookup(name); - if (id < 0) - goto bad_opt; - - ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); - if (ret < 0) - goto bad_val; - } else { - id = bch2_mount_opt_lookup(name); - v = 1; - - if (id < 0 && - !strncmp("no", name, 2)) { - id = bch2_mount_opt_lookup(name + 2); - v = 0; - } + id = bch2_mount_opt_lookup(name); - if (id < 0) - 
goto bad_opt; - - if (bch2_opt_table[id].type != BCH_OPT_BOOL) - goto no_val; + /* Check for the form "noopt", negation of a boolean opt: */ + if (id < 0 && + !val && + !strncmp("no", name, 2)) { + id = bch2_mount_opt_lookup(name + 2); + val = "0"; } + if (id < 0) + goto bad_opt; + if (!(bch2_opt_table[id].flags & OPT_MOUNT)) goto bad_opt; @@ -442,6 +479,10 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) goto bad_opt; + ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); + if (ret < 0) + goto bad_val; + bch2_opt_set_by_id(opts, id, v); } @@ -456,10 +497,6 @@ bad_val: pr_err("Invalid mount option %s", err.buf); ret = -1; goto out; -no_val: - pr_err("Mount option %s requires a value", name); - ret = -1; - goto out; out: kfree(copied_opts_start); printbuf_exit(&err); diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index e105a742..8a9db110 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -8,7 +8,10 @@ #include #include "bcachefs_format.h" +struct bch_fs; + extern const char * const bch2_error_actions[]; +extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; @@ -67,6 +70,11 @@ enum opt_type { BCH_OPT_FN, }; +struct bch_opt_fn { + int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *); + void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); +}; + /** * x(name, shortopt, type, in mem type, mode, sb_opt) * @@ -98,6 +106,18 @@ enum opt_type { #define BCACHEFS_VERBOSE_DEFAULT false #endif +#define BCH_FIX_ERRORS_OPTS() \ + x(exit, 0) \ + x(yes, 1) \ + x(no, 2) \ + x(ask, 3) + +enum fsck_err_opts { +#define x(t, n) FSCK_FIX_##t, + BCH_FIX_ERRORS_OPTS() +#undef x +}; + #define BCH_OPTS() \ x(block_size, u16, \ OPT_FS|OPT_FORMAT| \ @@ -154,12 +174,12 @@ enum opt_type { NULL, NULL) \ x(compression, u8, \ 
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_STR(bch2_compression_opts), \ + OPT_FN(bch2_opt_compression), \ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ NULL, NULL) \ x(background_compression, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_STR(bch2_compression_opts), \ + OPT_FN(bch2_opt_compression), \ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ NULL, NULL) \ x(str_hash, u8, \ @@ -318,8 +338,8 @@ enum opt_type { NULL, "Run fsck on mount") \ x(fix_errors, u8, \ OPT_FS|OPT_MOUNT, \ - OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + OPT_FN(bch2_opt_fix_errors), \ + BCH2_NO_SB_OPT, FSCK_FIX_exit, \ NULL, "Fix errors during fsck without asking") \ x(ratelimit_errors, u8, \ OPT_FS|OPT_MOUNT, \ @@ -495,8 +515,8 @@ struct bch_option { u64 min, max; const char * const *choices; - int (*parse)(struct bch_fs *, const char *, u64 *); - void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); + + struct bch_opt_fn fn; const char *hint; const char *help; diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 989f37a3..c3d57723 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -5,6 +5,7 @@ #include "btree_iter.h" #include "buckets.h" #include "clock.h" +#include "compress.h" #include "disk_groups.h" #include "errcode.h" #include "extents.h" @@ -45,7 +46,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { if (!p.ptr.cached && p.crc.compression_type != - bch2_compression_opt_to_type[io_opts->background_compression]) + bch2_compression_opt_to_type(io_opts->background_compression)) data_opts->rewrite_ptrs |= 1U << i; i++; } diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index b04e1565..63b385d8 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1162,12 +1162,9 @@ static void check_version_upgrade(struct bch_fs *c) prt_str(&buf, " incomplete\n"); } - prt_str(&buf, "Doing "); - if 
(BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)) - prt_str(&buf, "incompatible"); - else - prt_str(&buf, "compatible"); - prt_str(&buf, "version upgrade from "); + prt_printf(&buf, "Doing %s version upgrade from ", + BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) + ? "incompatible" : "compatible"); bch2_version_to_text(&buf, old_version); prt_str(&buf, " to "); bch2_version_to_text(&buf, new_version); @@ -1178,7 +1175,7 @@ static void check_version_upgrade(struct bch_fs *c) prt_str(&buf, "fsck required"); c->recovery_passes_explicit |= recovery_passes; - c->opts.fix_errors = FSCK_OPT_YES; + c->opts.fix_errors = FSCK_FIX_yes; } bch_info(c, "%s", buf.buf); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index f118e585..c15b4781 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -12,9 +12,9 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); -static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor) +static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) { - struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s = __snapshot_t(t, id); if (s->skip[2] <= ancestor) return s->skip[2]; @@ -27,22 +27,83 @@ static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor) bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { + struct snapshot_table *t; + EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); + rcu_read_lock(); + t = rcu_dereference(c->snapshots); + while (id && id < ancestor) - id = get_ancestor_below(c, id, ancestor); + id = get_ancestor_below(t, id, ancestor); + rcu_read_unlock(); return id == ancestor; } static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) { + struct snapshot_table *t; + + rcu_read_lock(); + t = rcu_dereference(c->snapshots); + while (id && id < ancestor) - id = snapshot_t(c, id)->parent; + id = __snapshot_t(t, 
id)->parent; + rcu_read_unlock(); return id == ancestor; } +static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) +{ + u32 depth; + + rcu_read_lock(); + depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; + rcu_read_unlock(); + + return depth; +} + +static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) +{ + size_t idx = U32_MAX - id; + size_t new_size; + struct snapshot_table *new, *old; + + new_size = max(16UL, roundup_pow_of_two(idx + 1)); + + new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); + if (!new) + return NULL; + + old = c->snapshots; + if (old) + memcpy(new->s, + rcu_dereference_protected(c->snapshots, true)->s, + sizeof(new->s[0]) * c->snapshot_table_size); + + rcu_assign_pointer(c->snapshots, new); + c->snapshot_table_size = new_size; + if (old) + kvfree_rcu(old); + + return &rcu_dereference_protected(c->snapshots, true)->s[idx]; +} + +static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) +{ + size_t idx = U32_MAX - id; + + lockdep_assert_held(&c->snapshot_table_lock); + + if (likely(idx < c->snapshot_table_size)) + return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + + return __snapshot_t_mut(c, id); +} + /* Snapshot tree: */ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, @@ -202,12 +263,15 @@ int bch2_mark_snapshot(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct snapshot_t *t; + int ret = 0; + + mutex_lock(&c->snapshot_table_lock); - t = genradix_ptr_alloc(&c->snapshots, - U32_MAX - new.k->p.offset, - GFP_KERNEL); - if (!t) - return -BCH_ERR_ENOMEM_mark_snapshot; + t = snapshot_t_mut(c, new.k->p.offset); + if (!t) { + ret = -BCH_ERR_ENOMEM_mark_snapshot; + goto err; + } if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); @@ -231,8 +295,9 @@ int bch2_mark_snapshot(struct btree_trans *trans, t->subvol = 0; t->tree = 0; } - - return 0; +err: + mutex_unlock(&c->snapshot_table_lock); + 
return ret; } static int snapshot_lookup(struct btree_trans *trans, u32 id, @@ -285,9 +350,14 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) nr_live += ret; } - snapshot_t(c, id)->equiv = nr_live == 1 - ? snapshot_t(c, child[live_idx])->equiv + mutex_lock(&c->snapshot_table_lock); + + snapshot_t_mut(c, id)->equiv = nr_live == 1 + ? snapshot_t_mut(c, child[live_idx])->equiv : id; + + mutex_unlock(&c->snapshot_table_lock); + return 0; } @@ -505,16 +575,18 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans, static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id) { - struct snapshot_t *s; + const struct snapshot_t *s; if (!id) return 0; + rcu_read_lock(); s = snapshot_t(c, id); - if (!s->parent) - return id; + if (s->parent) + id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); + rcu_read_unlock(); - return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); + return id; } static int snapshot_rand_ancestor_good(struct btree_trans *trans, @@ -613,9 +685,7 @@ static int check_snapshot(struct btree_trans *trans, struct bch_snapshot v; struct bkey_i_snapshot *u; u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); - struct snapshot_t *parent = parent_id - ? snapshot_t(c, parent_id) - : NULL; + u32 real_depth; struct printbuf buf = PRINTBUF; bool should_have_subvol; u32 i, id; @@ -706,16 +776,18 @@ static int check_snapshot(struct btree_trans *trans, } ret = 0; - if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c, + real_depth = bch2_snapshot_depth(c, parent_id); + + if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, c, "snapshot with incorrect depth fields, should be %u:\n %s", - parent->depth + 1, + real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) goto err; - u->v.depth = cpu_to_le32(parent ? 
parent->depth + 1 : 0); + u->v.depth = cpu_to_le32(real_depth); s = u->v; } @@ -799,9 +871,13 @@ static int check_subvol(struct btree_trans *trans, if (!BCH_SUBVOLUME_SNAP(subvol.v)) { u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); - u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree; + u32 snapshot_tree; struct bch_snapshot_tree st; + rcu_read_lock(); + snapshot_tree = snapshot_t(c, snapshot_root)->tree; + rcu_read_unlock(); + ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, @@ -845,7 +921,7 @@ int bch2_check_subvols(struct bch_fs *c) void bch2_fs_snapshots_exit(struct bch_fs *c) { - genradix_free(&c->snapshots); + kvfree(c->snapshots); } int bch2_snapshots_read(struct bch_fs *c) @@ -987,7 +1063,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, struct bkey_i_snapshot *n; struct bkey_s_c k; unsigned i, j; - u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; + u32 depth = bch2_snapshot_depth(c, parent); int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, @@ -1126,7 +1202,7 @@ static int snapshot_delete_key(struct btree_trans *trans, struct bpos *last_pos) { struct bch_fs *c = trans->c; - u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv; + u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); if (!bkey_eq(k.k->p, *last_pos)) equiv_seen->nr = 0; diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index ab0b4a6d..6d134dab 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -32,17 +32,31 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, .min_val_size = 24, \ }) -static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) +static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) { - return genradix_ptr(&c->snapshots, U32_MAX - id); + return &t->s[U32_MAX - id]; } -static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) +{ + return __snapshot_t(rcu_dereference(c->snapshots), id); +} + +static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { return snapshot_t(c, id)->parent; } -static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) +static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_parent_early(c, id); + rcu_read_unlock(); + + return id; +} + +static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) { #ifdef CONFIG_BCACHEFS_DEBUG u32 parent = snapshot_t(c, id)->parent; @@ -59,10 +73,21 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) #endif } +static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_parent(c, id); + rcu_read_unlock(); + + return id; +} + static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) { + rcu_read_lock(); while (n--) - id = bch2_snapshot_parent(c, id); + id = __bch2_snapshot_parent(c, id); + rcu_read_unlock(); return id; } @@ -71,37 +96,60 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) { u32 parent; - while ((parent = bch2_snapshot_parent(c, id))) + rcu_read_lock(); + while ((parent = __bch2_snapshot_parent(c, id))) id = parent; + rcu_read_unlock(); + return id; } -static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) { return snapshot_t(c, id)->equiv; } +static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_equiv(c, id); + rcu_read_unlock(); + + return id; +} + static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) { - return id == snapshot_t(c, id)->equiv; + return id == bch2_snapshot_equiv(c, id); } -static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id) +static inline bool 
bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) { - struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s; + bool ret; + + rcu_read_lock(); + s = snapshot_t(c, id); + ret = s->children[0]; + rcu_read_unlock(); + + return ret; +} - return s->children[0] || s->children[1]; +static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) +{ + return !bch2_snapshot_is_internal_node(c, id); } static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) { - struct snapshot_t *s; - u32 parent = bch2_snapshot_parent(c, id); + const struct snapshot_t *s; + u32 parent = __bch2_snapshot_parent(c, id); if (!parent) return 0; - s = snapshot_t(c, bch2_snapshot_parent(c, id)); + s = snapshot_t(c, __bch2_snapshot_parent(c, id)); if (id == s->children[0]) return s->children[1]; if (id == s->children[1]) @@ -113,9 +161,15 @@ bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { - struct snapshot_t *t = snapshot_t(c, id); + const struct snapshot_t *t; + bool ret; - return (t->children[0]|t->children[1]) != 0; + rcu_read_lock(); + t = snapshot_t(c, id); + ret = (t->children[0]|t->children[1]) != 0; + rcu_read_unlock(); + + return ret; } static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) diff --git a/libbcachefs/subvolume_types.h b/libbcachefs/subvolume_types.h index 750d975a..c596e427 100644 --- a/libbcachefs/subvolume_types.h +++ b/libbcachefs/subvolume_types.h @@ -16,6 +16,10 @@ struct snapshot_t { u32 equiv; }; +struct snapshot_table { + struct snapshot_t s[0]; +}; + typedef struct { u32 subvol; u64 inum; diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 35325381..e9ce3f33 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -658,11 +658,18 @@ int bch2_read_super(const char *path, struct bch_opts *opts, struct printbuf err = PRINTBUF; __le64 *i; int ret; - +#ifndef __KERNEL__ +retry: +#endif memset(sb, 0, sizeof(*sb)); 
sb->mode = FMODE_READ; sb->have_bio = true; +#ifndef __KERNEL__ + if (opt_get(*opts, direct_io) == false) + sb->mode |= FMODE_BUFFERED; +#endif + if (!opt_get(*opts, noexcl)) sb->mode |= FMODE_EXCL; @@ -747,7 +754,13 @@ int bch2_read_super(const char *path, struct bch_opts *opts, got_super: if (le16_to_cpu(sb->sb->block_size) << 9 < - bdev_logical_block_size(sb->bdev)) { + bdev_logical_block_size(sb->bdev) && + opt_get(*opts, direct_io)) { +#ifndef __KERNEL__ + opt_set(*opts, direct_io, false); + bch2_free_super(sb); + goto retry; +#endif prt_printf(&err, "block size (%u) smaller than device block size (%u)", le16_to_cpu(sb->sb->block_size) << 9, bdev_logical_block_size(sb->bdev)); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 426d2acf..9f1047a7 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -344,6 +344,19 @@ static int bch2_fs_read_write_late(struct bch_fs *c) { int ret; + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. 
+ * + * Ideally we'd start copygc/rebalance earlier instead of waiting for + * all of recovery/fsck to complete: + */ + ret = bch2_copygc_start(c); + if (ret) { + bch_err(c, "error starting copygc thread"); + return ret; + } + ret = bch2_rebalance_start(c); if (ret) { bch_err(c, "error starting rebalance thread"); @@ -403,12 +416,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) return ret; } - ret = bch2_copygc_start(c); - if (ret) { - bch_err(c, "error starting copygc thread"); - return ret; - } - if (!early) { ret = bch2_fs_read_write_late(c); if (ret) diff --git a/libbcachefs/util.h b/libbcachefs/util.h index ca1b799e..5fa29dab 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -64,7 +64,7 @@ static inline void *vpmalloc_noprof(size_t size, gfp_t gfp_mask) { return (void *) get_free_pages_noprof(gfp_mask|__GFP_NOWARN, get_order(size)) ?: - __vmalloc(size, gfp_mask); + __vmalloc_noprof(size, gfp_mask); } #define vpmalloc(_size, _gfp) alloc_hooks(vpmalloc_noprof(_size, _gfp)) diff --git a/linux/blkdev.c b/linux/blkdev.c index 45b03fba..ea901a46 100644 --- a/linux/blkdev.c +++ b/linux/blkdev.c @@ -183,16 +183,19 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, else if (mode & FMODE_WRITE) flags = O_WRONLY; + if (!(mode & FMODE_BUFFERED)) + flags |= O_DIRECT; + #if 0 /* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */ if (mode & FMODE_EXCL) flags |= O_EXCL; #endif - buffered_fd = open(path, flags); + buffered_fd = open(path, flags & ~O_DIRECT); if (buffered_fd < 0) return ERR_PTR(-errno); - fd = open(path, flags|O_DIRECT); + fd = open(path, flags); if (fd < 0) fd = dup(buffered_fd); if (fd < 0) { @@ -200,9 +203,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, return ERR_PTR(-errno); } - sync_fd = open(path, flags|O_DIRECT|O_SYNC); + sync_fd = open(path, flags|O_SYNC); if (sync_fd < 0) - sync_fd = open(path, flags|O_SYNC); + sync_fd = open(path, (flags & 
~O_DIRECT)|O_SYNC); if (sync_fd < 0) { close(fd); close(buffered_fd); diff --git a/rust-src/src/cmd_list.rs b/rust-src/src/cmd_list.rs index 2dc8d719..3f86b8cd 100644 --- a/rust-src/src/cmd_list.rs +++ b/rust-src/src/cmd_list.rs @@ -138,7 +138,7 @@ fn cmd_list_inner(opt: Cli) -> anyhow::Result<()> { opt_set!(fs_opts, errors, bcachefs::bch_error_actions::BCH_ON_ERROR_continue as u8); if opt.fsck { - opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_OPT_YES as u8); + opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_FIX_yes as u8); opt_set!(fs_opts, norecovery, 0); } -- cgit v1.2.3