| author | Kent Overstreet <kent.overstreet@linux.dev> | 2025-04-17 12:42:47 -0400 |
|---|---|---|
| committer | Kent Overstreet <kent.overstreet@linux.dev> | 2025-04-17 12:51:03 -0400 |
| commit | 0f59cc6a9eae6624f767cb74a405971de5919135 (patch) | |
| tree | c9ce0b10a0d9f9d2f8544628fa8ebbc81baa539b | |
| parent | 28039ec698d81cd4581ca47191ed13568932e402 (diff) | |
Update bcachefs sources to 65456ba56b93 bcachefs: Fix struct with flex member ABI warning
61 files changed, 989 insertions, 423 deletions
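The subject line refers to a GCC "structure containing a flexible array member is not at the end of another structure" ABI warning; the hunk that actually addresses it in the updated sources is not among the excerpts below. As a generic illustration only (the type names here are invented, not taken from bcachefs), this sketch shows the pattern GCC 14's `-Wflex-array-member-not-at-end` flags, and one common way to restructure it so the flexible array stays at the very end of the outermost struct:

```c
#include <stdint.h>

/* A struct ending in a flexible array member. */
struct msg {
	uint16_t len;
	uint8_t  data[];	/* flexible array member */
};

/*
 * Embedding it in the middle of another struct is a GCC extension
 * (strict ISO C forbids it) and triggers -Wflex-array-member-not-at-end
 * on GCC 14: msg.data[] would overlap crc, so the layout/ABI is ambiguous.
 */
struct bad_container {
	struct msg hdr;		/* warning: flex-array struct not at end */
	uint32_t   crc;
};

/*
 * One common fix: split out a fixed-size header and keep the
 * flexible array only at the tail of the outermost struct.
 */
struct msg_hdr {
	uint16_t len;
};

struct good_container {
	struct msg_hdr hdr;
	uint32_t       crc;
	uint8_t        data[];	/* stays at the very end */
};
```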
diff --git a/.bcachefs_revision b/.bcachefs_revision index 8df6477a..97e95d4e 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -2f9361370129b6ae7fbc847aad638048997dab8b +65456ba56b930afe6935d2dc2128ba12c1e5fa6f diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c index 02faca07..e314903a 100644 --- a/c_src/cmd_option.c +++ b/c_src/cmd_option.c @@ -115,7 +115,7 @@ int cmd_set_option(int argc, char *argv[]) fprintf(stderr, "Can't set option %s\n", opt->attr.name); if (opt->flags & OPT_FS) { - ret = bch2_opt_check_may_set(c, NULL, i, v); + ret = bch2_opt_hook_pre_set(c, NULL, i, v); if (ret < 0) { fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret); continue; @@ -133,7 +133,7 @@ int cmd_set_option(int argc, char *argv[]) continue; } - ret = bch2_opt_check_may_set(c, ca, i, v); + ret = bch2_opt_hook_pre_set(c, ca, i, v); if (ret < 0) { fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret); continue; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 94ea9e49..8b8c2344 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -2593,15 +2593,22 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) return ret; } +void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw) +{ + for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++) + if (rw && (ca->mi.data_allowed & BIT(i))) + set_bit(ca->dev_idx, c->rw_devs[i].d); + else + clear_bit(ca->dev_idx, c->rw_devs[i].d); +} + /* device goes ro: */ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) { lockdep_assert_held(&c->state_lock); /* First, remove device from allocation groups: */ - - for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++) - clear_bit(ca->dev_idx, c->rw_devs[i].d); + bch2_dev_allocator_set_rw(c, ca, false); c->rw_devs_change_count++; @@ -2635,10 +2642,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) { lockdep_assert_held(&c->state_lock); - for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++) - if (ca->mi.data_allowed & (1 << i)) - set_bit(ca->dev_idx, c->rw_devs[i].d); - + bch2_dev_allocator_set_rw(c, ca, true); c->rw_devs_change_count++; } diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 34b3d6ac..4f94c6a6 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -350,6 +350,7 @@ int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *); void bch2_recalc_capacity(struct bch_fs *); u64 bch2_min_rw_member_capacity(struct bch_fs *); +void bch2_dev_allocator_set_rw(struct bch_fs *, struct bch_dev *, bool); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index ae7eb523..59950d73 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -1642,7 +1642,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) bch2_journal_debug_to_text(&buf, &c->journal); printbuf_indent_sub(&buf, 2); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index f6920899..a1a4e941 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -269,7 +269,8 @@ do { \ #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") -void bch2_print_str(struct bch_fs *, const char *); +void bch2_print_str(struct 
bch_fs *, const char *, const char *); +void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *); __printf(2, 3) void bch2_print_opts(struct bch_opts *, const char *, ...); @@ -780,6 +781,7 @@ struct bch_fs { u8 nr_devices; u8 clean; + bool multi_device; /* true if we've ever had more than one device */ u8 encryption_type; @@ -792,6 +794,8 @@ struct bch_fs { unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; + DARRAY(enum bcachefs_metadata_version) + incompat_versions_requested; #ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index f9bfb434..aa57f470 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -842,7 +842,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29); LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30); LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62); LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63); -LE64_BITMASK(BCH_SB_SINGLE_DEVICE, struct bch_sb, flags[3], 63, 64); +LE64_BITMASK(BCH_SB_MULTI_DEVICE, struct bch_sb, flags[3], 63, 64); LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32); LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33); LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34); @@ -918,7 +918,9 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u x(alloc_v2, 17) \ x(extents_across_btree_nodes, 18) \ x(incompat_version_field, 19) \ - x(casefolding, 20) + x(casefolding, 20) \ + x(no_alloc_info, 21) \ + x(small_image, 22) #define BCH_SB_FEATURES_ALWAYS \ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 9b80201c..7973c38f 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -15,14 +15,9 @@ #include <linux/prefetch.h> #include <linux/sched/mm.h> +#include <linux/seq_buf.h> #include <linux/swap.h> -#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \ -do { \ - if (shrinker_counter) \ - bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \ -} while (0) - const char * const bch2_btree_node_flags[] = { "typebit", "typebit", @@ -350,115 +345,118 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc, return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); } -/* - * this version is for btree nodes that have already been freed (we're not - * reaping a real btree node) - */ -static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter) +static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b, + bool flush, bool locked) { struct btree_cache *bc = &c->btree_cache; - int ret = 0; lockdep_assert_held(&bc->lock); -wait_on_io: - if (b->flags & ((1U << BTREE_NODE_dirty)| - (1U << BTREE_NODE_read_in_flight)| + + if (btree_node_noevict(b)) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_noevict]++; + return -BCH_ERR_ENOMEM_btree_node_reclaim; + } + if (btree_node_write_blocked(b)) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_blocked]++; + return -BCH_ERR_ENOMEM_btree_node_reclaim; + } + if (btree_node_will_make_reachable(b)) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_will_make_reachable]++; + return -BCH_ERR_ENOMEM_btree_node_reclaim; + } + + if (btree_node_dirty(b)) { + if (!flush) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_dirty]++; + return 
-BCH_ERR_ENOMEM_btree_node_reclaim; + } + + if (locked) { + /* + * Using the underscore version because we don't want to compact + * bsets after the write, since this node is about to be evicted + * - unless btree verify mode is enabled, since it runs out of + * the post write cleanup: + */ + if (bch2_verify_btree_ondisk) + bch2_btree_node_write(c, b, SIX_LOCK_intent, + BTREE_WRITE_cache_reclaim); + else + __bch2_btree_node_write(c, b, + BTREE_WRITE_cache_reclaim); + } + } + + if (b->flags & ((1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_write_in_flight))) { if (!flush) { - if (btree_node_dirty(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(dirty); - else if (btree_node_read_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight); + if (btree_node_read_in_flight(b)) + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_read_in_flight]++; else if (btree_node_write_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight); + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_in_flight]++; return -BCH_ERR_ENOMEM_btree_node_reclaim; } + if (locked) + return -EINTR; + /* XXX: waiting on IO with btree cache lock held */ bch2_btree_node_wait_on_read(b); bch2_btree_node_wait_on_write(b); } + return 0; +} + +/* + * this version is for btree nodes that have already been freed (we're not + * reaping a real btree node) + */ +static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) +{ + struct btree_cache *bc = &c->btree_cache; + int ret = 0; + + lockdep_assert_held(&bc->lock); +retry_unlocked: + ret = __btree_node_reclaim_checks(c, b, flush, false); + if (ret) + return ret; + if (!six_trylock_intent(&b->c.lock)) { - BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent); + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++; return -BCH_ERR_ENOMEM_btree_node_reclaim; } if (!six_trylock_write(&b->c.lock)) { - BTREE_CACHE_NOT_FREED_INCREMENT(lock_write); - goto out_unlock_intent; + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_write]++; + six_unlock_intent(&b->c.lock); + return -BCH_ERR_ENOMEM_btree_node_reclaim; } /* recheck under lock */ - if (b->flags & ((1U << BTREE_NODE_read_in_flight)| - (1U << BTREE_NODE_write_in_flight))) { - if (!flush) { - if (btree_node_read_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight); - else if (btree_node_write_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight); - goto out_unlock; - } + ret = __btree_node_reclaim_checks(c, b, flush, true); + if (ret) { six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); - goto wait_on_io; - } - - if (btree_node_noevict(b)) { - BTREE_CACHE_NOT_FREED_INCREMENT(noevict); - goto out_unlock; - } - if (btree_node_write_blocked(b)) { - BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked); - goto out_unlock; - } - if (btree_node_will_make_reachable(b)) { - BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable); - goto out_unlock; + if (ret == -EINTR) + goto retry_unlocked; + return ret; } - if (btree_node_dirty(b)) { - if (!flush) { - BTREE_CACHE_NOT_FREED_INCREMENT(dirty); - goto out_unlock; - } - /* - * Using the underscore version because we don't want to compact - * bsets after the write, since this node is about to be evicted - * - unless btree verify mode is enabled, since it runs out of - * the post write cleanup: - */ - if (bch2_verify_btree_ondisk) - bch2_btree_node_write(c, b, SIX_LOCK_intent, - BTREE_WRITE_cache_reclaim); - else - __bch2_btree_node_write(c, b, - BTREE_WRITE_cache_reclaim); - - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); - goto wait_on_io; - } -out: if 
(b->hash_val && !ret) trace_and_count(c, btree_cache_reap, c, b); - return ret; -out_unlock: - six_unlock_write(&b->c.lock); -out_unlock_intent: - six_unlock_intent(&b->c.lock); - ret = -BCH_ERR_ENOMEM_btree_node_reclaim; - goto out; + return 0; } -static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter) +static int btree_node_reclaim(struct bch_fs *c, struct btree *b) { - return __btree_node_reclaim(c, b, false, shrinker_counter); + return __btree_node_reclaim(c, b, false); } static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) { - return __btree_node_reclaim(c, b, true, false); + return __btree_node_reclaim(c, b, true); } static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, @@ -490,7 +488,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, * IO can always make forward progress: */ can_free = btree_cache_can_free(list); - nr = min_t(unsigned long, nr, can_free); + if (nr > can_free) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_cache_reserve] += nr - can_free; + nr = can_free; + } i = 0; list_for_each_entry_safe(b, t, &bc->freeable, list) { @@ -506,7 +507,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, if (touched >= nr) goto out; - if (!btree_node_reclaim(c, b, true)) { + if (!btree_node_reclaim(c, b)) { btree_node_data_free(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -522,7 +523,7 @@ restart: clear_btree_node_accessed(b); bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; --touched;; - } else if (!btree_node_reclaim(c, b, true)) { + } else if (!btree_node_reclaim(c, b)) { __bch2_btree_node_hash_remove(bc, b); __btree_node_data_free(bc, b); @@ -575,6 +576,19 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink, return btree_cache_can_free(list); } +static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink) +{ + struct btree_cache_list *list = shrink->private_data; + struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); + + char *cbuf; + size_t buflen = seq_buf_get_buf(s, &cbuf); + struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen); + + bch2_btree_cache_to_text(&out, bc); + seq_buf_commit(s, out.pos); +} + void bch2_fs_btree_cache_exit(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; @@ -666,6 +680,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bc->live[0].shrink = shrink; shrink->count_objects = bch2_btree_cache_count; shrink->scan_objects = bch2_btree_cache_scan; + shrink->to_text = bch2_btree_cache_shrinker_to_text; shrink->seeks = 2; shrink->private_data = &bc->live[0]; shrinker_register(shrink); @@ -676,6 +691,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bc->live[1].shrink = shrink; shrink->count_objects = bch2_btree_cache_count; shrink->scan_objects = bch2_btree_cache_scan; + shrink->to_text = bch2_btree_cache_shrinker_to_text; shrink->seeks = 8; shrink->private_data = &bc->live[1]; shrinker_register(shrink); @@ -755,7 +771,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c) for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) list_for_each_entry_reverse(b, &bc->live[i].list, list) - if (!btree_node_reclaim(c, b, false)) + if (!btree_node_reclaim(c, b)) return b; while (1) { @@ -790,7 +806,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea * disk node. 
Check the freed list before allocating a new one: */ list_for_each_entry(b, freed, list) - if (!btree_node_reclaim(c, b, false)) { + if (!btree_node_reclaim(c, b)) { list_del_init(&b->list); goto got_node; } @@ -817,7 +833,7 @@ got_node: * the list. Check if there's any freed nodes there: */ list_for_each_entry(b2, &bc->freeable, list) - if (!btree_node_reclaim(c, b2, false)) { + if (!btree_node_reclaim(c, b2)) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); @@ -978,7 +994,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, /* Unlock before doing IO: */ six_unlock_intent(&b->c.lock); - bch2_trans_unlock_noassert(trans); + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, sync); @@ -1493,9 +1509,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc prt_btree_cache_line(out, c, "live:", bc->live[0].nr); prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr); - prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable); + prt_btree_cache_line(out, c, "reserve:", bc->nr_reserve); + prt_btree_cache_line(out, c, "freed:", bc->nr_freeable); prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty)); - prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); + prt_printf(out, "cannibalize lock:\t%s\n", bc->alloc_lock ? "held" : "not held"); prt_newline(out); for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { @@ -1506,6 +1523,7 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc } prt_newline(out); + prt_printf(out, "counters since mount:\n"); prt_printf(out, "freed:\t%zu\n", bc->nr_freed); prt_printf(out, "not freed:\n"); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 14e3329b..a261e856 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -604,7 +604,7 @@ static int __btree_err(int ret, } if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); + bch2_print_str(c, KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); @@ -1017,7 +1017,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bool used_mempool, blacklisted; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); - unsigned u64s; unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; @@ -1224,23 +1223,20 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool); sorted->keys.u64s = 0; - set_btree_bset(b, b->set, &b->data->keys); - b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter); memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0, btree_buf_bytes(b) - sizeof(struct btree_node) - b->nr.live_u64s * sizeof(u64)); - u64s = le16_to_cpu(sorted->keys.u64s); + b->data->keys.u64s = sorted->keys.u64s; *sorted = *b->data; - sorted->keys.u64s = cpu_to_le16(u64s); swap(sorted, b->data); set_btree_bset(b, b->set, &b->data->keys); b->nsets = 1; b->data->keys.journal_seq = cpu_to_le64(max_journal_seq); - BUG_ON(b->nr.live_u64s != u64s); + BUG_ON(b->nr.live_u64s != le16_to_cpu(b->data->keys.u64s)); btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); @@ -1918,7 +1914,7 @@ static void btree_node_scrub_work(struct work_struct *work) bch_err(c, "error validating btree node during scrub on %s at btree %s", scrub->ca->name, err.buf); - ret = bch2_btree_node_rewrite(trans, &iter, b, 0); + ret = 
bch2_btree_node_rewrite(trans, &iter, b, 0, 0); } err: bch2_trans_iter_exit(trans, &iter); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index db7d6e19..c824f1b6 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1591,7 +1591,7 @@ void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort) __bch2_trans_paths_to_text(&buf, trans, nosort); bch2_trans_updates_to_text(&buf, trans); - bch2_print_str(trans->c, buf.buf); + bch2_print_str(trans->c, KERN_ERR, buf.buf); printbuf_exit(&buf); } @@ -3112,7 +3112,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long #ifdef CONFIG_BCACHEFS_DEBUG struct printbuf buf = PRINTBUF; bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); #endif } diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 2b186584..aee4a551 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -13,6 +13,7 @@ #include "trace.h" #include <linux/sched/mm.h> +#include <linux/seq_buf.h> static inline bool btree_uses_pcpu_readers(enum btree_id id) { @@ -790,6 +791,18 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) { } +static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink) +{ + struct bch_fs *c = shrink->private_data; + struct btree_key_cache *bc = &c->btree_key_cache; + char *cbuf; + size_t buflen = seq_buf_get_buf(s, &cbuf); + struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen); + + bch2_btree_key_cache_to_text(&out, bc); + seq_buf_commit(s, out.pos); +} + int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); @@ -814,6 +827,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) bc->shrink = shrink; shrink->count_objects = bch2_btree_key_cache_count; shrink->scan_objects = bch2_btree_key_cache_scan; + shrink->to_text = bch2_btree_key_cache_shrinker_to_text; shrink->batch = 1 << 14; shrink->seeks = 0; shrink->private_data = c; diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 94eb2b73..baa505a9 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -236,7 +236,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle, prt_newline(&buf); } - bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); + bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } @@ -799,13 +799,6 @@ int bch2_trans_relock_notrace(struct btree_trans *trans) return __bch2_trans_relock(trans, false); } -void bch2_trans_unlock_noassert(struct btree_trans *trans) -{ - __bch2_trans_unlock(trans); - - trans_set_unlocked(trans); -} - void bch2_trans_unlock(struct btree_trans *trans) { __bch2_trans_unlock(trans); diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index b33ab7af..66b27c08 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -15,7 +15,6 @@ void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp); -void bch2_trans_unlock_noassert(struct btree_trans *); void bch2_trans_unlock_write(struct btree_trans *); static inline bool is_btree_node(struct btree_path *path, unsigned l) diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c index 86acf037..81ee7ae8 100644 --- 
a/libbcachefs/btree_node_scan.c +++ b/libbcachefs/btree_node_scan.c @@ -395,7 +395,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) printbuf_reset(&buf); prt_printf(&buf, "%s: nodes found:\n", __func__); found_btree_nodes_to_text(&buf, c, f->nodes); - bch2_print_string_as_lines(KERN_INFO, buf.buf); + bch2_print_str(c, KERN_INFO, buf.buf); } sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); @@ -424,7 +424,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) printbuf_reset(&buf); prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__); found_btree_nodes_to_text(&buf, c, f->nodes); - bch2_print_string_as_lines(KERN_INFO, buf.buf); + bch2_print_str(c, KERN_INFO, buf.buf); } swap(nodes_heap, f->nodes); @@ -470,7 +470,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) printbuf_reset(&buf); prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__); found_btree_nodes_to_text(&buf, c, f->nodes); - bch2_print_string_as_lines(KERN_INFO, buf.buf); + bch2_print_str(c, KERN_INFO, buf.buf); } else { bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr); } diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index dd109dea..9e6e47d5 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -139,6 +139,7 @@ struct btree { }; #define BCH_BTREE_CACHE_NOT_FREED_REASONS() \ + x(cache_reserve) \ x(lock_intent) \ x(lock_write) \ x(dirty) \ @@ -656,13 +657,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b) static inline void * __btree_node_offset_to_ptr(const struct btree *b, u16 offset) { - return (void *) ((u64 *) b->data + 1 + offset); + return (void *) ((u64 *) b->data + offset); } static inline u16 __btree_node_ptr_to_offset(const struct btree *b, const void *p) { - u16 ret = (u64 *) p - 1 - (u64 *) b->data; + u16 ret = (u64 *) p - (u64 *) b->data; EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); return ret; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 55fbeeb8..a0ff2dc0 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -284,6 +284,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, bool interior_node, + unsigned target, unsigned flags) { struct bch_fs *c = trans->c; @@ -317,6 +318,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, mutex_unlock(&c->btree_reserve_cache_lock); retry: ret = bch2_alloc_sectors_start_trans(trans, + target ?: c->opts.metadata_target ?: c->opts.foreground_target, 0, @@ -325,7 +327,9 @@ retry: res->nr_replicas, min(res->nr_replicas, c->opts.metadata_replicas_required), - watermark, 0, cl, &wp); + watermark, + target ? 
BCH_WRITE_only_specified_devs : 0, + cl, &wp); if (unlikely(ret)) goto err; @@ -505,6 +509,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans * static int bch2_btree_reserve_get(struct btree_trans *trans, struct btree_update *as, unsigned nr_nodes[2], + unsigned target, unsigned flags, struct closure *cl) { @@ -527,7 +532,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, while (p->nr < nr_nodes[interior]) { b = __bch2_btree_node_alloc(trans, &as->disk_res, cl, - interior, flags); + interior, target, flags); if (IS_ERR(b)) { ret = PTR_ERR(b); goto err; @@ -1116,7 +1121,8 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, - unsigned level_start, bool split, unsigned flags) + unsigned level_start, bool split, + unsigned target, unsigned flags) { struct bch_fs *c = trans->c; struct btree_update *as; @@ -1221,12 +1227,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_disk_reservation_get(c, &as->disk_res, (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c), - c->opts.metadata_replicas, + READ_ONCE(c->opts.metadata_replicas), disk_res_flags); if (ret) goto err; - ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL); + ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, NULL); if (bch2_err_matches(ret, ENOSPC) || bch2_err_matches(ret, ENOMEM)) { struct closure cl; @@ -1245,7 +1251,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, closure_init_stack(&cl); do { - ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); + ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl); bch2_trans_unlock(trans); bch2_wait_on_allocator(c, &cl); @@ -1792,7 +1798,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_btree_update_to_text(&buf, as); bch2_btree_path_to_text(&buf, trans, path_idx); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); bch2_fs_emergency_read_only(c); return -EIO; @@ -1863,7 +1869,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans, as = bch2_btree_update_start(trans, trans->paths + path, trans->paths[path].level, - true, flags); + true, 0, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -1933,7 +1939,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, return bch2_btree_split_leaf(trans, path, flags); struct btree_update *as = - bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags); + bch2_btree_update_start(trans, trans->paths + path, b->c.level, + true, 0, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -2062,7 +2069,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, parent = btree_node_parent(trans->paths + path, b); as = bch2_btree_update_start(trans, trans->paths + path, level, false, - BCH_TRANS_COMMIT_no_enospc|flags); + 0, BCH_TRANS_COMMIT_no_enospc|flags); ret = PTR_ERR_OR_ZERO(as); if (ret) goto err; @@ -2169,6 +2176,7 @@ err: int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_iter *iter, struct btree *b, + unsigned target, unsigned flags) { struct bch_fs *c = trans->c; @@ -2181,7 +2189,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, iter); parent = btree_node_parent(path, b); - as = bch2_btree_update_start(trans, path, b->c.level, false, 
flags); + as = bch2_btree_update_start(trans, path, b->c.level, + false, target, flags); ret = PTR_ERR_OR_ZERO(as); if (ret) goto out; @@ -2246,7 +2255,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans, bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k); ret = found - ? bch2_btree_node_rewrite(trans, &iter, b, flags) + ? bch2_btree_node_rewrite(trans, &iter, b, 0, flags) : -ENOENT; out: bch2_trans_iter_exit(trans, &iter); @@ -2255,7 +2264,9 @@ out: int bch2_btree_node_rewrite_pos(struct btree_trans *trans, enum btree_id btree, unsigned level, - struct bpos pos, unsigned flags) + struct bpos pos, + unsigned target, + unsigned flags) { BUG_ON(!level); @@ -2267,7 +2278,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans, if (ret) goto err; - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); + ret = bch2_btree_node_rewrite(trans, &iter, b, target, flags); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -2281,7 +2292,7 @@ int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans, if (ret) return ret == -BCH_ERR_btree_node_dying ? 0 : ret; - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0, flags); bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index be71cd73..ff9b95aa 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -168,10 +168,10 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, } int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, - struct btree *, unsigned); + struct btree *, unsigned, unsigned); int bch2_btree_node_rewrite_pos(struct btree_trans *, enum btree_id, unsigned, - struct bpos, unsigned); + struct bpos, unsigned, unsigned); int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *, struct btree *, unsigned); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index fea61e60..09e84d4a 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -37,7 +37,8 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) { memset(usage, 0, sizeof(*usage)); - acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); + acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, + sizeof(struct bch_dev_usage_full) / sizeof(u64)); } static u64 reserve_factor(u64 r) @@ -391,29 +392,24 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf struct bkey_s_c k, bool insert, enum bch_sb_error_id id) { struct bch_fs *c = trans->c; - bool repeat = false, print = true, suppress = false; prt_printf(buf, "\nwhile marking "); bch2_bkey_val_to_text(buf, c, k); prt_newline(buf); - __bch2_count_fsck_err(c, id, buf->buf, &repeat, &print, &suppress); + bool print = __bch2_count_fsck_err(c, id, buf); - int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); + int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf, + BCH_RECOVERY_PASS_check_allocations); if (insert) { - print = true; - suppress = false; - bch2_trans_updates_to_text(buf, trans); __bch2_inconsistent_error(c, buf); ret = -BCH_ERR_bucket_ref_update; } - if (suppress) - prt_printf(buf, "Ratelimiting new instances of previous error\n"); - if (print) - bch2_print_string_as_lines(KERN_ERR, buf->buf); + if (print || insert) + bch2_print_str(c, 
KERN_ERR, buf->buf); return ret; } @@ -703,7 +699,7 @@ err: (u64) p.ec.idx); bch2_bkey_val_to_text(&buf, c, k); __bch2_inconsistent_error(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); return -BCH_ERR_trigger_stripe_pointer; } @@ -958,14 +954,23 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, bucket_metadata_type_mismatch, - "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_type_str(a->v.data_type), - bch2_data_type_str(type), - bch2_data_type_str(type)); + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" + "while marking %s\n", + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_type_str(a->v.data_type), + bch2_data_type_str(type), + bch2_data_type_str(type)); + + bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf); + + bch2_run_explicit_recovery_pass_printbuf(c, &buf, + BCH_RECOVERY_PASS_check_allocations); + + if (print) + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); ret = -BCH_ERR_metadata_bucket_inconsistency; goto err; } @@ -977,7 +982,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, ret = bch2_trans_update(trans, &iter, &a->k_i, 0); } err: -fsck_err: bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 1c38b165..8d75b27a 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -242,11 +242,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca, /* Filesystem usage: */ -static inline unsigned dev_usage_u64s(void) -{ - return sizeof(struct bch_dev_usage) / sizeof(u64); -} - struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index b211c972..c3034338 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -358,7 +358,7 @@ restart_drop_extra_replicas: prt_str(&buf, "\nnew: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); bch2_fatal_error(c); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 09b67279..8ad710bb 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -933,7 +933,7 @@ void bch2_fs_debug_init(struct bch_fs *c) if (IS_ERR_OR_NULL(bch_debug)) return; - if (!c->opts.single_device) + if (c->sb.multi_device) snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); else strscpy(name, c->name, sizeof(name)); diff --git a/libbcachefs/disk_accounting.c b/libbcachefs/disk_accounting.c index b007319b..7be71952 100644 --- a/libbcachefs/disk_accounting.c +++ b/libbcachefs/disk_accounting.c @@ -618,17 +618,17 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) } static int bch2_disk_accounting_validate_late(struct btree_trans *trans, - struct disk_accounting_pos acc, + struct disk_accounting_pos *acc, u64 *v, unsigned nr) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; int ret = 0, invalid_dev = -1; - switch (acc.type) { + switch (acc->type) { case BCH_DISK_ACCOUNTING_replicas: { struct 
bch_replicas_padded r; - __accounting_to_replicas(&r.e, &acc); + __accounting_to_replicas(&r.e, acc); for (unsigned i = 0; i < r.e.nr_devs; i++) if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && @@ -647,7 +647,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, trans, accounting_replicas_not_marked, "accounting not marked in superblock replicas\n%s", (printbuf_reset(&buf), - bch2_accounting_key_to_text(&buf, &acc), + bch2_accounting_key_to_text(&buf, acc), buf.buf))) { /* * We're not RW yet and still single threaded, dropping @@ -663,8 +663,8 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, } case BCH_DISK_ACCOUNTING_dev_data_type: - if (!bch2_dev_exists(c, acc.dev_data_type.dev)) { - invalid_dev = acc.dev_data_type.dev; + if (!bch2_dev_exists(c, acc->dev_data_type.dev)) { + invalid_dev = acc->dev_data_type.dev; goto invalid_device; } break; @@ -678,13 +678,13 @@ invalid_device: "accounting entry points to invalid device %i\n%s", invalid_dev, (printbuf_reset(&buf), - bch2_accounting_key_to_text(&buf, &acc), + bch2_accounting_key_to_text(&buf, acc), buf.buf))) { for (unsigned i = 0; i < nr; i++) v[i] = -v[i]; ret = commit_do(trans, NULL, NULL, 0, - bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?: + bch2_disk_accounting_mod(trans, acc, v, nr, false)) ?: -BCH_ERR_remove_disk_accounting_entry; } else { ret = -BCH_ERR_remove_disk_accounting_entry; @@ -735,7 +735,7 @@ int bch2_accounting_read(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) break; - if (!bch2_accounting_is_mem(acc_k)) { + if (!bch2_accounting_is_mem(&acc_k)) { struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; @@ -757,7 +757,7 @@ int bch2_accounting_read(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); - if (!bch2_accounting_is_mem(acc_k)) + if (!bch2_accounting_is_mem(&acc_k)) continue; struct bkey_s_c k = bkey_i_to_s_c(i->k); @@ -813,7 +813,7 @@ int bch2_accounting_read(struct bch_fs *c) */ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) ? 
-BCH_ERR_remove_disk_accounting_entry - : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters); + : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters); if (ret == -BCH_ERR_remove_disk_accounting_entry) { free_percpu(i->v[0]); @@ -926,7 +926,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) break; - if (!bch2_accounting_is_mem(acc_k)) { + if (!bch2_accounting_is_mem(&acc_k)) { struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; diff --git a/libbcachefs/disk_accounting.h b/libbcachefs/disk_accounting.h index abb1f620..6524cb14 100644 --- a/libbcachefs/disk_accounting.h +++ b/libbcachefs/disk_accounting.h @@ -138,10 +138,10 @@ enum bch_accounting_mode { int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); void bch2_accounting_mem_gc(struct bch_fs *); -static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) +static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc) { - return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR && - acc.type != BCH_DISK_ACCOUNTING_inum; + return acc->type < BCH_DISK_ACCOUNTING_TYPE_NR && + acc->type != BCH_DISK_ACCOUNTING_inum; } /* @@ -161,7 +161,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, if (gc && !acc->gc_running) return 0; - if (!bch2_accounting_is_mem(acc_k)) + if (!bch2_accounting_is_mem(&acc_k)) return 0; if (mode == BCH_ACCOUNTING_normal) { diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index e3c85288..996cdf62 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -192,8 +192,6 @@ x(BCH_ERR_data_update_done, data_update_done_no_rw_devs) \ x(EINVAL, device_state_not_allowed) \ x(EINVAL, member_info_missing) \ - x(EINVAL, single_device_filesystem) \ - x(EINVAL, not_single_device_filesystem) \ x(EINVAL, mismatched_block_size) \ x(EINVAL, block_size_too_small) \ x(EINVAL, bucket_size_too_small) \ @@ -203,6 +201,7 @@ x(EINVAL, device_has_been_removed) \ x(EINVAL, device_splitbrain) \ x(EINVAL, device_already_online) \ + x(EINVAL, filesystem_uuid_already_open) \ x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, invalid) \ x(EINVAL, internal_fsck_err) \ @@ -222,6 +221,8 @@ x(EROFS, erofs_unfixed_errors) \ x(EROFS, erofs_norecovery) \ x(EROFS, erofs_nochanges) \ + x(EROFS, erofs_no_alloc_info) \ + x(EROFS, erofs_filesystem_full) \ x(EROFS, insufficient_devices) \ x(0, operation_blocked) \ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ @@ -290,7 +291,7 @@ x(EIO, mark_stripe) \ x(EIO, stripe_reconstruct) \ x(EIO, key_type_error) \ - x(EIO, extent_poisened) \ + x(EIO, extent_poisoned) \ x(EIO, missing_indirect_extent) \ x(EIO, invalidate_stripe_to_dev) \ x(EIO, no_encryption_key) \ diff --git a/libbcachefs/error.c b/libbcachefs/error.c index baf5dfb3..04a714b0 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -34,7 +34,7 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: - bch2_print_string_as_lines_nonblocking(KERN_ERR, out->buf); + bch2_print_str(c, KERN_ERR, out->buf); panic(bch2_fmt(c, "panic after error")); return true; default: @@ -71,7 +71,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_string_as_lines_nonblocking(KERN_ERR, 
buf.buf); + bch2_print_str_nonblocking(c, KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; @@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) __bch2_inconsistent_error(c, out); return -BCH_ERR_btree_need_topology_repair; } else { - return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: + return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?: -BCH_ERR_btree_node_read_validate_error; } } @@ -121,7 +121,7 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) va_end(args); int ret = __bch2_topology_error(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; @@ -331,7 +331,7 @@ static int do_fsck_ask_yn(struct bch_fs *c, if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s", question->buf); else - bch2_print_string_as_lines(KERN_ERR, question->buf); + bch2_print_str(c, KERN_ERR, question->buf); int ask = bch2_fsck_ask_yn(c, trans); @@ -379,15 +379,21 @@ static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, return s; } -void __bch2_count_fsck_err(struct bch_fs *c, - enum bch_sb_error_id id, const char *msg, - bool *repeat, bool *print, bool *suppress) +bool __bch2_count_fsck_err(struct bch_fs *c, + enum bch_sb_error_id id, struct printbuf *msg) { bch2_sb_error_count(c, id); mutex_lock(&c->fsck_error_msgs_lock); - count_fsck_err_locked(c, id, msg, repeat, print, suppress); + bool print = true, repeat = false, suppress = false; + + count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); mutex_unlock(&c->fsck_error_msgs_lock); + + if (suppress) + prt_printf(msg, "Ratelimiting new instances of previous error\n"); + + return print && !repeat; } int __bch2_fsck_err(struct bch_fs *c, @@ -560,7 +566,7 @@ print: if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s", out->buf); else - bch2_print_string_as_lines(KERN_ERR, out->buf); + bch2_print_str(c, KERN_ERR, out->buf); } if (s) diff --git a/libbcachefs/error.h b/libbcachefs/error.h index d0d024dc..da653e4e 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -76,9 +76,7 @@ struct fsck_err_state { #define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) -void __bch2_count_fsck_err(struct bch_fs *, - enum bch_sb_error_id, const char *, - bool *, bool *, bool *); +bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbuf *); #define bch2_count_fsck_err(_c, _err, ...) 
\ __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__) diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 98de81bd..7058a343 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -158,7 +158,15 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (dev >= 0 && p.ptr.dev != dev) continue; - struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); + + if (unlikely(!ca && p.ptr.dev != BCH_SB_MEMBER_INVALID)) { + rcu_read_unlock(); + int ret = bch2_dev_missing_bkey(c, k, p.ptr.dev); + if (ret) + return ret; + rcu_read_lock(); + } if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr))) continue; diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c index c1553e44..14886e1d 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/fs-ioctl.c @@ -69,7 +69,7 @@ static int bch2_inode_flags_set(struct btree_trans *trans, if (ret < 0) return ret; - ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding); + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); if (ret) return ret; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 5a41b1a8..b032ec7a 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -333,9 +333,8 @@ repeat: if (!trans) { __wait_on_freeing_inode(c, inode, inum); } else { - bch2_trans_unlock(trans); - __wait_on_freeing_inode(c, inode, inum); - int ret = bch2_trans_relock(trans); + int ret = drop_locks_do(trans, + (__wait_on_freeing_inode(c, inode, inum), 0)); if (ret) return ERR_PTR(ret); } @@ -2220,7 +2219,12 @@ got_sb: sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); - super_set_sysfs_name_uuid(sb); + + if (c->sb.multi_device) + super_set_sysfs_name_uuid(sb); + else + strscpy(sb->s_sysfs_name, c->name, sizeof(sb->s_sysfs_name)); + sb->s_shrink->seeks = 0; c->vfs_sb = sb; strscpy(sb->s_id, c->name, sizeof(sb->s_id)); diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index f82cfbf4..1b2fc902 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -284,7 +284,7 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); -int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); +int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *); #include "rebalance.h" diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c index 2e5f81cd..9a2d5ac5 100644 --- a/libbcachefs/io_read.c +++ b/libbcachefs/io_read.c @@ -35,12 +35,6 @@ module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644); MODULE_PARM_DESC(read_corrupt_ratio, ""); #endif -static bool bch2_poison_extents_on_checksum_error; -module_param_named(poison_extents_on_checksum_error, - bch2_poison_extents_on_checksum_error, bool, 0644); -MODULE_PARM_DESC(poison_extents_on_checksum_error, - "Extents with checksum errors are marked as poisoned - unsafe without read fua support"); - #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT static bool bch2_target_congested(struct bch_fs *c, u16 target) @@ -469,9 +463,6 @@ static void get_rbio_extent(struct btree_trans *trans, static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio, enum btree_id btree, struct 
bkey_s_c read_k) { - if (!bch2_poison_extents_on_checksum_error) - return 0; - struct bch_fs *c = trans->c; struct data_update *u = rbio_data_update(rbio); @@ -570,6 +561,7 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; + int orig_error = rbio->ret; struct btree_trans *trans = bch2_trans_get(c); @@ -610,7 +602,9 @@ static void bch2_rbio_retry(struct work_struct *work) if (ret) { rbio->ret = ret; rbio->bio.bi_status = BLK_STS_IOERR; - } else { + } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && + orig_error != -BCH_ERR_data_read_ptr_stale_race && + !failed.nr) { struct printbuf buf = PRINTBUF; lockrestart_do(trans, @@ -1054,7 +1048,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) && !orig->data_update) - return -BCH_ERR_extent_poisened; + return -BCH_ERR_extent_poisoned; retry_pick: ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); @@ -1240,6 +1234,10 @@ retry_pick: rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; + /* XXX: also nvme read recovery level */ + if (unlikely(failed && bch2_dev_io_failures(failed, pick.ptr.dev))) + rbio->bio.bi_opf |= REQ_FUA; + if (rbio->bounce) trace_and_count(c, io_read_bounce, &rbio->bio); @@ -1459,13 +1457,15 @@ err: } if (unlikely(ret)) { - struct printbuf buf = PRINTBUF; - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, - bvec_iter.bi_sector << 9)); - prt_printf(&buf, "read error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); + if (ret != -BCH_ERR_extent_poisoned) { + struct printbuf buf = PRINTBUF; + lockrestart_do(trans, + bch2_inum_offset_err_msg_trans(trans, &buf, inum, + bvec_iter.bi_sector << 9)); + prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } rbio->bio.bi_status = BLK_STS_IOERR; rbio->ret = ret; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 55ed5704..898f6125 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -173,7 +173,7 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) spin_unlock(&j->lock); prt_printf(&buf, bch2_fmt(c, "Journal stuck! 
Hava a pre-reservation but journal full (error %s)"), bch2_err_str(error)); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_reset(&buf); bch2_journal_pins_to_text(&buf, j); @@ -314,16 +314,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t __bch2_journal_buf_put(j, le64_to_cpu(buf->data->seq)); } -void bch2_journal_halt(struct journal *j) -{ - spin_lock(&j->lock); - __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true); - if (!j->err_seq) - j->err_seq = journal_cur_seq(j); - journal_wake(j); - spin_unlock(&j->lock); -} - void bch2_journal_halt_locked(struct journal *j) { lockdep_assert_held(&j->lock); @@ -334,6 +324,13 @@ void bch2_journal_halt_locked(struct journal *j) journal_wake(j); } +void bch2_journal_halt(struct journal *j) +{ + spin_lock(&j->lock); + bch2_journal_halt_locked(j); + spin_unlock(&j->lock); +} + static bool journal_entry_want_write(struct journal *j) { bool ret = !journal_entry_is_open(j) || @@ -729,7 +726,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, struct printbuf buf = PRINTBUF; bch2_journal_debug_to_text(&buf, j); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); printbuf_exit(&buf); @@ -1281,6 +1278,16 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) { + struct bch_fs *c = ca->fs; + + if (!(ca->mi.data_allowed & BIT(BCH_DATA_journal))) + return 0; + + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_err(c, "cannot allocate journal, filesystem is an unresized image file"); + return -BCH_ERR_erofs_filesystem_full; + } + unsigned nr; int ret; @@ -1301,7 +1308,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs); + ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs); err: bch_err_fn(ca, ret); return ret; diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 0b92a8b5..738dd2c8 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -426,8 +426,8 @@ int bch2_journal_flush(struct journal *); bool bch2_journal_noflush_seq(struct journal *, u64, u64); int bch2_journal_meta(struct journal *); -void bch2_journal_halt(struct journal *); void bch2_journal_halt_locked(struct journal *); +void bch2_journal_halt(struct journal *); static inline int bch2_journal_error(struct journal *j) { diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 2a54ac79..913125df 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -2105,7 +2105,7 @@ CLOSURE_CALLBACK(bch2_journal_write) le64_to_cpu(w->data->seq), vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } if (ret) diff --git a/libbcachefs/move.c b/libbcachefs/move.c index a83b67f5..f605bd01 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -423,6 +423,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; int ret = 0; + if (extent_iter->min_depth) + return opts_ret; + if (extent_k.k->type == KEY_TYPE_reflink_v) goto out; @@ -573,11 +576,11 @@ static struct bkey_s_c 
bch2_lookup_indirect_extent_for_move(struct btree_trans * return k; } -static int bch2_move_data_btree(struct moving_context *ctxt, - struct bpos start, - struct bpos end, - move_pred_fn pred, void *arg, - enum btree_id btree_id) +int bch2_move_data_btree(struct moving_context *ctxt, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + enum btree_id btree_id, unsigned level) { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; @@ -603,11 +606,56 @@ static int bch2_move_data_btree(struct moving_context *ctxt, ctxt->stats->pos = BBPOS(btree_id, start); } +retry_root: bch2_trans_begin(trans); - bch2_trans_iter_init(trans, &iter, btree_id, start, - BTREE_ITER_prefetch| - BTREE_ITER_not_extents| - BTREE_ITER_all_snapshots); + + if (level == bch2_btree_id_root(c, btree_id)->level + 1) { + bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level - 1, + BTREE_ITER_prefetch| + BTREE_ITER_not_extents| + BTREE_ITER_all_snapshots); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto root_err; + + if (b != btree_node_root(c, b)) { + bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + + k = bkey_i_to_s_c(&b->key); + + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, + iter.pos, &iter, k); + ret = PTR_ERR_OR_ZERO(io_opts); + if (ret) + goto root_err; + + memset(&data_opts, 0, sizeof(data_opts)); + if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts)) + goto out; + + + if (!data_opts.scrub) + ret = bch2_btree_node_rewrite_pos(trans, btree_id, level, + k.k->p, data_opts.target, 0); + else + ret = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev); + +root_err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + + goto out; + } + + bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level, + BTREE_ITER_prefetch| + BTREE_ITER_not_extents| + BTREE_ITER_all_snapshots); if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); @@ -667,7 +715,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, continue; memset(&data_opts, 0, sizeof(data_opts)); - if (!pred(c, arg, k, io_opts, &data_opts)) + if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts)) goto next; /* @@ -677,7 +725,14 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + if (!level) + ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + else if (!data_opts.scrub) + ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level, + k.k->p, data_opts.target, 0); + else + ret2 = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev); + if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; @@ -697,7 +752,7 @@ next: next_nondata: bch2_btree_iter_advance(trans, &iter); } - +out: bch2_trans_iter_exit(trans, &reflink_iter); bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&sk, c); @@ -727,7 +782,7 @@ int __bch2_move_data(struct moving_context *ctxt, ret = bch2_move_data_btree(ctxt, id == start.btree ? start.pos : POS_MIN, id == end.btree ? 
end.pos : POS_MAX, - pred, arg, id); + pred, arg, id, 0); if (ret) break; } @@ -850,7 +905,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, } struct data_update_opts data_opts = {}; - if (!pred(c, arg, k, &io_opts, &data_opts)) { + if (!pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts)) { bch2_trans_iter_exit(trans, &iter); goto next; } @@ -871,7 +926,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (!bp.v->level) ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts); else if (!data_opts.scrub) - ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, k.k->p, 0); + ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, + k.k->p, data_opts.target, 0); else ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev); @@ -899,16 +955,16 @@ err: return ret; } -static int bch2_move_data_phys(struct bch_fs *c, - unsigned dev, - u64 start, - u64 end, - unsigned data_types, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) +int bch2_move_data_phys(struct bch_fs *c, + unsigned dev, + u64 start, + u64 end, + unsigned data_types, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) { struct moving_context ctxt; @@ -932,7 +988,8 @@ struct evacuate_bucket_arg { struct data_update_opts data_opts; }; -static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k, +static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1021,7 +1078,7 @@ retry: if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; - ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret; + ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0) ?: ret; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) @@ -1046,7 +1103,7 @@ next: } static bool rereplicate_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1078,7 +1135,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, } static bool migrate_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1105,7 +1162,7 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); + return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts); } /* @@ -1161,7 +1218,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) } static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1194,11 +1251,12 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); + return drop_extra_replicas_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), + io_opts, data_opts); } static bool 
scrub_pred(struct bch_fs *c, void *_arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { diff --git a/libbcachefs/move.h b/libbcachefs/move.h index 51e0505a..0c620a5f 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -72,7 +72,7 @@ do { \ break; \ } while (1) -typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, +typedef bool (*move_pred_fn)(struct bch_fs *, void *, enum btree_id, struct bkey_s_c, struct bch_io_opts *, struct data_update_opts *); extern const char * const bch2_data_ops_strs[]; @@ -122,6 +122,8 @@ int bch2_move_extent(struct moving_context *, struct bch_io_opts, struct data_update_opts); +int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos, + move_pred_fn, void *, enum btree_id, unsigned); int __bch2_move_data(struct moving_context *, struct bbpos, struct bbpos, @@ -135,6 +137,11 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); +int bch2_move_data_phys(struct bch_fs *, unsigned, u64, u64, unsigned, + struct bch_ratelimit *, struct bch_move_stats *, + struct write_point_specifier, bool, + move_pred_fn, void *); + int bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, diff --git a/libbcachefs/movinggc.h b/libbcachefs/movinggc.h index ea181fef..d1885cf6 100644 --- a/libbcachefs/movinggc.h +++ b/libbcachefs/movinggc.h @@ -5,6 +5,15 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); +static inline void bch2_copygc_wakeup(struct bch_fs *c) +{ + rcu_read_lock(); + struct task_struct *p = rcu_dereference(c->copygc_thread); + if (p) + wake_up_process(p); + rcu_read_unlock(); +} + void bch2_copygc_stop(struct bch_fs *); int bch2_copygc_start(struct bch_fs *); void bch2_fs_copygc_init(struct bch_fs *); diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 5c5cf26d..b1cf8890 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -7,7 +7,9 @@ #include "compress.h" #include "disk_groups.h" #include "error.h" +#include "movinggc.h" #include "opts.h" +#include "rebalance.h" #include "recovery_passes.h" #include "super-io.h" #include "util.h" @@ -516,7 +518,7 @@ void bch2_opts_to_text(struct printbuf *out, } } -int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v) +int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id id, u64 v) { int ret = 0; @@ -534,23 +536,17 @@ int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v) if (v) bch2_check_set_feature(c, BCH_FEATURE_ec); break; - case Opt_single_device: - if (v) { - mutex_lock(&c->sb_lock); - if (bch2_sb_nr_devices(c->disk_sb.sb) > 1) - ret = -BCH_ERR_not_single_device_filesystem; - mutex_unlock(&c->sb_lock); - } + default: break; } return ret; } -int bch2_opts_check_may_set(struct bch_fs *c) +int bch2_opts_hooks_pre_set(struct bch_fs *c) { for (unsigned i = 0; i < bch2_opts_nr; i++) { - int ret = bch2_opt_check_may_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i)); + int ret = bch2_opt_hook_pre_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i)); if (ret) return ret; } @@ -558,6 +554,61 @@ int bch2_opts_check_may_set(struct bch_fs *c) return 0; } +void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, + struct bch_opts *new_opts, enum bch_opt_id id) +{ + switch (id) { + case Opt_foreground_target: + if (new_opts->foreground_target && + !new_opts->background_target) + 
bch2_set_rebalance_needs_scan(c, inum); + break; + case Opt_compression: + if (new_opts->compression && + !new_opts->background_compression) + bch2_set_rebalance_needs_scan(c, inum); + break; + case Opt_background_target: + if (new_opts->background_target) + bch2_set_rebalance_needs_scan(c, inum); + break; + case Opt_background_compression: + if (new_opts->background_compression) + bch2_set_rebalance_needs_scan(c, inum); + break; + case Opt_rebalance_enabled: + bch2_rebalance_wakeup(c); + break; + case Opt_copygc_enabled: + bch2_copygc_wakeup(c); + break; + case Opt_discard: + if (!ca) { + mutex_lock(&c->sb_lock); + for_each_member_device(c, ca) { + struct bch_member *m = + bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_DISCARD(m, c->opts.discard); + } + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + break; + case Opt_version_upgrade: + /* + * XXX: in the future we'll likely want to do compatible + * upgrades at runtime as well, but right now there's nothing + * that does that: + */ + if (new_opts->version_upgrade == BCH_VERSION_UPGRADE_incompatible) + bch2_sb_upgrade_incompat(c); + break; + default: + break; + } +} + int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, struct printbuf *parse_later, const char *name, const char *val) @@ -717,9 +768,11 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) return 0; } -void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, +bool __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, const struct bch_option *opt, u64 v) { + bool changed = false; + if (opt->flags & OPT_SB_FIELD_SECTORS) v >>= 9; @@ -729,26 +782,35 @@ void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, if (opt->flags & OPT_SB_FIELD_ONE_BIAS) v++; - if ((opt->flags & OPT_FS) && opt->set_sb && dev_idx < 0) + if ((opt->flags & OPT_FS) && opt->set_sb && dev_idx < 0) { + changed = v != opt->get_sb(sb); + opt->set_sb(sb, v); + } if ((opt->flags & OPT_DEVICE) && opt->set_member && dev_idx >= 0) { if (WARN(!bch2_member_exists(sb, dev_idx), "tried to set device option %s on nonexistent device %i", opt->attr.name, dev_idx)) - return; + return false; - opt->set_member(bch2_members_v2_get_mut(sb, dev_idx), v); + struct bch_member *m = bch2_members_v2_get_mut(sb, dev_idx); + changed = v != opt->get_member(m); + opt->set_member(m, v); } + + return changed; } -void bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, +bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, const struct bch_option *opt, u64 v) { mutex_lock(&c->sb_lock); - __bch2_opt_set_sb(c->disk_sb.sb, ca ? ca->dev_idx : -1, opt, v); - bch2_write_super(c); + bool changed = __bch2_opt_set_sb(c->disk_sb.sb, ca ? 
ca->dev_idx : -1, opt, v); + if (changed) + bch2_write_super(c); mutex_unlock(&c->sb_lock); + return changed; } /* io opts: */ diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index ae24a2f0..5cdb1880 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -445,7 +445,7 @@ enum fsck_err_opts { BCH2_NO_SB_OPT, false, \ NULL, "Reconstruct alloc btree") \ x(version_upgrade, u8, \ - OPT_FS|OPT_MOUNT, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_version_upgrade_opts), \ BCH_SB_VERSION_UPGRADE, BCH_VERSION_UPGRADE_compatible, \ NULL, "Set superblock to latest version,\n" \ @@ -522,12 +522,7 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, true, \ NULL, "BTREE_ITER_prefetch causes btree nodes to be\n"\ - " prefetched sequentially") \ - x(single_device, u8, \ - OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_SINGLE_DEVICE, false, \ - NULL, "Devices with the same UUID may be mounted simultaneously") + " prefetched sequentially") struct bch_opts { #define x(_name, _bits, ...) unsigned _name##_defined:1; @@ -612,10 +607,10 @@ void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id, int); int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *); -void __bch2_opt_set_sb(struct bch_sb *, int, const struct bch_option *, u64); +bool __bch2_opt_set_sb(struct bch_sb *, int, const struct bch_option *, u64); struct bch_dev; -void bch2_opt_set_sb(struct bch_fs *, struct bch_dev *, const struct bch_option *, u64); +bool bch2_opt_set_sb(struct bch_fs *, struct bch_dev *, const struct bch_option *, u64); int bch2_opt_lookup(const char *); int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *); @@ -632,8 +627,11 @@ void bch2_opts_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, unsigned, unsigned, unsigned); -int bch2_opt_check_may_set(struct bch_fs *, struct bch_dev *, int, u64); -int bch2_opts_check_may_set(struct bch_fs *); +int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, enum bch_opt_id, u64); +int bch2_opts_hooks_pre_set(struct bch_fs *); +void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64, + struct bch_opts *, enum bch_opt_id); + int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *, struct printbuf *, const char *, const char *); int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *, diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 63f01349..2f0f8782 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -268,7 +268,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -454,7 +454,7 @@ out: } static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -670,7 +670,7 @@ void bch2_rebalance_stop(struct bch_fs *c) c->rebalance.thread = NULL; if (p) { - /* for sychronizing with rebalance_wakeup() */ + /* for sychronizing with bch2_rebalance_wakeup() */ synchronize_rcu(); kthread_stop(p); diff --git a/libbcachefs/rebalance.h b/libbcachefs/rebalance.h index 62a3859d..e5e8eb4a 100644 --- a/libbcachefs/rebalance.h +++ b/libbcachefs/rebalance.h @@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct 
bch_fs *, u64 inum); int bch2_set_fs_needs_rebalance(struct bch_fs *); -static inline void rebalance_wakeup(struct bch_fs *c) +static inline void bch2_rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index d2b07f60..f7b4b659 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -32,7 +32,6 @@ #include <linux/sort.h> #include <linux/stat.h> - int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { u64 b = BIT_ULL(btree); @@ -113,11 +112,8 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree) } /* for -o reconstruct_alloc: */ -static void bch2_reconstruct_alloc(struct bch_fs *c) +void bch2_reconstruct_alloc(struct bch_fs *c) { - bch2_journal_log_msg(c, "dropping alloc info"); - bch_info(c, "dropping and reconstructing all alloc info"); - mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); @@ -159,6 +155,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info)); + bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -666,7 +664,7 @@ static bool check_version_upgrade(struct bch_fs *c) bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } - bch_info(c, "%s", buf.buf); + bch_notice(c, "%s", buf.buf); printbuf_exit(&buf); ret = true; @@ -682,7 +680,7 @@ static bool check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, c->sb.version_incompat_allowed); prt_newline(&buf); - bch_info(c, "%s", buf.buf); + bch_notice(c, "%s", buf.buf); printbuf_exit(&buf); ret = true; @@ -888,8 +886,26 @@ use_clean: if (ret) goto err; - if (c->opts.reconstruct_alloc) + if (!c->opts.read_only && + (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) { + bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); + + bch2_reconstruct_alloc(c); + } else if (c->opts.reconstruct_alloc) { + bch2_journal_log_msg(c, "dropping alloc info"); + bch_info(c, "dropping and reconstructing all alloc info"); + bch2_reconstruct_alloc(c); + } + + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + /* We can't go RW to fix errors without alloc info */ + if (c->opts.fix_errors == FSCK_FIX_yes || + c->opts.fix_errors == FSCK_FIX_ask) + c->opts.fix_errors = FSCK_FIX_no; + if (c->opts.errors == BCH_ON_ERROR_fix_safe) + c->opts.errors = BCH_ON_ERROR_continue; + } /* * After an unclean shutdown, skip then next few journal sequence @@ -933,6 +949,10 @@ use_clean: ret = bch2_sb_set_upgrade_extra(c); + ret = bch2_fs_resize_on_mount(c); + if (ret) + goto err; + ret = bch2_run_recovery_passes(c); if (ret) goto err; @@ -1125,7 +1145,10 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - bch2_fs_journal_start(&c->journal, 1); + ret = bch2_fs_journal_start(&c->journal, 1); + if (ret) + goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); bch2_journal_set_replay_done(&c->journal); diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h index b0d55754..d858ba67 100644 --- a/libbcachefs/recovery.h +++ b/libbcachefs/recovery.h @@ -3,6 +3,7 @@ #define _BCACHEFS_RECOVERY_H int bch2_btree_lost_data(struct bch_fs *, enum btree_id); +void bch2_reconstruct_alloc(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); diff --git 
a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c index 593ff142..7a644408 100644 --- a/libbcachefs/recovery_passes.c +++ b/libbcachefs/recovery_passes.c @@ -46,11 +46,36 @@ static int bch2_set_may_go_rw(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) + if (keys->nr || + !c->opts.read_only || + !c->sb.clean || + c->opts.recovery_passes || + (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))) { + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); + bch2_reconstruct_alloc(c); + } + return bch2_fs_read_write_early(c); + } return 0; } +/* + * Make sure root inode is readable while we're still in recovery and can rewind + * for repair: + */ +static int bch2_lookup_root_inode(struct bch_fs *c) +{ + subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM; + struct bch_inode_unpacked inode_u; + struct bch_subvolume subvol; + + return bch2_trans_do(c, + bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: + bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); +} + struct recovery_pass_fn { int (*fn)(struct bch_fs *); unsigned when; @@ -100,7 +125,8 @@ u64 bch2_recovery_passes_from_stable(u64 v) /* * For when we need to rewind recovery passes and run a pass we skipped: */ -static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, +static int __bch2_run_explicit_recovery_pass(struct printbuf *out, + struct bch_fs *c, enum bch_recovery_pass pass) { if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) @@ -114,15 +140,15 @@ static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, if (pass < BCH_RECOVERY_PASS_set_may_go_rw && c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { if (print) - bch_info(c, "need recovery pass %s (%u), but already rw", - bch2_recovery_passes[pass], pass); + prt_printf(out, "need recovery pass %s (%u), but already rw", + bch2_recovery_passes[pass], pass); return -BCH_ERR_cannot_rewind_recovery; } if (print) - bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", - bch2_recovery_passes[pass], pass, - bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); + prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)", + bch2_recovery_passes[pass], pass, + bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); c->opts.recovery_passes |= BIT_ULL(pass); @@ -135,13 +161,34 @@ static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, } } -int bch2_run_explicit_recovery_pass(struct bch_fs *c, +int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, + struct printbuf *out, enum bch_recovery_pass pass) { + bch2_printbuf_make_room(out, 1024); + out->atomic++; + unsigned long flags; spin_lock_irqsave(&c->recovery_pass_lock, flags); - int ret = __bch2_run_explicit_recovery_pass(c, pass); + int ret = __bch2_run_explicit_recovery_pass(out, c, pass); spin_unlock_irqrestore(&c->recovery_pass_lock, flags); + + --out->atomic; + return ret; +} + +int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + unsigned len = buf.pos; + + int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, pass); + + if (len != buf.pos) + bch2_print_str(c, KERN_NOTICE, buf.buf); + printbuf_exit(&buf); return ret; } @@ -202,6 +249,8 @@ static bool should_run_recovery_pass(struct bch_fs *c, 
enum bch_recovery_pass pa { struct recovery_pass_fn *p = recovery_pass_fns + pass; + if ((p->when & PASS_ALLOC) && (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) + return false; if (c->opts.recovery_passes_exclude & BIT_ULL(pass)) return false; if (c->opts.recovery_passes & BIT_ULL(pass)) diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h index 7d7339c8..e19a8aab 100644 --- a/libbcachefs/recovery_passes.h +++ b/libbcachefs/recovery_passes.h @@ -8,6 +8,9 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); +int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, + struct printbuf *, + enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); diff --git a/libbcachefs/recovery_passes_types.h b/libbcachefs/recovery_passes_types.h index e89b9c78..f9d565bb 100644 --- a/libbcachefs/recovery_passes_types.h +++ b/libbcachefs/recovery_passes_types.h @@ -7,6 +7,8 @@ #define PASS_UNCLEAN BIT(2) #define PASS_ALWAYS BIT(3) #define PASS_ONLINE BIT(4) +#define PASS_ALLOC BIT(5) +#define PASS_FSCK_ALLOC (PASS_FSCK|PASS_ALLOC) #ifdef CONFIG_BCACHEFS_DEBUG #define PASS_FSCK_DEBUG BIT(1) @@ -27,17 +29,17 @@ x(stripes_read, 1, 0) \ x(initialize_subvolumes, 2, 0) \ x(snapshots_read, 3, PASS_ALWAYS) \ - x(check_allocations, 5, PASS_FSCK) \ - x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ - x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ + x(check_allocations, 5, PASS_FSCK_ALLOC) \ + x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ + x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, 9, PASS_ALWAYS) \ - x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ - x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ - x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ + x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \ + x(check_lrus, 11, PASS_ONLINE|PASS_FSCK_ALLOC) \ + x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ - x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ - x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ + x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK_ALLOC) \ + x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ x(bucket_gens_init, 17, 0) \ x(reconstruct_snapshots, 38, 0) \ @@ -60,7 +62,8 @@ x(resume_logged_ops, 23, PASS_ALWAYS) \ x(delete_dead_inodes, 32, PASS_ALWAYS) \ x(fix_reflink_p, 33, 0) \ - x(set_fs_needs_rebalance, 34, 0) + x(set_fs_needs_rebalance, 34, 0) \ + x(lookup_root_inode, 42, PASS_ALWAYS|PASS_SILENT) /* We normally enumerate recovery passes in the order we run them: */ enum bch_recovery_pass { diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c index acb5d845..6a16767e 100644 --- a/libbcachefs/sb-downgrade.c +++ b/libbcachefs/sb-downgrade.c @@ -370,6 +370,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c) if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) continue; + if (src->version < c->sb.version_incompat) + continue; + struct bch_sb_field_downgrade_entry *dst; unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; diff --git 
a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h index 5d43e350..83acfd9c 100644 --- a/libbcachefs/sb-errors_format.h +++ b/libbcachefs/sb-errors_format.h @@ -290,14 +290,15 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ - x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ - x(snapshot_node_missing, 264, 0) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ + x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ x(sb_clean_entry_overrun, 267, 0) \ x(btree_ptr_v2_written_0, 268, 0) \ x(subvol_snapshot_bad, 269, 0) \ x(subvol_inode_bad, 270, 0) \ + x(subvol_missing, 308, FSCK_AUTOFIX) \ x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \ x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ @@ -317,7 +318,7 @@ enum bch_fsck_flags { x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ x(dirent_cf_name_too_big, 304, 0) \ x(dirent_stray_data_after_cf_name, 305, 0) \ - x(MAX, 308, 0) + x(MAX, 309, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index 116131f9..b2f9cb2d 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -5,11 +5,31 @@ #include "disk_groups.h" #include "error.h" #include "opts.h" +#include "recovery_passes.h" #include "replicas.h" #include "sb-members.h" #include "super-io.h" -void bch2_dev_missing(struct bch_fs *c, unsigned dev) +int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) +{ + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "pointer to nonexistent device %u in key\n", dev); + bch2_bkey_val_to_text(&buf, c, k); + + bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); + + int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + BCH_RECOVERY_PASS_check_allocations); + + if (print) + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); + return ret; +} + +void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { if (dev != BCH_SB_MEMBER_INVALID) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); @@ -168,6 +188,12 @@ static int validate_member(struct printbuf *err, return -BCH_ERR_invalid_sb_members; } + if (BCH_MEMBER_FREESPACE_INITIALIZED(&m) && + sb->features[0] & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + prt_printf(err, "device %u: freespace initialized but fs has no alloc info", i); + return -BCH_ERR_invalid_sb_members; + } + return 0; } @@ -266,6 +292,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); + prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(&m)); printbuf_indent_sub(out, 2); } diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h index 06bb41a3..e2a8d03a 100644 --- a/libbcachefs/sb-members.h +++ b/libbcachefs/sb-members.h @@ -218,13 +218,15 @@ static inline struct bch_dev *bch2_dev_rcu_noerror(struct bch_fs *c, unsigned de : NULL; } -void bch2_dev_missing(struct bch_fs *, unsigned); +int bch2_dev_missing_bkey(struct bch_fs *, struct bkey_s_c, unsigned); + +void bch2_dev_missing_atomic(struct bch_fs *, unsigned); static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *c, unsigned dev) { struct 
bch_dev *ca = bch2_dev_rcu_noerror(c, dev); if (unlikely(!ca)) - bch2_dev_missing(c, dev); + bch2_dev_missing_atomic(c, dev); return ca; } @@ -242,7 +244,7 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev); if (unlikely(!ca)) - bch2_dev_missing(c, dev); + bch2_dev_missing_atomic(c, dev); return ca; } @@ -348,6 +350,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) ? BCH_MEMBER_DURABILITY(mi) - 1 : 1, .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), + .resize_on_mount = BCH_MEMBER_RESIZE_ON_MOUNT(mi), .valid = bch2_member_alive(mi), .btree_bitmap_shift = mi->btree_bitmap_shift, .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap), diff --git a/libbcachefs/sb-members_format.h b/libbcachefs/sb-members_format.h index 3affec82..472218a5 100644 --- a/libbcachefs/sb-members_format.h +++ b/libbcachefs/sb-members_format.h @@ -88,6 +88,8 @@ LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28) LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30) LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED, struct bch_member, flags, 30, 31) +LE64_BITMASK(BCH_MEMBER_RESIZE_ON_MOUNT, + struct bch_member, flags, 31, 32) #if 0 LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); diff --git a/libbcachefs/sb-members_types.h b/libbcachefs/sb-members_types.h index c0eda888..d6443e18 100644 --- a/libbcachefs/sb-members_types.h +++ b/libbcachefs/sb-members_types.h @@ -13,6 +13,7 @@ struct bch_member_cpu { u8 data_allowed; u8 durability; u8 freespace_initialized; + u8 resize_on_mount; u8 valid; u8 btree_bitmap_shift; u64 btree_allocated_bitmap; diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index c9acaf13..1b9fb60c 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -6,6 +6,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -13,6 +14,22 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); +static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) +{ + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "missing subvolume %u", subvolid); + bool print = bch2_count_fsck_err(c, subvol_missing, &buf); + + int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + BCH_RECOVERY_PASS_check_inodes); + if (print) + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); + return ret; +} + static struct bpos subvolume_children_pos(struct bkey_s_c k) { if (k.k->type != KEY_TYPE_subvolume) @@ -291,9 +308,8 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), BTREE_ITER_cached| BTREE_ITER_with_updates, subvolume, s); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && - inconsistent_if_not_found, - trans->c, "missing subvolume %u", subvol); + if (bch2_err_matches(ret, ENOENT) && inconsistent_if_not_found) + ret = bch2_subvolume_missing(trans->c, subvol) ?: ret; return ret; } @@ -343,8 +359,8 @@ int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, subvolume); ret = bkey_err(subvol); - bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c, - "missing subvolume %u", subvolid); + if (bch2_err_matches(ret, ENOENT)) + ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret; if (likely(!ret)) *snapid = le32_to_cpu(subvol.v->snapshot); 
@@ -417,8 +433,8 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) BTREE_ITER_cached|BTREE_ITER_intent, subvolume); int ret = bkey_err(subvol); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, - "missing subvolume %u", subvolid); + if (bch2_err_matches(ret, ENOENT)) + ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret; if (ret) goto err; @@ -552,11 +568,10 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) BTREE_ID_subvolumes, POS(0, subvolid), BTREE_ITER_cached, subvolume); ret = PTR_ERR_OR_ZERO(n); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, - "missing subvolume %u", subvolid); + if (bch2_err_matches(ret, ENOENT)) + ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret; + if (unlikely(ret)) return ret; - } SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); n->v.fs_path_parent = 0; @@ -595,11 +610,10 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, BTREE_ID_subvolumes, POS(0, src_subvolid), BTREE_ITER_cached, subvolume); ret = PTR_ERR_OR_ZERO(src_subvol); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "subvolume %u not found", src_subvolid); + if (bch2_err_matches(ret, ENOENT)) + ret = bch2_subvolume_missing(trans->c, src_subvolid) ?: ret; + if (unlikely(ret)) goto err; - } parent = le32_to_cpu(src_subvol->v.snapshot); } diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 7d0c5cb6..bc2721b9 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -73,14 +73,30 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v ? 0 : -BCH_ERR_may_not_use_incompat_feature; + mutex_lock(&c->sb_lock); if (!ret) { - mutex_lock(&c->sb_lock); SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); + } else { + darray_for_each(c->incompat_versions_requested, i) + if (version == *i) + goto out; + + darray_push(&c->incompat_versions_requested, version); + struct printbuf buf = PRINTBUF; + prt_str(&buf, "requested incompat feature "); + bch2_version_to_text(&buf, version); + prt_str(&buf, " currently not enabled"); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); + + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); } +out: + mutex_unlock(&c->sb_lock); + return ret; } @@ -368,7 +384,6 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { - struct bch_sb_field_members_v1 *mi; enum bch_opt_id opt_id; int ret; @@ -452,6 +467,9 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb)); } + if (sb->nr_devices > 1) + SET_BCH_SB_MULTI_DEVICE(sb, true); + if (!flags) { /* * Been seeing a bug where these are getting inexplicably @@ -520,14 +538,17 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, } } + struct bch_sb_field *mi = + bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v2) ?: + bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v1); + /* members must be validated first: */ - mi = bch2_sb_field_get(sb, members_v1); if (!mi) { prt_printf(out, "Invalid superblock: member info area missing"); return -BCH_ERR_invalid_sb_members_missing; } - ret = bch2_sb_field_validate(sb, &mi->field, flags, out); + ret = bch2_sb_field_validate(sb, mi, flags, out); if (ret) return ret; @@ -596,6 +617,7 @@ 
static void bch2_sb_update(struct bch_fs *c) c->sb.features = le64_to_cpu(src->features[0]); c->sb.compat = le64_to_cpu(src->compat[0]); + c->sb.multi_device = BCH_SB_MULTI_DEVICE(src); memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); @@ -1253,6 +1275,31 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) } } +void bch2_sb_upgrade_incompat(struct bch_fs *c) +{ + mutex_lock(&c->sb_lock); + if (c->sb.version == c->sb.version_incompat_allowed) + goto unlock; + + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Now allowing incompatible features up to "); + bch2_version_to_text(&buf, c->sb.version); + prt_str(&buf, ", previously allowed up to "); + bch2_version_to_text(&buf, c->sb.version_incompat_allowed); + prt_newline(&buf); + + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); + + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version)); + bch2_write_super(c); +unlock: + mutex_unlock(&c->sb_lock); +} + static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index 78f708a6..a3b7a90f 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -107,6 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) bool bch2_check_version_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned, bool); +void bch2_sb_upgrade_incompat(struct bch_fs *); void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 32a9ac30..3fa4706c 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -84,7 +84,8 @@ const char * const bch2_fs_flag_strs[] = { NULL }; -void bch2_print_str(struct bch_fs *c, const char *str) +static void __bch2_print_str(struct bch_fs *c, const char *prefix, + const char *str, bool nonblocking) { #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); @@ -94,7 +95,17 @@ void bch2_print_str(struct bch_fs *c, const char *str) return; } #endif - bch2_print_string_as_lines(KERN_ERR, str); + bch2_print_string_as_lines(KERN_ERR, str, nonblocking); +} + +void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) +{ + __bch2_print_str(c, prefix, str, false); +} + +void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str) +{ + __bch2_print_str(c, prefix, str, true); } __printf(2, 0) @@ -453,11 +464,19 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); + if (WARN_ON(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) + return -BCH_ERR_erofs_no_alloc_info; + if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { bch_err(c, "cannot go rw, unfixed btree errors"); return -BCH_ERR_erofs_unfixed_errors; } + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_err(c, "cannot go rw, filesystem is an unresized image file"); + return -BCH_ERR_erofs_filesystem_full; + } + if (test_bit(BCH_FS_rw, &c->flags)) return 0; @@ -539,6 +558,9 @@ int bch2_fs_read_write(struct bch_fs *c) if (c->opts.nochanges) return -BCH_ERR_erofs_nochanges; + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) + return -BCH_ERR_erofs_no_alloc_info; + return __bch2_fs_read_write(c, false); } @@ -595,6 +617,7 @@ static void __bch2_fs_free(struct 
bch_fs *c) free_percpu(c->online_reserved); } + darray_exit(&c->incompat_versions_requested); darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); free_percpu(c->usage); @@ -708,10 +731,10 @@ static int bch2_fs_online(struct bch_fs *c) lockdep_assert_held(&bch_fs_list_lock); - if (!c->opts.single_device && + if (c->sb.multi_device && __bch2_uuid_to_fs(c->sb.uuid)) { bch_err(c, "filesystem UUID already open"); - return -EINVAL; + return -BCH_ERR_filesystem_uuid_already_open; } ret = bch2_fs_chardev_init(c); @@ -722,7 +745,7 @@ static int bch2_fs_online(struct bch_fs *c) bch2_fs_debug_init(c); - ret = (!c->opts.single_device + ret = (c->sb.multi_device ? kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) : kobject_add(&c->kobj, NULL, "%s", c->name)) ?: kobject_add(&c->internal, &c->kobj, "internal") ?: @@ -933,7 +956,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, goto err; } - if (sbs->nr != 1 && !c->opts.single_device) + if (c->sb.multi_device) pr_uuid(&name, c->sb.user_uuid.b); else prt_bdevname(&name, sbs->data[0].bdev); @@ -1043,6 +1066,16 @@ static void print_mount_opts(struct bch_fs *c) bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); } + if (c->sb.version_incompat_allowed != c->sb.version) { + prt_printf(&p, "\n allowing incompatible features above "); + bch2_version_to_text(&p, c->sb.version_incompat_allowed); + } + + if (c->opts.verbose) { + prt_printf(&p, "\n features: "); + prt_bitflags(&p, bch2_sb_features, c->sb.features); + } + bch_info(c, "%s", p.buf); printbuf_exit(&p); } @@ -1077,6 +1110,7 @@ int bch2_fs_start(struct bch_fs *c) for_each_online_member(c, ca) bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); + bch2_write_super(c); mutex_unlock(&c->sb_lock); for_each_rw_member(c, ca) @@ -1093,7 +1127,7 @@ int bch2_fs_start(struct bch_fs *c) if (ret) goto err; - ret = bch2_opts_check_may_set(c); + ret = bch2_opts_hooks_pre_set(c); if (ret) goto err; @@ -1127,9 +1161,6 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) { struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); - if (c->opts.single_device) - return -BCH_ERR_single_device_filesystem; - if (le16_to_cpu(sb->block_size) != block_sectors(c)) return -BCH_ERR_mismatched_block_size; @@ -1523,7 +1554,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) strscpy(ca->name, name.buf, sizeof(ca->name)); printbuf_exit(&name); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return 0; } @@ -1669,7 +1700,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -1786,7 +1817,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) up_write(&c->state_lock); return 0; err: - if (ca->mi.state == BCH_MEMBER_STATE_rw && + if (test_bit(BCH_FS_rw, &c->flags) && + ca->mi.state == BCH_MEMBER_STATE_rw && !percpu_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); up_write(&c->state_lock); @@ -1801,7 +1833,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_dev *ca = NULL; struct printbuf errbuf = PRINTBUF; struct printbuf label = PRINTBUF; - int ret; + int ret = 0; ret = bch2_read_super(path, &opts, &sb); bch_err_msg(c, ret, "reading super"); @@ -1818,6 +1850,20 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } } + if (list_empty(&c->list)) { + mutex_lock(&bch_fs_list_lock); + if 
(__bch2_uuid_to_fs(c->sb.uuid)) + ret = -BCH_ERR_filesystem_uuid_already_open; + else + list_add(&c->list, &bch_fs_list); + mutex_unlock(&bch_fs_list_lock); + + if (ret) { + bch_err(c, "filesystem UUID already open"); + goto err; + } + } + ret = bch2_dev_may_add(sb.sb, c); if (ret) goto err; @@ -1834,6 +1880,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) down_write(&c->state_lock); mutex_lock(&c->sb_lock); + SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); ret = bch2_sb_from_fs(c, ca); bch_err_msg(c, ret, "setting up new superblock"); @@ -1999,6 +2046,18 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) return 0; } +static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new_nbuckets) +{ + struct bch_fs *c = ca->fs; + u64 v[3] = { new_nbuckets - old_nbuckets, 0, 0 }; + + return bch2_trans_commit_do(ca->fs, NULL, NULL, 0, + bch2_disk_accounting_mod2(trans, false, v, dev_data_type, + .dev = ca->dev_idx, + .data_type = BCH_DATA_free)) ?: + bch2_dev_freespace_init(c, ca, old_nbuckets, new_nbuckets); +} + int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { struct bch_member *m; @@ -2046,13 +2105,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) mutex_unlock(&c->sb_lock); if (ca->mi.freespace_initialized) { - u64 v[3] = { nbuckets - old_nbuckets, 0, 0 }; - - ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0, - bch2_disk_accounting_mod2(trans, false, v, dev_data_type, - .dev = ca->dev_idx, - .data_type = BCH_DATA_free)) ?: - bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); if (ret) goto err; } @@ -2063,6 +2116,52 @@ err: return ret; } +int bch2_fs_resize_on_mount(struct bch_fs *c) +{ + down_write(&c->state_lock); + + for_each_online_member(c, ca) { + u64 old_nbuckets = ca->mi.nbuckets; + u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk), + ca->mi.bucket_size); + + if (ca->mi.resize_on_mount && + new_nbuckets > ca->mi.nbuckets) { + bch_info(ca, "resizing to size %llu", new_nbuckets * ca->mi.bucket_size); + int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets); + bch_err_fn(ca, ret); + if (ret) { + percpu_ref_put(&ca->io_ref[READ]); + up_write(&c->state_lock); + return ret; + } + + mutex_lock(&c->sb_lock); + struct bch_member *m = + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(new_nbuckets); + SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); + + c->disk_sb.sb->features[0] &= ~BIT_ULL(BCH_FEATURE_small_image); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + if (ca->mi.freespace_initialized) { + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets); + if (ret) { + percpu_ref_put(&ca->io_ref[READ]); + up_write(&c->state_lock); + return ret; + } + } + } + } + + bch2_recalc_capacity(c); + up_write(&c->state_lock); + return 0; +} + /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) { diff --git a/libbcachefs/super.h b/libbcachefs/super.h index 23533bce..502d6c57 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -26,6 +26,7 @@ int bch2_dev_add(struct bch_fs *, const char *); int bch2_dev_online(struct bch_fs *, const char *); int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); +int bch2_fs_resize_on_mount(struct bch_fs *); struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); diff 
--git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index e5f003c2..455c6ae9 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -40,6 +40,7 @@ #include <linux/blkdev.h> #include <linux/sort.h> +#include <linux/string_choices.h> #include <linux/sched/clock.h> #include "util.h" @@ -148,6 +149,7 @@ write_attribute(trigger_btree_key_cache_shrink); write_attribute(trigger_freelist_wakeup); write_attribute(trigger_btree_updates); read_attribute(gc_gens_pos); +__sysfs_attribute(read_fua_test, 0400); read_attribute(uuid); read_attribute(minor); @@ -308,6 +310,116 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c) prt_printf(out, "nr_inodes:\t%llu\n", b.nr_inodes); } +static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bio *bio = NULL; + void *buf = NULL; + unsigned bs = c->opts.block_size, iters; + u64 end, test_duration = NSEC_PER_SEC * 2; + struct bch2_time_stats stats_nofua, stats_fua, stats_random; + int ret = 0; + + bch2_time_stats_init_no_pcpu(&stats_nofua); + bch2_time_stats_init_no_pcpu(&stats_fua); + bch2_time_stats_init_no_pcpu(&stats_random); + + if (!bch2_dev_get_ioref(c, ca->dev_idx, READ)) { + prt_str(out, "offline\n"); + return 0; + } + + struct block_device *bdev = ca->disk_sb.bdev; + + bio = bio_kmalloc(1, GFP_KERNEL); + if (!bio) { + ret = -ENOMEM; + goto err; + } + + buf = kmalloc(bs, GFP_KERNEL); + if (!buf) + goto err; + + end = ktime_get_ns() + test_duration; + for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { + bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ); + bch2_bio_map(bio, buf, bs); + + u64 submit_time = ktime_get_ns(); + ret = submit_bio_wait(bio); + bch2_time_stats_update(&stats_nofua, submit_time); + + if (ret) + goto err; + } + + end = ktime_get_ns() + test_duration; + for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { + bio_init(bio, bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ); + bch2_bio_map(bio, buf, bs); + + u64 submit_time = ktime_get_ns(); + ret = submit_bio_wait(bio); + bch2_time_stats_update(&stats_fua, submit_time); + + if (ret) + goto err; + } + + u64 dev_size = ca->mi.nbuckets * bucket_bytes(ca); + + end = ktime_get_ns() + test_duration; + for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { + bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ); + bio->bi_iter.bi_sector = (bch2_get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9; + bch2_bio_map(bio, buf, bs); + + u64 submit_time = ktime_get_ns(); + ret = submit_bio_wait(bio); + bch2_time_stats_update(&stats_random, submit_time); + + if (ret) + goto err; + } + + u64 ns_nofua = mean_and_variance_get_mean(stats_nofua.duration_stats); + u64 ns_fua = mean_and_variance_get_mean(stats_fua.duration_stats); + u64 ns_rand = mean_and_variance_get_mean(stats_random.duration_stats); + + u64 stddev_nofua = mean_and_variance_get_stddev(stats_nofua.duration_stats); + u64 stddev_fua = mean_and_variance_get_stddev(stats_fua.duration_stats); + u64 stddev_rand = mean_and_variance_get_stddev(stats_random.duration_stats); + + printbuf_tabstop_push(out, 8); + printbuf_tabstop_push(out, 12); + printbuf_tabstop_push(out, 12); + prt_printf(out, "This test must be run on an idle drive for accurate results\n"); + prt_printf(out, "%s\n", dev_name(&ca->disk_sb.bdev->bd_device)); + prt_printf(out, "fua support advertized: %s\n", str_yes_no(bdev_fua(bdev))); + prt_newline(out); + prt_printf(out, "ns:\tlatency\rstddev\r\n"); + prt_printf(out, 
"nofua\t%llu\r%llu\r\n", ns_nofua, stddev_nofua); + prt_printf(out, "fua\t%llu\r%llu\r\n", ns_fua, stddev_fua); + prt_printf(out, "random\t%llu\r%llu\r\n", ns_rand, stddev_rand); + + bool read_cache = ns_nofua * 2 < ns_rand; + bool fua_cached = read_cache && ns_fua < (ns_nofua + ns_rand) / 2; + + if (!read_cache) + prt_str(out, "reads don't appear to be cached - safe\n"); + else if (!fua_cached) + prt_str(out, "fua reads don't appear to be cached - safe\n"); + else + prt_str(out, "fua reads appear to be cached - unsafe\n"); +err: + kfree(buf); + kfree(bio); + percpu_ref_put(&ca->io_ref[READ]); + bch_err_fn(c, ret); + return ret; +} + SHOW(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); @@ -637,37 +749,19 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, u64 v; ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?: - bch2_opt_check_may_set(c, ca, id, v); + bch2_opt_hook_pre_set(c, ca, id, v); kfree(tmp); if (ret < 0) goto err; - bch2_opt_set_sb(c, ca, opt, v); - bch2_opt_set_by_id(&c->opts, id, v); - - if (v && - (id == Opt_background_target || - (id == Opt_foreground_target && !c->opts.background_target) || - id == Opt_background_compression || - (id == Opt_compression && !c->opts.background_compression))) - bch2_set_rebalance_needs_scan(c, 0); + bool changed = bch2_opt_set_sb(c, ca, opt, v); - if (v && id == Opt_rebalance_enabled) - rebalance_wakeup(c); + if (!ca) + bch2_opt_set_by_id(&c->opts, id, v); - if (v && id == Opt_copygc_enabled && - c->copygc_thread) - wake_up_process(c->copygc_thread); - - if (id == Opt_discard && !ca) { - mutex_lock(&c->sb_lock); - for_each_member_device(c, ca) - opt->set_member(bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx), v); - - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - } + if (changed) + bch2_opt_hook_post_set(c, ca, 0, &c->opts, id); ret = size; err: @@ -818,6 +912,9 @@ SHOW(bch2_dev) if (attr == &sysfs_open_buckets) bch2_open_buckets_to_text(out, c, ca); + if (attr == &sysfs_read_fua_test) + return bch2_read_fua_test(out, ca); + int opt_id = bch2_opt_lookup(attr->name); if (opt_id >= 0) return sysfs_opt_show(c, ca, opt_id, out); @@ -874,6 +971,8 @@ struct attribute *bch2_dev_files[] = { &sysfs_io_latency_stats_write, &sysfs_congested, + &sysfs_read_fua_test, + /* debug: */ &sysfs_alloc_debug, &sysfs_open_buckets, diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 87af5516..6e5d7fc2 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -252,8 +252,8 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } -static void __bch2_print_string_as_lines(const char *prefix, const char *lines, - bool nonblocking) +void bch2_print_string_as_lines(const char *prefix, const char *lines, + bool nonblocking) { bool locked = false; const char *p; @@ -281,16 +281,6 @@ static void __bch2_print_string_as_lines(const char *prefix, const char *lines, console_unlock(); } -void bch2_print_string_as_lines(const char *prefix, const char *lines) -{ - return __bch2_print_string_as_lines(prefix, lines, false); -} - -void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines) -{ - return __bch2_print_string_as_lines(prefix, lines, true); -} - int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, gfp_t gfp) { diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 3cd2a423..fa742af9 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -212,8 +212,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]); 
void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); -void bch2_print_string_as_lines(const char *prefix, const char *lines); -void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines); +void bch2_print_string_as_lines(const char *, const char *, bool); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 651da52b..3d324e48 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -523,7 +523,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, if (ret < 0) goto err_class_exit; - ret = bch2_opt_check_may_set(c, NULL, opt_id, v); + ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v); if (ret < 0) goto err_class_exit;
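
For readers tracking the move_pred_fn signature change in this update (predicates now also receive the btree ID of the key under consideration), the following is a minimal sketch of a predicate written against the new typedef. It is not code from this commit: the function name and its filter-on-extents policy are invented for illustration, and it assumes the usual bcachefs internal headers (move.h, opts.h).

/*
 * Illustrative only: a move predicate using the new signature, which
 * adds an enum btree_id parameter so callbacks can filter per btree.
 * Callers (bch2_move_data_btree(), __bch2_move_data_phys()) zero
 * *data_opts before invoking the predicate, so a predicate only needs
 * to fill in the fields it cares about and return true to move the key.
 */
static bool extents_only_pred(struct bch_fs *c, void *arg,
			      enum btree_id btree, struct bkey_s_c k,
			      struct bch_io_opts *io_opts,
			      struct data_update_opts *data_opts)
{
	/* Only act on keys from the extents btree: */
	if (btree != BTREE_ID_extents)
		return false;

	/* Move with default data update options (already zeroed by the caller): */
	return true;
}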
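
The option hooks renamed and added here (bch2_opt_hook_pre_set() replacing bch2_opt_check_may_set(), plus the new bch2_opt_hook_post_set()) change how a runtime option write proceeds, as seen in the sysfs_opt_store() hunk above. The sketch below condenses that flow for a filesystem-wide option; the wrapper function itself is hypothetical, and error paths, locking, and the per-device (ca != NULL) case are omitted.

/*
 * Condensed sketch of the new option-update sequence (assumed wrapper,
 * not from the commit):
 *   1. the pre-set hook validates and prepares (e.g. enables features);
 *   2. bch2_opt_set_sb() persists the value and reports whether it
 *      actually changed, so the superblock is only rewritten on change;
 *   3. the in-memory copy is updated;
 *   4. the post-set hook runs side effects (rebalance scans, copygc/
 *      rebalance wakeups, discard propagation) only on a real change.
 */
static int set_fs_option(struct bch_fs *c, enum bch_opt_id id, u64 v)
{
	const struct bch_option *opt = bch2_opt_table + id;

	int ret = bch2_opt_hook_pre_set(c, NULL, id, v);
	if (ret < 0)
		return ret;

	bool changed = bch2_opt_set_sb(c, NULL, opt, v);

	bch2_opt_set_by_id(&c->opts, id, v);

	if (changed)
		bch2_opt_hook_post_set(c, NULL, 0, &c->opts, id);

	return 0;
}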