diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/bcachefs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 43 | ||||
-rw-r--r-- | fs/bcachefs/data_update.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/sb-errors_format.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/sb-members.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/trace.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/util.c | 5 |
9 files changed, 74 insertions, 27 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 41c6d8865a74..fb3156ed7f0b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1288,7 +1288,7 @@ static inline int bch2_fs_casefold_enabled(struct bch_fs *c) { if (!IS_ENABLED(CONFIG_UNICODE)) return bch_err_throw(c, no_casefolding_without_utf8); - if (!c->opts.casefold_disabled) + if (c->opts.casefold_disabled) return bch_err_throw(c, casefolding_disabled); return 0; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 19fd951495ac..84e302afc8fc 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1337,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); - scoped_guard(rcu) - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { - set_btree_node_need_rewrite(b); - set_btree_node_need_rewrite_degraded(b); + /* + * XXX: + * + * We deadlock if too many btree updates require node rewrites while + * we're still in journal replay. + * + * This is because btree node rewrites generate more updates for the + * interior updates (alloc, backpointers), and if those updates touch + * new nodes and generate more rewrites - well, you see the problem. + * + * The biggest cause is that we don't use the btree write buffer (for + * the backpointer updates - this needs some real thought on locking in + * order to fix. + * + * The problem with this workaround (not doing the rewrite for degraded + * nodes in journal replay) is that those degraded nodes persist, and we + * don't want that (this is a real bug when a btree node write completes + * with fewer replicas than we wanted and leaves a degraded node due to + * device _removal_, i.e. the device went away mid write). + * + * It's less of a bug here, but still a problem because we don't yet + * have a way of tracking degraded data - we another index (all + * extents/btree nodes, by replicas entry) in order to fix properly + * (re-replicate degraded data at the earliest possible time). + */ + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) { + scoped_guard(rcu) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { + struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); + + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { + set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - } + } if (!ptr_written) { set_btree_node_need_rewrite(b); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index e848e210a9bf..3968f3be7f3b 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -783,6 +783,9 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) darray_for_each(m->op.devs_have, i) __clear_bit(*i, devs.d); + CLASS(printbuf, buf)(); + buf.atomic++; + guard(rcu)(); unsigned nr_replicas = 0, i; @@ -794,7 +797,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) struct bch_dev_usage usage; bch2_dev_usage_read_fast(ca, &usage); - if (!dev_buckets_free(ca, usage, m->op.watermark)) + u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark); + + prt_printf(&buf, "%s=%llu ", ca->name, nr_free); + + if (!nr_free) continue; nr_replicas += ca->mi.durability; @@ -802,8 +809,10 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) break; } - if (!nr_replicas) + if (!nr_replicas) { + trace_data_update_done_no_rw_devs(c, buf.buf); return bch_err_throw(c, data_update_done_no_rw_devs); + } if (nr_replicas < m->op.nr_replicas) return bch_err_throw(c, insufficient_devices); return 0; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 28a86bb644f7..f9bc99eb2d02 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2563,9 +2563,11 @@ got_sb: sb->s_shrink->seeks = 0; +#if IS_ENABLED(CONFIG_UNICODE) if (!bch2_fs_casefold_enabled(c)) sb->s_encoding = c->cf_encoding; generic_set_sb_d_ops(sb); +#endif vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 02605976a114..dd4ee46606d7 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -158,6 +158,7 @@ enum bch_fsck_flags { x(extent_ptrs_unwritten, 140, 0) \ x(extent_ptrs_written_and_unwritten, 141, 0) \ x(ptr_to_invalid_device, 142, 0) \ + x(ptr_to_removed_device, 322, 0) \ x(ptr_to_duplicate_device, 143, 0) \ x(ptr_after_last_bucket, 144, 0) \ x(ptr_before_first_bucket, 145, 0) \ @@ -333,7 +334,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 322, 0) + x(MAX, 323, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 96f98f111f48..f2abe92ca130 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -15,10 +15,15 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) struct printbuf buf = PRINTBUF; bch2_log_msg_start(c, &buf); - prt_printf(&buf, "pointer to nonexistent device %u in key\n", dev); + bool removed = test_bit(dev, c->devs_removed.d); + + prt_printf(&buf, "pointer to %s device %u in key\n", + removed ? "removed" : "nonexistent", dev); bch2_bkey_val_to_text(&buf, c, k); - bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); + bool print = removed + ? bch2_count_fsck_err(c, ptr_to_removed_device, &buf) + : bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); int ret = bch2_run_explicit_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_allocations, 0); @@ -32,7 +37,9 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { if (dev != BCH_SB_MEMBER_INVALID) - bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); + bch2_fs_inconsistent(c, "pointer to %s device %u", + test_bit(dev, c->devs_removed.d) + ? "removed" : "nonexistent", dev); } void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7340c1118579..6980cd5b0ca8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1024,13 +1024,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } -#if !IS_ENABLED(CONFIG_UNICODE) - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif +#if IS_ENABLED(CONFIG_UNICODE) if (!bch2_fs_casefold_enabled(c)) { /* Default encoding until we can potentially have more as an option. */ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); @@ -1043,6 +1037,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } } +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index b5dae1145afa..9324ef32903d 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1330,6 +1330,11 @@ DEFINE_EVENT(fs_str, data_update, TP_ARGS(c, str) ); +DEFINE_EVENT(fs_str, data_update_done_no_rw_devs, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + DEFINE_EVENT(fs_str, io_move_pred, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 05b40debf211..7a4436fd4441 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -299,17 +299,12 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne if (ret) return ret; - if (!down_read_trylock(&task->signal->exec_update_lock)) - return -1; - do { nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); } while (nr_entries == stack->size && !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp))); stack->nr = nr_entries; - up_read(&task->signal->exec_update_lock); - return ret; #else return 0; |