diff options
-rw-r--r-- | fs/bcachefs/bcachefs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 43 | ||||
-rw-r--r-- | fs/bcachefs/data_update.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/trace.h | 5 |
5 files changed, 60 insertions, 18 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 41c6d8865a74..fb3156ed7f0b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1288,7 +1288,7 @@ static inline int bch2_fs_casefold_enabled(struct bch_fs *c) { if (!IS_ENABLED(CONFIG_UNICODE)) return bch_err_throw(c, no_casefolding_without_utf8); - if (!c->opts.casefold_disabled) + if (c->opts.casefold_disabled) return bch_err_throw(c, casefolding_disabled); return 0; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 19fd951495ac..84e302afc8fc 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1337,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); - scoped_guard(rcu) - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { - set_btree_node_need_rewrite(b); - set_btree_node_need_rewrite_degraded(b); + /* + * XXX: + * + * We deadlock if too many btree updates require node rewrites while + * we're still in journal replay. + * + * This is because btree node rewrites generate more updates for the + * interior updates (alloc, backpointers), and if those updates touch + * new nodes and generate more rewrites - well, you see the problem. + * + * The biggest cause is that we don't use the btree write buffer (for + * the backpointer updates - this needs some real thought on locking in + * order to fix. + * + * The problem with this workaround (not doing the rewrite for degraded + * nodes in journal replay) is that those degraded nodes persist, and we + * don't want that (this is a real bug when a btree node write completes + * with fewer replicas than we wanted and leaves a degraded node due to + * device _removal_, i.e. the device went away mid write). + * + * It's less of a bug here, but still a problem because we don't yet + * have a way of tracking degraded data - we another index (all + * extents/btree nodes, by replicas entry) in order to fix properly + * (re-replicate degraded data at the earliest possible time). + */ + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) { + scoped_guard(rcu) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { + struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); + + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { + set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - } + } if (!ptr_written) { set_btree_node_need_rewrite(b); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index e848e210a9bf..3968f3be7f3b 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -783,6 +783,9 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) darray_for_each(m->op.devs_have, i) __clear_bit(*i, devs.d); + CLASS(printbuf, buf)(); + buf.atomic++; + guard(rcu)(); unsigned nr_replicas = 0, i; @@ -794,7 +797,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) struct bch_dev_usage usage; bch2_dev_usage_read_fast(ca, &usage); - if (!dev_buckets_free(ca, usage, m->op.watermark)) + u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark); + + prt_printf(&buf, "%s=%llu ", ca->name, nr_free); + + if (!nr_free) continue; nr_replicas += ca->mi.durability; @@ -802,8 +809,10 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) break; } - if (!nr_replicas) + if (!nr_replicas) { + trace_data_update_done_no_rw_devs(c, buf.buf); return bch_err_throw(c, data_update_done_no_rw_devs); + } if (nr_replicas < m->op.nr_replicas) return bch_err_throw(c, insufficient_devices); return 0; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7340c1118579..6980cd5b0ca8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1024,13 +1024,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } -#if !IS_ENABLED(CONFIG_UNICODE) - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif +#if IS_ENABLED(CONFIG_UNICODE) if (!bch2_fs_casefold_enabled(c)) { /* Default encoding until we can potentially have more as an option. */ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); @@ -1043,6 +1037,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } } +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index b5dae1145afa..9324ef32903d 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1330,6 +1330,11 @@ DEFINE_EVENT(fs_str, data_update, TP_ARGS(c, str) ); +DEFINE_EVENT(fs_str, data_update_done_no_rw_devs, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + DEFINE_EVENT(fs_str, io_move_pred, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) |