summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2025-04-19 00:11:55 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2025-04-19 00:12:06 -0400
commit 3b819fd0d76356a2b6f7fe5797afc7832a3583cf (patch)
tree f6f20f13812215e113bb983ac1f510e4caa27485
parent d681613129c84a9e139bbe2f6e683fc6f6076fe3 (diff)
Update bcachefs sources to 7307b739bbe5 bcachefs: Read retries are after checksum errors now REQ_FUA
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- libbcachefs/bcachefs_format.h 80
-rw-r--r-- libbcachefs/bkey_methods.c 24
-rw-r--r-- libbcachefs/error.c 17
-rw-r--r-- libbcachefs/error.h 1
-rw-r--r-- libbcachefs/fsck.c 44
-rw-r--r-- libbcachefs/journal.c 35
-rw-r--r-- libbcachefs/journal.h 7
-rw-r--r-- libbcachefs/journal_reclaim.c 7
-rw-r--r-- libbcachefs/rebalance.c 116
-rw-r--r-- libbcachefs/rebalance.h 2
-rw-r--r-- libbcachefs/recovery.c 6
-rw-r--r-- libbcachefs/recovery_passes_types.h 1
-rw-r--r-- libbcachefs/sb-errors_format.h 4
-rw-r--r-- libbcachefs/snapshot.c 2
-rw-r--r-- libbcachefs/super.c 20
16 files changed, 293 insertions, 75 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 97e95d4e..de77df06 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-65456ba56b930afe6935d2dc2128ba12c1e5fa6f
+7307b739bbe5f1d9415f5c1da070723b3fb5abbd
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index aa57f470..9af2a607 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k)
#define __BKEY_PADDED(key, pad) \
struct bkey_i key; __u64 key ## _pad[pad]
+enum bch_bkey_type_flags {
+ BKEY_TYPE_strict_btree_checks = BIT(0),
+};
+
/*
* - DELETED keys are used internally to mark keys that should be ignored but
* override keys in composition order. Their version number is ignored.
@@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k)
*
* - WHITEOUT: for hash table btrees
*/
-#define BCH_BKEY_TYPES() \
- x(deleted, 0) \
- x(whiteout, 1) \
- x(error, 2) \
- x(cookie, 3) \
- x(hash_whiteout, 4) \
- x(btree_ptr, 5) \
- x(extent, 6) \
- x(reservation, 7) \
- x(inode, 8) \
- x(inode_generation, 9) \
- x(dirent, 10) \
- x(xattr, 11) \
- x(alloc, 12) \
- x(quota, 13) \
- x(stripe, 14) \
- x(reflink_p, 15) \
- x(reflink_v, 16) \
- x(inline_data, 17) \
- x(btree_ptr_v2, 18) \
- x(indirect_inline_data, 19) \
- x(alloc_v2, 20) \
- x(subvolume, 21) \
- x(snapshot, 22) \
- x(inode_v2, 23) \
- x(alloc_v3, 24) \
- x(set, 25) \
- x(lru, 26) \
- x(alloc_v4, 27) \
- x(backpointer, 28) \
- x(inode_v3, 29) \
- x(bucket_gens, 30) \
- x(snapshot_tree, 31) \
- x(logged_op_truncate, 32) \
- x(logged_op_finsert, 33) \
- x(accounting, 34) \
- x(inode_alloc_cursor, 35)
+#define BCH_BKEY_TYPES() \
+ x(deleted, 0, 0) \
+ x(whiteout, 1, 0) \
+ x(error, 2, 0) \
+ x(cookie, 3, 0) \
+ x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \
+ x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \
+ x(extent, 6, BKEY_TYPE_strict_btree_checks) \
+ x(reservation, 7, BKEY_TYPE_strict_btree_checks) \
+ x(inode, 8, BKEY_TYPE_strict_btree_checks) \
+ x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \
+ x(dirent, 10, BKEY_TYPE_strict_btree_checks) \
+ x(xattr, 11, BKEY_TYPE_strict_btree_checks) \
+ x(alloc, 12, BKEY_TYPE_strict_btree_checks) \
+ x(quota, 13, BKEY_TYPE_strict_btree_checks) \
+ x(stripe, 14, BKEY_TYPE_strict_btree_checks) \
+ x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \
+ x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \
+ x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \
+ x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \
+ x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \
+ x(alloc_v2, 20, BKEY_TYPE_strict_btree_checks) \
+ x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \
+ x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \
+ x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \
+ x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \
+ x(set, 25, 0) \
+ x(lru, 26, BKEY_TYPE_strict_btree_checks) \
+ x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \
+ x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \
+ x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \
+ x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \
+ x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \
+ x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \
+ x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \
+ x(accounting, 34, BKEY_TYPE_strict_btree_checks) \
+ x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks)
enum bch_bkey_type {
-#define x(name, nr) KEY_TYPE_##name = nr,
+#define x(name, nr, ...) KEY_TYPE_##name = nr,
BCH_BKEY_TYPES()
#undef x
KEY_TYPE_MAX,
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 15c93576..00d05ccf 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -21,7 +21,7 @@
#include "xattr.h"
const char * const bch2_bkey_types[] = {
-#define x(name, nr) #name,
+#define x(name, nr, ...) #name,
BCH_BKEY_TYPES()
#undef x
NULL
@@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_
})
const struct bkey_ops bch2_bkey_ops[] = {
-#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
+#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
BCH_BKEY_TYPES()
#undef x
};
@@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = {
#undef x
};
+static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = {
+#define x(name, nr, flags) [KEY_TYPE_##name] = flags,
+ BCH_BKEY_TYPES()
+#undef x
+};
+
const char *bch2_btree_node_type_str(enum btree_node_type type)
{
return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
@@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;
- bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
- (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
+ enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX
+ ? bch2_bkey_type_flags[k.k->type]
+ : 0;
+
+ bool strict_key_type_allowed =
+ (from.flags & BCH_VALIDATE_commit) ||
+ type == BKEY_TYPE_btree ||
+ (from.btree < BTREE_ID_NR &&
+ (bkey_flags & BKEY_TYPE_strict_btree_checks));
+
+ bkey_fsck_err_on(strict_key_type_allowed &&
+ k.k->type < KEY_TYPE_MAX &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
c, bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 04a714b0..91a65a20 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
{
struct fsck_err_state *s;
- if (!test_bit(BCH_FS_fsck_running, &c->flags))
- return NULL;
-
list_for_each_entry(s, &c->fsck_error_msgs, list)
if (s->id == id) {
/*
@@ -645,14 +642,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
return ret;
}
-void bch2_flush_fsck_errs(struct bch_fs *c)
+static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
{
struct fsck_err_state *s, *n;
mutex_lock(&c->fsck_error_msgs_lock);
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
- if (s->ratelimited && s->last_msg)
+ if (print && s->ratelimited && s->last_msg)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
list_del(&s->list);
@@ -663,6 +660,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
mutex_unlock(&c->fsck_error_msgs_lock);
}
+void bch2_flush_fsck_errs(struct bch_fs *c)
+{
+ __bch2_flush_fsck_errs(c, true);
+}
+
+void bch2_free_fsck_errs(struct bch_fs *c)
+{
+ __bch2_flush_fsck_errs(c, false);
+}
+
int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
subvol_inum inum, u64 offset)
{
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index da653e4e..0b3ede1c 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -91,6 +91,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
_flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__)
void bch2_flush_fsck_errs(struct bch_fs *);
+void bch2_free_fsck_errs(struct bch_fs *);
#define fsck_err_wrap(_do) \
({ \
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 8e95cc7f..540db66e 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -321,6 +321,31 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)
return false;
+ /*
+ * Subvolume roots are special: older versions of subvolume roots may be
+ * disconnected, it's only the newest version that matters.
+ *
+ * We only keep a single dirent pointing to a subvolume root, i.e.
+ * older versions of snapshots will not have a different dirent pointing
+ * to the same subvolume root.
+ *
+ * This is because dirents that point to subvolumes are only visible in
+ * the parent subvolume - versioning is not needed - and keeping them
+ * around would break fsck, because when we're crossing subvolumes we
+ * don't have a consistent snapshot ID to check the inode <-> dirent
+ * relationships.
+ *
+ * Thus, a subvolume root that's been renamed after a snapshot will have
+ * a disconnected older version - that's expected.
+ *
+ * Note that taking a snapshot always updates the root inode (to update
+ * the dirent backpointer), so a subvolume root inode with
+ * BCH_INODE_has_child_snapshot is never visible.
+ */
+ if (inode->bi_subvol &&
+ (inode->bi_flags & BCH_INODE_has_child_snapshot))
+ return false;
+
return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
}
@@ -1007,6 +1032,23 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
+ if ((ret || dirent_points_to_inode_nowarn(d, inode)) &&
+ inode->bi_subvol &&
+ (inode->bi_flags & BCH_INODE_has_child_snapshot)) {
+ /* Older version of a renamed subvolume root: we won't have a
+ * correct dirent for it. That's expected, see
+ * inode_should_reattach().
+ *
+ * We don't clear the backpointer field when doing the rename
+ * because there might be arbitrarily many versions in older
+ * snapshots.
+ */
+ inode->bi_dir = 0;
+ inode->bi_dir_offset = 0;
+ *write_inode = true;
+ goto out;
+ }
+
if (fsck_err_on(ret,
trans, inode_points_to_missing_dirent,
"inode points to missing dirent\n%s",
@@ -1027,7 +1069,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
inode->bi_dir_offset = 0;
*write_inode = true;
}
-
+out:
ret = 0;
fsck_err:
bch2_trans_iter_exit(trans, &dirent_iter);
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 898f6125..3694b83a 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
buf->u64s_reserved) << c->block_bits;
- BUG_ON(sectors > buf->sectors);
+ if (unlikely(sectors > buf->sectors)) {
+ struct printbuf err = PRINTBUF;
+ err.atomic++;
+
+ prt_printf(&err, "journal entry overran reserved space: %u > %u\n",
+ sectors, buf->sectors);
+ prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n",
+ le32_to_cpu(buf->data->u64s), buf->u64s_reserved,
+ j->cur_entry_u64s,
+ c->block_bits);
+ prt_printf(&err, "fatal error - emergency read only");
+ bch2_journal_halt_locked(j);
+
+ bch_err(c, "%s", err.buf);
+ printbuf_exit(&err);
+ return;
+ }
+
buf->sectors = sectors;
/*
@@ -1467,7 +1484,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
j->last_empty_seq = cur_seq - 1; /* to match j->seq */
spin_lock(&j->lock);
- set_bit(JOURNAL_running, &j->flags);
j->last_flush_write = jiffies;
j->reservations.idx = journal_cur_seq(j);
@@ -1478,6 +1494,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
return 0;
}
+void bch2_journal_set_replay_done(struct journal *j)
+{
+ /*
+ * Ordering: bch2_journal_space_available() must run before JOURNAL_running
+ * is set, and JOURNAL_running must be set before JOURNAL_replay_done.
+ */
+ spin_lock(&j->lock);
+ bch2_journal_space_available(j);
+
+ set_bit(JOURNAL_need_flush_write, &j->flags);
+ set_bit(JOURNAL_running, &j->flags);
+ set_bit(JOURNAL_replay_done, &j->flags);
+ spin_unlock(&j->lock);
+}
+
/* init/exit: */
void bch2_dev_journal_exit(struct bch_dev *ca)
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 738dd2c8..8ff00a0e 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j)
struct bch_dev;
-static inline void bch2_journal_set_replay_done(struct journal *j)
-{
- BUG_ON(!test_bit(JOURNAL_running, &j->flags));
- set_bit(JOURNAL_replay_done, &j->flags);
-}
-
void bch2_journal_unblock(struct journal *);
void bch2_journal_block(struct journal *);
struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *);
@@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
int bch2_fs_journal_start(struct journal *, u64);
+void bch2_journal_set_replay_done(struct journal *);
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 5d1547aa..d1c0fc59 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -252,7 +252,10 @@ void bch2_journal_space_available(struct journal *j)
bch2_journal_set_watermark(j);
out:
- j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
+ j->cur_entry_sectors = !ret
+ ? round_down(j->space[journal_space_discarded].next_entry,
+ block_sectors(c))
+ : 0;
j->cur_entry_error = ret;
if (!ret)
@@ -624,8 +627,6 @@ static u64 journal_seq_to_flush(struct journal *j)
/* Try to keep the journal at most half full: */
nr_buckets = ja->nr / 2;
- nr_buckets = min(nr_buckets, ja->nr);
-
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
seq_to_flush = max(seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 2f0f8782..b0314674 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -705,3 +705,119 @@ void bch2_fs_rebalance_init(struct bch_fs *c)
{
bch2_pd_controller_init(&c->rebalance.pd);
}
+
+static int check_rebalance_work_one(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ struct btree_iter *rebalance_iter,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c extent_k, rebalance_k;
+ struct printbuf buf = PRINTBUF;
+
+ int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?:
+ bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter));
+ if (ret)
+ return ret;
+
+ if (!extent_k.k &&
+ extent_iter->btree_id == BTREE_ID_reflink &&
+ (!rebalance_k.k ||
+ rebalance_k.k->p.inode >= BCACHEFS_ROOT_INO)) {
+ bch2_trans_iter_exit(trans, extent_iter);
+ bch2_trans_iter_init(trans, extent_iter,
+ BTREE_ID_extents, POS_MIN,
+ BTREE_ITER_prefetch|
+ BTREE_ITER_all_snapshots);
+ return -BCH_ERR_transaction_restart_nested;
+ }
+
+ if (!extent_k.k && !rebalance_k.k)
+ return 1;
+
+ int cmp = bpos_cmp(extent_k.k ? extent_k.k->p : SPOS_MAX,
+ rebalance_k.k ? rebalance_k.k->p : SPOS_MAX);
+
+ struct bkey deleted;
+ bkey_init(&deleted);
+
+ if (cmp < 0) {
+ deleted.p = extent_k.k->p;
+ rebalance_k.k = &deleted;
+ } else if (cmp > 0) {
+ deleted.p = rebalance_k.k->p;
+ extent_k.k = &deleted;
+ }
+
+ bool should_have_rebalance =
+ bch2_bkey_sectors_need_rebalance(c, extent_k) != 0;
+ bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set;
+
+ if (should_have_rebalance != have_rebalance) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed);
+ if (ret)
+ return ret;
+
+ bch2_bkey_val_to_text(&buf, c, extent_k);
+ }
+
+ if (fsck_err_on(!should_have_rebalance && have_rebalance,
+ trans, rebalance_work_incorrectly_set,
+ "rebalance work incorrectly set\n%s", buf.buf)) {
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+ extent_k.k->p, false);
+ if (ret)
+ goto err;
+ }
+
+ if (fsck_err_on(should_have_rebalance && !have_rebalance,
+ trans, rebalance_work_incorrectly_unset,
+ "rebalance work incorrectly unset\n%s", buf.buf)) {
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+ extent_k.k->p, true);
+ if (ret)
+ goto err;
+ }
+
+ if (cmp <= 0)
+ bch2_btree_iter_advance(trans, extent_iter);
+ if (cmp >= 0)
+ bch2_btree_iter_advance(trans, rebalance_iter);
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_rebalance_work(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter rebalance_iter, extent_iter;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &extent_iter,
+ BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_prefetch);
+ bch2_trans_iter_init(trans, &rebalance_iter,
+ BTREE_ID_rebalance_work, POS_MIN,
+ BTREE_ITER_prefetch);
+
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ while (!ret) {
+ bch2_trans_begin(trans);
+
+ ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+ }
+
+ bch2_bkey_buf_exit(&last_flushed, c);
+ bch2_trans_iter_exit(trans, &extent_iter);
+ bch2_trans_iter_exit(trans, &rebalance_iter);
+ bch2_trans_put(trans);
+ return ret < 0 ? ret : 0;
+}
diff --git a/libbcachefs/rebalance.h b/libbcachefs/rebalance.h
index e5e8eb4a..b7c8c065 100644
--- a/libbcachefs/rebalance.h
+++ b/libbcachefs/rebalance.h
@@ -54,4 +54,6 @@ void bch2_rebalance_stop(struct bch_fs *);
int bch2_rebalance_start(struct bch_fs *);
void bch2_fs_rebalance_init(struct bch_fs *);
+int bch2_check_rebalance_work(struct bch_fs *);
+
#endif /* _BCACHEFS_REBALANCE_H */
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index f7b4b659..df953b64 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -1149,13 +1149,13 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- set_bit(BCH_FS_accounting_replay_done, &c->flags);
- bch2_journal_set_replay_done(&c->journal);
-
ret = bch2_fs_read_write_early(c);
if (ret)
goto err;
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);
+ bch2_journal_set_replay_done(&c->journal);
+
for_each_member_device(c, ca) {
ret = bch2_dev_usage_init(ca, false);
if (ret) {
diff --git a/libbcachefs/recovery_passes_types.h b/libbcachefs/recovery_passes_types.h
index f9d565bb..be3185fc 100644
--- a/libbcachefs/recovery_passes_types.h
+++ b/libbcachefs/recovery_passes_types.h
@@ -59,6 +59,7 @@
x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
+ x(check_rebalance_work, 43, PASS_ONLINE|PASS_FSCK) \
x(resume_logged_ops, 23, PASS_ALWAYS) \
x(delete_dead_inodes, 32, PASS_ALWAYS) \
x(fix_reflink_p, 33, 0) \
diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h
index 83acfd9c..2e1d487a 100644
--- a/libbcachefs/sb-errors_format.h
+++ b/libbcachefs/sb-errors_format.h
@@ -318,7 +318,9 @@ enum bch_fsck_flags {
x(directory_size_mismatch, 303, FSCK_AUTOFIX) \
x(dirent_cf_name_too_big, 304, 0) \
x(dirent_stray_data_after_cf_name, 305, 0) \
- x(MAX, 309, 0)
+ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
+ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
+ x(MAX, 311, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index df14237a..2eede851 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -406,7 +406,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
u32 subvol = 0, s;
rcu_read_lock();
- while (id) {
+ while (id && bch2_snapshot_exists(c, id)) {
s = snapshot_t(c, id)->subvol;
if (s && (!subvol || s < subvol))
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 3fa4706c..4632230f 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -492,29 +492,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_clean_shutdown, &c->flags);
+ __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
+ bch2_dev_allocator_add(c, ca);
+ percpu_ref_reinit(&ca->io_ref[WRITE]);
+ }
+ bch2_recalc_capacity(c);
+
/*
* First journal write must be a flush write: after a clean shutdown we
* don't read the journal, so the first journal write may end up
* overwriting whatever was there previously, and there must always be
* at least one non-flush write in the journal or recovery will fail:
*/
+ spin_lock(&c->journal.lock);
set_bit(JOURNAL_need_flush_write, &c->journal.flags);
set_bit(JOURNAL_running, &c->journal.flags);
-
- __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
- bch2_dev_allocator_add(c, ca);
- percpu_ref_reinit(&ca->io_ref[WRITE]);
- }
- bch2_recalc_capacity(c);
+ bch2_journal_space_available(&c->journal);
+ spin_unlock(&c->journal.lock);
ret = bch2_fs_mark_dirty(c);
if (ret)
goto err;
- spin_lock(&c->journal.lock);
- bch2_journal_space_available(&c->journal);
- spin_unlock(&c->journal.lock);
-
ret = bch2_journal_reclaim_start(&c->journal);
if (ret)
goto err;
@@ -582,6 +581,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
+ bch2_free_fsck_errs(c);
bch2_fs_accounting_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);