summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-12-03 21:33:02 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2024-12-03 21:51:54 -0500
commitac124c88859e6520e01d11a346e38f901052d657 (patch)
tree2a9102c5c02b725507bf33bc0c83b26c7e8fdfa8
parentc560ff06f4798f27aa7b3ae322c83d5a9ba37b13 (diff)
Update bcachefs sources to 4a32728376a8 bcachefs: bcachefs_metadata_version_inode_depth
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--.bcachefs_revision2
-rw-r--r--bch_bindgen/src/bkey.rs2
-rw-r--r--include/linux/slab.h2
-rw-r--r--include/linux/srcu.h34
-rw-r--r--libbcachefs/backpointers.c376
-rw-r--r--libbcachefs/backpointers.h30
-rw-r--r--libbcachefs/bcachefs.h4
-rw-r--r--libbcachefs/bcachefs_format.h28
-rw-r--r--libbcachefs/btree_cache.c1
-rw-r--r--libbcachefs/btree_journal_iter.c2
-rw-r--r--libbcachefs/buckets.c10
-rw-r--r--libbcachefs/ec.c14
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/fs-common.c13
-rw-r--r--libbcachefs/fs-io.c9
-rw-r--r--libbcachefs/fsck.c91
-rw-r--r--libbcachefs/inode.h14
-rw-r--r--libbcachefs/inode_format.h3
-rw-r--r--libbcachefs/journal_io.c3
-rw-r--r--libbcachefs/journal_reclaim.c7
-rw-r--r--libbcachefs/logged_ops.c10
-rw-r--r--libbcachefs/logged_ops_format.h2
-rw-r--r--libbcachefs/move.c59
-rw-r--r--libbcachefs/rcu_pending.c40
-rw-r--r--libbcachefs/recovery.c27
-rw-r--r--libbcachefs/recovery_passes_types.h92
-rw-r--r--libbcachefs/reflink.c18
-rw-r--r--libbcachefs/reflink.h3
-rw-r--r--libbcachefs/reflink_format.h2
-rw-r--r--libbcachefs/sb-errors_format.h4
-rw-r--r--libbcachefs/super-io.c51
-rw-r--r--libbcachefs/super-io.h19
32 files changed, 713 insertions, 260 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index bbe1be9a..534b898f 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-bc01863fb6eff06f7b028e4c5cd8850d21d7f10d
+4a32728376a81dd9e75851a49159ff33602840f4
diff --git a/bch_bindgen/src/bkey.rs b/bch_bindgen/src/bkey.rs
index 91d515b7..2a012508 100644
--- a/bch_bindgen/src/bkey.rs
+++ b/bch_bindgen/src/bkey.rs
@@ -50,6 +50,7 @@ pub enum BkeyValC<'a> {
logged_op_truncate(&'a c::bch_logged_op_truncate),
logged_op_finsert(&'a c::bch_logged_op_finsert),
accounting(&'a c::bch_accounting),
+ inode_alloc_cursor(&'a c::bch_inode_alloc_cursor),
}
impl<'a, 'b> BkeySC<'a> {
@@ -106,6 +107,7 @@ impl<'a, 'b> BkeySC<'a> {
KEY_TYPE_logged_op_truncate => logged_op_truncate(transmute(self.v)),
KEY_TYPE_logged_op_finsert => logged_op_finsert(transmute(self.v)),
KEY_TYPE_accounting => accounting(transmute(self.v)),
+ KEY_TYPE_inode_alloc_cursor => inode_alloc_cursor(transmute(self.v)),
KEY_TYPE_MAX => unreachable!(),
}
}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 24df2cc5..4a6d015f 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -103,6 +103,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
((size) != 0 && (n) > SIZE_MAX / (size) \
? NULL : kmalloc((n) * (size), flags))
+#define kvcalloc(n, size, flags) kvmalloc_array(n, size, flags|__GFP_ZERO)
+
#define kvmalloc_array_noprof(...) kvmalloc_array(__VA_ARGS__)
#define kcalloc(n, size, flags) kmalloc_array(n, size, flags|__GFP_ZERO)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index b93cb8e3..7aa2b964 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -7,6 +7,11 @@
typedef void (*rcu_callback_t)(struct rcu_head *head);
+static inline struct urcu_gp_poll_state get_state_synchronize_rcu()
+{
+ return start_poll_synchronize_rcu();
+}
+
struct srcu_struct {
};
@@ -17,36 +22,19 @@ static inline int srcu_read_lock(struct srcu_struct *ssp)
return 0;
}
-static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
-{
- return true;
-}
-
-static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
-{
- return 0;
-}
-
-static inline unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
-{
- return 0;
-}
-
-#undef poll_state_synchronize_rcu
-static inline bool poll_state_synchronize_rcu(unsigned long cookie)
+static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, struct urcu_gp_poll_state cookie)
{
- return false;
+ return poll_state_synchronize_rcu(cookie);
}
-#undef start_poll_synchronize_rcu
-static inline unsigned long start_poll_synchronize_rcu()
+static inline struct urcu_gp_poll_state start_poll_synchronize_srcu(struct srcu_struct *ssp)
{
- return 0;
+ return start_poll_synchronize_rcu();
}
-static inline unsigned long get_state_synchronize_rcu()
+static inline struct urcu_gp_poll_state get_state_synchronize_srcu(struct srcu_struct *ssp)
{
- return 0;
+ return get_state_synchronize_rcu();
}
static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) {}
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index b53d98c7..72f37171 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -72,26 +72,14 @@ static bool extent_matches_bp(struct bch_fs *c,
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- rcu_read_lock();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bpos bucket2;
struct bkey_i_backpointer bp2;
+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2);
- if (p.ptr.cached)
- continue;
-
- struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
- if (!ca)
- continue;
-
- bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2);
if (bpos_eq(bp.k->p, bp2.k.p) &&
- !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) {
- rcu_read_unlock();
+ !memcmp(bp.v, &bp2.v, sizeof(bp2.v)))
return true;
- }
}
- rcu_read_unlock();
return false;
}
@@ -183,9 +171,10 @@ err:
static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
{
- return likely(!bch2_backpointers_no_use_write_buffer)
- ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
- : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0);
+ return (likely(!bch2_backpointers_no_use_write_buffer)
+ ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
+ : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}
static int bch2_backpointers_maybe_flush(struct btree_trans *trans,
@@ -197,20 +186,23 @@ static int bch2_backpointers_maybe_flush(struct btree_trans *trans,
: 0;
}
-static void backpointer_target_not_found(struct btree_trans *trans,
- struct bkey_s_c_backpointer bp,
- struct bkey_s_c target_k)
+static int backpointer_target_not_found(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ struct bkey_s_c target_k,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ int ret = 0;
/*
* If we're using the btree write buffer, the backpointer we were
* looking at may have already been deleted - failure to find what it
* pointed to is not an error:
*/
- if (likely(!bch2_backpointers_no_use_write_buffer))
- return;
+ ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed);
+ if (ret)
+ return ret;
prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
bp.v->level ? "btree node" : "extent");
@@ -218,18 +210,20 @@ static void backpointer_target_not_found(struct btree_trans *trans,
prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, target_k);
- if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
- bch_err_ratelimited(c, "%s", buf.buf);
- else
- bch2_trans_inconsistent(trans, "%s", buf.buf);
+ if (fsck_err(trans, backpointer_to_missing_ptr,
+ "%s", buf.buf))
+ ret = bch2_backpointer_del(trans, bp.k->p);
+fsck_err:
printbuf_exit(&buf);
+ return ret;
}
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct btree_iter *iter,
- unsigned iter_flags)
+ unsigned iter_flags,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
@@ -253,22 +247,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
return k;
bch2_trans_iter_exit(trans, iter);
- backpointer_target_not_found(trans, bp, k);
- return bkey_s_c_null;
+ int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
+ return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} else {
- struct btree *b = bch2_backpointer_get_node(trans, bp, iter);
+ struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
+ if (IS_ERR_OR_NULL(b))
+ return ((struct bkey_s_c) { .k = ERR_CAST(b) });
- if (IS_ERR_OR_NULL(b)) {
- bch2_trans_iter_exit(trans, iter);
- return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
- }
return bkey_i_to_s_c(&b->key);
}
}
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
- struct btree_iter *iter)
+ struct btree_iter *iter,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
@@ -293,8 +286,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
if (btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
- backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key));
- b = NULL;
+ int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed);
+ b = ret ? ERR_PTR(ret) : NULL;
}
err:
bch2_trans_iter_exit(trans, iter);
@@ -497,7 +490,7 @@ check_existing_bp:
struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);
struct bkey_s_c other_extent =
- bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0);
+ bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, &s->last_flushed);
ret = bkey_err(other_extent);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
ret = 0;
@@ -578,31 +571,29 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- int ret;
- ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bpos bucket_pos;
- struct bkey_i_backpointer bp;
-
if (p.ptr.cached)
continue;
+ if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
+ continue;
+
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
- if (ca)
- bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp);
+ bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
rcu_read_unlock();
- if (!ca)
- continue;
-
- ret = check_bp_exists(trans, s, &bp, k);
- if (ret)
- return ret;
+ if (check) {
+ struct bkey_i_backpointer bp;
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
+ int ret = check_bp_exists(trans, s, &bp, k);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -811,26 +802,259 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
return 0;
}
+enum alloc_sector_counter {
+ ALLOC_dirty,
+ ALLOC_cached,
+ ALLOC_stripe,
+ ALLOC_SECTORS_NR
+};
+
+static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t)
+{
+ switch (t) {
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ return ALLOC_dirty;
+ case BCH_DATA_cached:
+ return ALLOC_cached;
+ case BCH_DATA_stripe:
+ return ALLOC_stripe;
+ default:
+ BUG();
+ }
+}
+
+static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, struct bkey_s_c alloc_k,
+ struct bkey_buf *last_flushed)
+{
+ int ret = 0;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
+
+ if (a->data_type == BCH_DATA_sb ||
+ a->data_type == BCH_DATA_journal ||
+ a->data_type == BCH_DATA_parity)
+ return 0;
+
+ u32 sectors[ALLOC_SECTORS_NR];
+ memset(sectors, 0, sizeof(sectors));
+
+ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
+ if (!ca)
+ return 0;
+
+ struct btree_iter iter;
+ struct bkey_s_c bp_k;
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_start(ca, alloc_k.k->p),
+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
+
+ if (bp.v->bucket_gen != a->gen)
+ continue;
+
+ sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len;
+ };
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ goto err;
+
+ /* Cached pointers don't have backpointers: */
+
+ if (sectors[ALLOC_dirty] != a->dirty_sectors ||
+ sectors[ALLOC_stripe] != a->stripe_sectors) {
+ ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
+ if (ret)
+ goto err;
+
+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
+ }
+err:
+ bch2_dev_put(ca);
+ return ret;
+}
+
+static int check_bucket_backpointer_mismatches(struct btree_trans *trans,
+ struct bkey_buf *last_flushed)
+{
+ return for_each_btree_key(trans, iter, BTREE_ID_alloc,
+ POS_MIN, BTREE_ITER_prefetch, k, ({
+ check_bucket_backpointer_mismatch_one(trans, k, last_flushed);
+ }));
+}
+
+static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr_v2: {
+ bool ret = false;
+
+ rcu_read_lock();
+ struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
+ while (pos.inode <= k.k->p.inode) {
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
+ if (!ca)
+ goto next;
+
+ struct bpos bucket = bp_pos_to_bucket(ca, pos);
+ bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches,
+ ca->mi.nbuckets, bucket.offset);
+ if (bucket.offset == ca->mi.nbuckets)
+ goto next;
+
+ ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p);
+ if (ret)
+ break;
+next:
+ pos = SPOS(pos.inode + 1, 0, 0);
+ }
+ rcu_read_unlock();
+
+ return ret;
+ }
+ case KEY_TYPE_btree_ptr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
+ enum btree_id btree, unsigned level)
+{
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ if (b)
+ bch2_node_pin(trans->c, b);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
+ struct bpos start, struct bpos *end)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ struct bkey_buf tmp;
+ bch2_bkey_buf_init(&tmp);
+
+ bch2_btree_cache_unpin(c);
+
+ *end = SPOS_MAX;
+
+ s64 mem_may_pin = mem_may_pin_bytes(c);
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
+ 0, 1, BTREE_ITER_prefetch);
+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+ if (!backpointer_node_has_missing(c, k))
+ continue;
+
+ mem_may_pin -= c->opts.btree_node_size;
+ if (mem_may_pin <= 0)
+ break;
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ BUG_ON(path->level != 1);
+
+ bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1);
+ }));
+ if (ret)
+ return ret;
+
+ struct bpos pinned = SPOS_MAX;
+ mem_may_pin = mem_may_pin_bytes(c);
+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
+ 0, 1, BTREE_ITER_prefetch);
+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+ if (!backpointer_node_has_missing(c, k))
+ continue;
+
+ mem_may_pin -= c->opts.btree_node_size;
+ if (mem_may_pin <= 0) {
+ *end = pinned;
+ break;
+ }
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ BUG_ON(path->level != 1);
+
+ int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1);
+
+ if (!ret2)
+ pinned = tmp.k->k.p;
+
+ ret;
+ }));
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
+ int ret = 0;
+
+ /*
+ * Can't allow devices to come/go/resize while we have bucket bitmaps
+ * allocated
+ */
+ lockdep_assert_held(&c->state_lock);
+
+ for_each_member_device(c, ca) {
+ BUG_ON(ca->bucket_backpointer_mismatches);
+ ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!ca->bucket_backpointer_mismatches) {
+ bch2_dev_put(ca);
+ ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+ goto err_free_bitmaps;
+ }
+ }
+
struct btree_trans *trans = bch2_trans_get(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
- int ret;
bch2_bkey_buf_init(&s.last_flushed);
bkey_init(&s.last_flushed.k->k);
+ ret = check_bucket_backpointer_mismatches(trans, &s.last_flushed);
+ if (ret)
+ goto err;
+
+ u64 nr_buckets = 0, nr_mismatches = 0;
+ for_each_member_device(c, ca) {
+ nr_buckets += ca->mi.nbuckets;
+ nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
+ }
+
+ if (!nr_mismatches)
+ goto err;
+
+ bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
+ nr_mismatches, nr_buckets);
+
while (1) {
- struct bbpos end;
- ret = bch2_get_btree_in_memory_pos(trans,
- BIT_ULL(BTREE_ID_backpointers),
- BIT_ULL(BTREE_ID_backpointers),
- BBPOS(BTREE_ID_backpointers, s.bp_start), &end);
+ ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
if (ret)
break;
- s.bp_end = end.pos;
-
if ( bpos_eq(s.bp_start, POS_MIN) &&
!bpos_eq(s.bp_end, SPOS_MAX))
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -855,10 +1079,15 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
s.bp_start = bpos_successor(s.bp_end);
}
+err:
bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c);
-
bch2_btree_cache_unpin(c);
+err_free_bitmaps:
+ for_each_member_device(c, ca) {
+ kvfree(ca->bucket_backpointer_mismatches);
+ ca->bucket_backpointer_mismatches = NULL;
+ }
bch_err_fn(c, ret);
return ret;
@@ -874,39 +1103,21 @@ static int check_one_backpointer(struct btree_trans *trans,
return 0;
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
- struct bch_fs *c = trans->c;
struct bbpos pos = bp_to_bbpos(*bp.v);
- struct printbuf buf = PRINTBUF;
if (bbpos_cmp(pos, start) < 0 ||
bbpos_cmp(pos, end) > 0)
return 0;
struct btree_iter iter;
- struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0);
+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
int ret = bkey_err(k);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
return 0;
if (ret)
return ret;
- if (!k.k) {
- ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed);
- if (ret)
- goto out;
-
- if (fsck_err(trans, backpointer_to_missing_ptr,
- "backpointer for missing %s\n %s",
- bp.v->level ? "btree node" : "extent",
- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
- ret = bch2_backpointer_del(trans, bp.k->p);
- goto out;
- }
- }
-out:
-fsck_err:
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
return ret;
}
@@ -922,9 +1133,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
bkey_init(&last_flushed.k->k);
progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
- int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
- POS_MIN, BTREE_ITER_prefetch, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
+ POS_MIN, BTREE_ITER_prefetch, k, ({
progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
check_one_backpointer(trans, start, end, k, &last_flushed);
}));
diff --git a/libbcachefs/backpointers.h b/libbcachefs/backpointers.h
index caffc684..060dad15 100644
--- a/libbcachefs/backpointers.h
+++ b/libbcachefs/backpointers.h
@@ -140,45 +140,29 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
}
}
-static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
const union bch_extent_entry *entry,
- struct bpos *bucket, struct bkey_i_backpointer *bp,
- u64 sectors)
+ struct bkey_i_backpointer *bp)
{
- u32 bucket_offset;
- *bucket = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset);
-
- u64 bp_bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset;
-
bkey_backpointer_init(&bp->k_i);
- bp->k.p = bucket_pos_to_bp(ca, *bucket, bp_bucket_offset);
+ bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset);
bp->v = (struct bch_backpointer) {
.btree_id = btree_id,
.level = level,
.data_type = bch2_bkey_ptr_data_type(k, p, entry),
.bucket_gen = p.ptr.gen,
- .bucket_len = sectors,
+ .bucket_len = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p),
.pos = k.k->p,
};
}
-static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, struct extent_ptr_decoded p,
- const union bch_extent_entry *entry,
- struct bpos *bucket_pos, struct bkey_i_backpointer *bp)
-{
- u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
-
- __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors);
-}
-
+struct bkey_buf;
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer,
- struct btree_iter *, unsigned);
+ struct btree_iter *, unsigned, struct bkey_buf *);
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
- struct btree_iter *);
+ struct btree_iter *, struct bkey_buf *);
int bch2_check_btree_backpointers(struct bch_fs *);
int bch2_check_extents_to_backpointers(struct bch_fs *);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index b12c9c78..58d42b3f 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -557,6 +557,8 @@ struct bch_dev {
unsigned long *buckets_nouse;
struct rw_semaphore bucket_lock;
+ unsigned long *bucket_backpointer_mismatches;
+
struct bch_dev_usage __percpu *usage;
/* Allocator: */
@@ -759,6 +761,8 @@ struct bch_fs {
__uuid_t user_uuid;
u16 version;
+ u16 version_incompat;
+ u16 version_incompat_allowed;
u16 version_min;
u16 version_upgrade_complete;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index cef22c15..f140c336 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -680,7 +680,9 @@ struct bch_sb_field_ext {
x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
- x(disk_accounting_big_endian, BCH_VERSION(1, 15))
+ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
+ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
+ x(inode_depth, BCH_VERSION(1, 17))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -845,6 +847,9 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
struct bch_sb, flags[5], 16, 32);
+LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48);
+LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
+ struct bch_sb, flags[5], 48, 64);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@@ -897,21 +902,22 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
x(new_varint, 15) \
x(journal_no_flush, 16) \
x(alloc_v2, 17) \
- x(extents_across_btree_nodes, 18)
+ x(extents_across_btree_nodes, 18) \
+ x(incompat_version_field, 19)
#define BCH_SB_FEATURES_ALWAYS \
- ((1ULL << BCH_FEATURE_new_extent_overwrite)| \
- (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
- (1ULL << BCH_FEATURE_btree_updates_journalled)|\
- (1ULL << BCH_FEATURE_alloc_v2)|\
- (1ULL << BCH_FEATURE_extents_across_btree_nodes))
+ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \
+ BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\
+ BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\
+ BIT_ULL(BCH_FEATURE_alloc_v2)|\
+ BIT_ULL(BCH_FEATURE_extents_across_btree_nodes))
#define BCH_SB_FEATURES_ALL \
(BCH_SB_FEATURES_ALWAYS| \
- (1ULL << BCH_FEATURE_new_siphash)| \
- (1ULL << BCH_FEATURE_btree_ptr_v2)| \
- (1ULL << BCH_FEATURE_new_varint)| \
- (1ULL << BCH_FEATURE_journal_no_flush))
+ BIT_ULL(BCH_FEATURE_new_siphash)| \
+ BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \
+ BIT_ULL(BCH_FEATURE_new_varint)| \
+ BIT_ULL(BCH_FEATURE_journal_no_flush))
enum bch_sb_feature {
#define x(f, n) BCH_FEATURE_##f,
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 1117be90..b00c6a20 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -222,7 +222,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
struct btree_cache *bc = &c->btree_cache;
mutex_lock(&bc->lock);
- BUG_ON(!__btree_node_pinned(bc, b));
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
set_btree_node_pinned(b);
list_move(&b->list, &bc->live[1].list);
diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c
index de3db161..6d25e3f8 100644
--- a/libbcachefs/btree_journal_iter.c
+++ b/libbcachefs/btree_journal_iter.c
@@ -259,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
* Ensure these keys are done last by journal replay, to unblock
* journal reclaim:
*/
- .journal_seq = U32_MAX,
+ .journal_seq = U64_MAX,
};
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index afd35c93..1e43ece2 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -574,8 +574,10 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;
- u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
- *sectors = insert ? abs_sectors : -abs_sectors;
+ struct bkey_i_backpointer bp;
+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp);
+
+ *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len;
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (unlikely(!ca)) {
@@ -584,9 +586,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
goto err;
}
- struct bpos bucket;
- struct bkey_i_backpointer bp;
- __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors);
+ struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 250e7389..6b297f90 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1276,7 +1276,8 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bch_dev *ca,
struct bpos bucket, u8 gen,
struct ec_stripe_buf *s,
- struct bkey_s_c_backpointer bp)
+ struct bkey_s_c_backpointer bp,
+ struct bkey_buf *last_flushed)
{
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
struct bch_fs *c = trans->c;
@@ -1293,7 +1294,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct btree_iter node_iter;
struct btree *b;
- b = bch2_backpointer_get_node(trans, bp, &node_iter);
+ b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed);
bch2_trans_iter_exit(trans, &node_iter);
if (!b)
@@ -1307,7 +1308,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
return -EIO;
}
- k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent);
+ k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed);
ret = bkey_err(k);
if (ret)
return ret;
@@ -1374,6 +1375,10 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket_pos),
bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k,
@@ -1387,9 +1392,10 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
continue;
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s,
- bkey_s_c_to_backpointer(bp_k));
+ bkey_s_c_to_backpointer(bp_k), &last_flushed);
}));
+ bch2_bkey_buf_exit(&last_flushed, c);
bch2_dev_put(ca);
return ret;
}
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 47387f7d..924d2400 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -54,6 +54,7 @@
x(ENOMEM, ENOMEM_compression_bounce_read_init) \
x(ENOMEM, ENOMEM_compression_bounce_write_init) \
x(ENOMEM, ENOMEM_compression_workspace_init) \
+ x(ENOMEM, ENOMEM_backpointer_mismatches_bitmap) \
x(EIO, compression_workspace_not_initialized) \
x(ENOMEM, ENOMEM_bucket_gens) \
x(ENOMEM, ENOMEM_buckets_nouse) \
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index dcaa47f6..d2966b67 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -172,6 +172,10 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}
+ if (S_ISDIR(mode) &&
+ !new_inode->bi_subvol)
+ new_inode->bi_depth = dir_u->bi_depth + 1;
+
inode_iter.flags &= ~BTREE_ITER_all_snapshots;
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
@@ -512,6 +516,15 @@ int bch2_rename_trans(struct btree_trans *trans,
dst_dir_u->bi_nlink++;
}
+ if (S_ISDIR(src_inode_u->bi_mode) &&
+ !src_inode_u->bi_subvol)
+ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ S_ISDIR(dst_inode_u->bi_mode) &&
+ !dst_inode_u->bi_subvol)
+ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
+
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
dst_dir_u->bi_nlink--;
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 33d0e708..94bf34b9 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -906,11 +906,18 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
bch2_mark_pagecache_unallocated(src, pos_src >> 9,
(pos_src + aligned_len) >> 9);
+ /*
+ * XXX: we'd like to be telling bch2_remap_range() if we have
+ * permission to write to the source file, and thus if io path option
+ * changes should be propagated through the copy, but we need mnt_idmap
+ * from the pathwalk, awkward
+ */
ret = bch2_remap_range(c,
inode_inum(dst), pos_dst >> 9,
inode_inum(src), pos_src >> 9,
aligned_len >> 9,
- pos_dst + len, &i_sectors_delta);
+ pos_dst + len, &i_sectors_delta,
+ false);
if (ret < 0)
goto err;
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 1a5a0711..d67a3f52 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -2791,6 +2791,48 @@ struct pathbuf_entry {
typedef DARRAY(struct pathbuf_entry) pathbuf;
+static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
+ u32 new_depth)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, p->inum, p->snapshot), 0);
+
+ struct bch_inode_unpacked inode;
+ int ret = bkey_err(k) ?:
+ !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
+ : bch2_inode_unpack(k, &inode);
+ if (ret)
+ goto err;
+
+ if (inode.bi_depth != new_depth) {
+ inode.bi_depth = new_depth;
+ ret = __bch2_fsck_write_inode(trans, &inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
+{
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ darray_for_each_reverse(*path, i) {
+ ret = nested_lockrestart_do(trans,
+ bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
+ bch_err_fn(trans->c, ret);
+ if (ret)
+ break;
+
+ new_bi_depth++;
+ }
+
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
darray_for_each(*p, i)
@@ -2800,22 +2842,20 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
return false;
}
-static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
+static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
- struct bch_inode_unpacked inode;
+ pathbuf path = {};
struct printbuf buf = PRINTBUF;
u32 snapshot = inode_k.k->p.snapshot;
+ bool redo_bi_depth = false;
+ u32 min_bi_depth = U32_MAX;
int ret = 0;
- p->nr = 0;
-
+ struct bch_inode_unpacked inode;
BUG_ON(bch2_inode_unpack(inode_k, &inode));
- if (!S_ISDIR(inode.bi_mode))
- return 0;
-
while (!inode.bi_subvol) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
@@ -2839,7 +2879,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &dirent_iter);
- ret = darray_push(p, ((struct pathbuf_entry) {
+ ret = darray_push(&path, ((struct pathbuf_entry) {
.inum = inode.bi_inum,
.snapshot = snapshot,
}));
@@ -2851,22 +2891,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);
+
+ struct bch_inode_unpacked parent_inode;
ret = bkey_err(inode_k) ?:
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
- : bch2_inode_unpack(inode_k, &inode);
+ : bch2_inode_unpack(inode_k, &parent_inode);
if (ret) {
/* Should have been caught in dirents pass */
bch_err_msg(c, ret, "error looking up parent directory");
break;
}
+ min_bi_depth = parent_inode.bi_depth;
+
+ if (parent_inode.bi_depth < inode.bi_depth &&
+ min_bi_depth < U16_MAX)
+ break;
+
+ inode = parent_inode;
snapshot = inode_k.k->p.snapshot;
+ redo_bi_depth = true;
- if (path_is_dup(p, inode.bi_inum, snapshot)) {
+ if (path_is_dup(&path, inode.bi_inum, snapshot)) {
/* XXX print path */
bch_err(c, "directory structure loop");
- darray_for_each(*p, i)
+ darray_for_each(path, i)
pr_err("%llu:%u", i->inum, i->snapshot);
pr_err("%llu:%u", inode.bi_inum, snapshot);
@@ -2879,12 +2929,21 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
}
+
+ redo_bi_depth = false;
break;
}
}
+
+ if (inode.bi_subvol)
+ min_bi_depth = 0;
+
+ if (redo_bi_depth)
+ ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
+ darray_exit(&path);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
@@ -2896,24 +2955,20 @@ fsck_err:
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
- pathbuf path = { 0, };
- int ret;
-
- ret = bch2_trans_run(c,
+ int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
- if (!bkey_is_inode(k.k))
+ if (!S_ISDIR(bkey_inode_mode(k)))
continue;
if (bch2_inode_flags(k) & BCH_INODE_unlinked)
continue;
- check_path(trans, &path, k);
+ check_path_loop(trans, k);
})));
- darray_exit(&path);
bch_err_fn(c, ret);
return ret;
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 927c8759..5bca6950 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k)
}
}
+static inline unsigned bkey_inode_mode(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_inode:
+ return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode);
+ case KEY_TYPE_inode_v2:
+ return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode);
+ case KEY_TYPE_inode_v3:
+ return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v);
+ default:
+ return 0;
+ }
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
diff --git a/libbcachefs/inode_format.h b/libbcachefs/inode_format.h
index 7928d0c6..be1e7476 100644
--- a/libbcachefs/inode_format.h
+++ b/libbcachefs/inode_format.h
@@ -101,7 +101,8 @@ struct bch_inode_generation {
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32) \
- x(bi_nocow, 8)
+ x(bi_nocow, 8) \
+ x(bi_depth, 32)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 1627f3e1..bb69d808 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -2036,8 +2036,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct printbuf buf = PRINTBUF;
buf.atomic++;
- prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"),
+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
le64_to_cpu(w->data->seq),
+ vstruct_sectors(w->data, c->block_bits),
bch2_err_str(ret));
__bch2_journal_debug_to_text(&buf, j);
spin_unlock(&j->lock);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 1aabbbe3..b7936ad3 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -140,6 +140,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned pos, nr_devs = 0;
struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX];
+ unsigned min_bucket_size = U32_MAX;
BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
@@ -148,6 +149,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
if (!ca->journal.nr)
continue;
+ min_bucket_size = min(min_bucket_size, ca->mi.bucket_size);
+
space = journal_dev_space_available(j, ca, from);
if (!space.next_entry)
continue;
@@ -167,7 +170,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
* We sorted largest to smallest, and we want the smallest out of the
* @nr_devs_want largest devices:
*/
- return dev_space[nr_devs_want - 1];
+ space = dev_space[nr_devs_want - 1];
+ space.next_entry = min(space.next_entry, min_bucket_size);
+ return space;
}
void bch2_journal_space_available(struct journal *j)
diff --git a/libbcachefs/logged_ops.c b/libbcachefs/logged_ops.c
index 60e00702..1ac51af1 100644
--- a/libbcachefs/logged_ops.c
+++ b/libbcachefs/logged_ops.c
@@ -63,8 +63,9 @@ fsck_err:
int bch2_resume_logged_ops(struct bch_fs *c)
{
int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter,
- BTREE_ID_logged_ops, POS_MIN,
+ for_each_btree_key_max(trans, iter,
+ BTREE_ID_logged_ops,
+ POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX),
BTREE_ITER_prefetch, k,
resume_logged_op(trans, &iter, k)));
bch_err_fn(c, ret);
@@ -74,9 +75,8 @@ int bch2_resume_logged_ops(struct bch_fs *c)
static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
{
struct btree_iter iter;
- int ret;
-
- ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX);
+ int ret = bch2_bkey_get_empty_slot(trans, &iter,
+ BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX));
if (ret)
return ret;
diff --git a/libbcachefs/logged_ops_format.h b/libbcachefs/logged_ops_format.h
index 6a4bf712..0b370a96 100644
--- a/libbcachefs/logged_ops_format.h
+++ b/libbcachefs/logged_ops_format.h
@@ -2,6 +2,8 @@
#ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H
#define _BCACHEFS_LOGGED_OPS_FORMAT_H
+#define LOGGED_OPS_INUM 0
+
struct bch_logged_op_truncate {
struct bch_val v;
__le32 subvol;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 46017546..a50f27f4 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -22,6 +22,7 @@
#include "keylist.h"
#include "move.h"
#include "rebalance.h"
+#include "reflink.h"
#include "replicas.h"
#include "snapshot.h"
#include "super-io.h"
@@ -389,6 +390,7 @@ err:
static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct per_snapshot_io_opts *io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
struct btree_iter *extent_iter,
struct bkey_s_c extent_k)
{
@@ -400,12 +402,12 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
if (extent_k.k->type == KEY_TYPE_reflink_v)
goto out;
- if (io_opts->cur_inum != extent_k.k->p.inode) {
+ if (io_opts->cur_inum != extent_pos.inode) {
io_opts->d.nr = 0;
- ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
BTREE_ITER_all_snapshots, k, ({
- if (k.k->p.offset != extent_k.k->p.inode)
+ if (k.k->p.offset != extent_pos.inode)
break;
if (!bkey_is_inode(k.k))
@@ -419,7 +421,7 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
darray_push(&io_opts->d, e);
}));
- io_opts->cur_inum = extent_k.k->p.inode;
+ io_opts->cur_inum = extent_pos.inode;
}
ret = ret ?: trans_was_restarted(trans, restart_count);
@@ -525,9 +527,15 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
struct per_snapshot_io_opts snapshot_io_opts;
struct bch_io_opts *io_opts;
struct bkey_buf sk;
- struct btree_iter iter;
+ struct btree_iter iter, reflink_iter = {};
struct bkey_s_c k;
struct data_update_opts data_opts;
+ /*
+ * If we're moving a single file, also process reflinked data it points
+ * to (this includes propagating changed io_opts from the inode to the
+ * extent):
+ */
+ bool walk_indirect = start.inode == end.inode;
int ret = 0, ret2;
per_snapshot_io_opts_init(&snapshot_io_opts, c);
@@ -547,6 +555,8 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
bch2_ratelimit_reset(ctxt->rate);
while (!bch2_move_ratelimit(ctxt)) {
+ struct btree_iter *extent_iter = &iter;
+
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
@@ -565,10 +575,36 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
if (ctxt->stats)
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+ if (walk_indirect &&
+ k.k->type == KEY_TYPE_reflink_p &&
+ REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
+
+ bch2_trans_iter_exit(trans, &reflink_iter);
+ k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ if (bkey_deleted(k.k))
+ goto next_nondata;
+
+ /*
+ * XXX: reflink pointers may point to multiple indirect
+ * extents, so don't advance past the entire reflink
+ * pointer - need to fixup iter->k
+ */
+ extent_iter = &reflink_iter;
+ }
+
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, &iter, k);
+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
+ iter.pos, extent_iter, k);
ret = PTR_ERR_OR_ZERO(io_opts);
if (ret)
continue;
@@ -584,7 +620,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
+ ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;
@@ -605,6 +641,7 @@ next_nondata:
bch2_btree_iter_advance(&iter);
}
+ bch2_trans_iter_exit(trans, &reflink_iter);
bch2_trans_iter_exit(trans, &iter);
bch2_bkey_buf_exit(&sk, c);
per_snapshot_io_opts_exit(&snapshot_io_opts);
@@ -675,6 +712,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
struct bkey_s_c k;
struct data_update_opts data_opts;
unsigned sectors_moved = 0;
+ struct bkey_buf last_flushed;
int ret = 0;
struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode);
@@ -683,6 +721,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
trace_bucket_evacuate(c, &bucket);
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
bch2_bkey_buf_init(&sk);
/*
@@ -724,7 +764,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
if (!bp.v->level) {
- k = bch2_backpointer_get_key(trans, bp, &iter, 0);
+ k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@@ -782,7 +822,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
} else {
struct btree *b;
- b = bch2_backpointer_get_node(trans, bp, &iter);
+ b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed);
ret = PTR_ERR_OR_ZERO(b);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
continue;
@@ -820,6 +860,7 @@ err:
bch2_trans_iter_exit(trans, &bp_iter);
bch2_dev_put(ca);
bch2_bkey_buf_exit(&sk, c);
+ bch2_bkey_buf_exit(&last_flushed, c);
return ret;
}
diff --git a/libbcachefs/rcu_pending.c b/libbcachefs/rcu_pending.c
index 67522aa3..bef2aa1b 100644
--- a/libbcachefs/rcu_pending.c
+++ b/libbcachefs/rcu_pending.c
@@ -25,21 +25,37 @@ enum rcu_pending_special {
#define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE)
#define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU)
-static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp)
+#ifdef __KERNEL__
+typedef unsigned long rcu_gp_poll_state_t;
+
+static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r)
+{
+ return l == r;
+}
+#else
+typedef struct urcu_gp_poll_state rcu_gp_poll_state_t;
+
+static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r)
+{
+ return l.grace_period_id == r.grace_period_id;
+}
+#endif
+
+static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp)
{
return ssp
? get_state_synchronize_srcu(ssp)
: get_state_synchronize_rcu();
}
-static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp)
+static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp)
{
return ssp
? start_poll_synchronize_srcu(ssp)
: start_poll_synchronize_rcu();
}
-static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie)
+static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie)
{
return ssp
? poll_state_synchronize_srcu(ssp, cookie)
@@ -71,13 +87,13 @@ struct rcu_pending_seq {
GENRADIX(struct rcu_head *) objs;
size_t nr;
struct rcu_head **cursor;
- unsigned long seq;
+ rcu_gp_poll_state_t seq;
};
struct rcu_pending_list {
struct rcu_head *head;
struct rcu_head *tail;
- unsigned long seq;
+ rcu_gp_poll_state_t seq;
};
struct rcu_pending_pcpu {
@@ -316,10 +332,10 @@ static void rcu_pending_rcu_cb(struct rcu_head *rcu)
}
static __always_inline struct rcu_pending_seq *
-get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq)
+get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq)
{
darray_for_each_reverse(p->objs, objs)
- if (objs->seq == seq)
+ if (rcu_gp_poll_cookie_eq(objs->seq, seq))
return objs;
if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC))
@@ -329,7 +345,7 @@ get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq)
}
static noinline bool
-rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq,
+rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq,
struct rcu_head *head, void *ptr,
unsigned long *flags)
{
@@ -364,7 +380,7 @@ rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq,
again:
for (struct rcu_pending_list *i = p->lists;
i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
- if (i->seq == seq) {
+ if (rcu_gp_poll_cookie_eq(i->seq, seq)) {
rcu_pending_list_add(i, head);
return false;
}
@@ -408,7 +424,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
struct rcu_pending_pcpu *p;
struct rcu_pending_seq *objs;
struct genradix_node *new_node = NULL;
- unsigned long seq, flags;
+ unsigned long flags;
bool start_gp = false;
BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));
@@ -416,7 +432,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
local_irq_save(flags);
p = this_cpu_ptr(pending->p);
spin_lock(&p->lock);
- seq = __get_state_synchronize_rcu(pending->srcu);
+ rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu);
restart:
if (may_sleep &&
unlikely(process_finished_items(pending, p, flags)))
@@ -478,9 +494,7 @@ start_gp:
*/
if (!p->cb_armed) {
p->cb_armed = true;
- spin_unlock_irqrestore(&p->lock, flags);
__call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
- goto free_node;
} else {
__start_poll_synchronize_rcu(pending->srcu);
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index a342744f..7849916e 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -616,6 +616,7 @@ static bool check_version_upgrade(struct bch_fs *c)
bch2_latest_compatible_version(c->sb.version));
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
+ bool ret = false;
if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@@ -671,14 +672,32 @@ static bool check_version_upgrade(struct bch_fs *c)
}
bch_info(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+
+ ret = true;
+ }
- bch2_sb_upgrade(c, new_version);
+ if (new_version > c->sb.version_incompat &&
+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Now allowing incompatible features up to ");
+ bch2_version_to_text(&buf, new_version);
+ prt_str(&buf, ", previously allowed up to ");
+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
+ prt_newline(&buf);
+ bch_info(c, "%s", buf.buf);
printbuf_exit(&buf);
- return true;
+
+ ret = true;
}
- return false;
+ if (ret)
+ bch2_sb_upgrade(c, new_version,
+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible);
+
+ return ret;
}
int bch2_fs_recovery(struct bch_fs *c)
@@ -1078,7 +1097,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_check_version_downgrade(c);
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
- bch2_sb_upgrade(c, bcachefs_metadata_version_current);
+ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false);
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
bch2_write_super(c);
}
diff --git a/libbcachefs/recovery_passes_types.h b/libbcachefs/recovery_passes_types.h
index 94dc20ca..f967b23d 100644
--- a/libbcachefs/recovery_passes_types.h
+++ b/libbcachefs/recovery_passes_types.h
@@ -8,53 +8,59 @@
#define PASS_ALWAYS BIT(3)
#define PASS_ONLINE BIT(4)
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define PASS_FSCK_DEBUG BIT(1)
+#else
+#define PASS_FSCK_DEBUG 0
+#endif
+
/*
* Passes may be reordered, but the second field is a persistent identifier and
* must never change:
*/
-#define BCH_RECOVERY_PASSES() \
- x(recovery_pass_empty, 41, PASS_SILENT) \
- x(scan_for_btree_nodes, 37, 0) \
- x(check_topology, 4, 0) \
- x(accounting_read, 39, PASS_ALWAYS) \
- x(alloc_read, 0, PASS_ALWAYS) \
- x(stripes_read, 1, PASS_ALWAYS) \
- x(initialize_subvolumes, 2, 0) \
- x(snapshots_read, 3, PASS_ALWAYS) \
- x(check_allocations, 5, PASS_FSCK) \
- x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
- x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
- x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
- x(journal_replay, 9, PASS_ALWAYS) \
- x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \
- x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \
- x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \
- x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \
- x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \
- x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
- x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
- x(bucket_gens_init, 17, 0) \
- x(reconstruct_snapshots, 38, 0) \
- x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
- x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
- x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
- x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
- x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
- x(fs_upgrade_for_subvolumes, 22, 0) \
- x(check_inodes, 24, PASS_FSCK) \
- x(check_extents, 25, PASS_FSCK) \
- x(check_indirect_extents, 26, PASS_FSCK) \
- x(check_dirents, 27, PASS_FSCK) \
- x(check_xattrs, 28, PASS_FSCK) \
- x(check_root, 29, PASS_ONLINE|PASS_FSCK) \
- x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \
- x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
- x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
- x(check_nlinks, 31, PASS_FSCK) \
- x(resume_logged_ops, 23, PASS_ALWAYS) \
- x(delete_dead_inodes, 32, PASS_ALWAYS) \
- x(fix_reflink_p, 33, 0) \
- x(set_fs_needs_rebalance, 34, 0) \
+#define BCH_RECOVERY_PASSES() \
+ x(recovery_pass_empty, 41, PASS_SILENT) \
+ x(scan_for_btree_nodes, 37, 0) \
+ x(check_topology, 4, 0) \
+ x(accounting_read, 39, PASS_ALWAYS) \
+ x(alloc_read, 0, PASS_ALWAYS) \
+ x(stripes_read, 1, PASS_ALWAYS) \
+ x(initialize_subvolumes, 2, 0) \
+ x(snapshots_read, 3, PASS_ALWAYS) \
+ x(check_allocations, 5, PASS_FSCK) \
+ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
+ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
+ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
+ x(journal_replay, 9, PASS_ALWAYS) \
+ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \
+ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \
+ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \
+ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \
+ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \
+ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
+ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
+ x(bucket_gens_init, 17, 0) \
+ x(reconstruct_snapshots, 38, 0) \
+ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
+ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
+ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
+ x(fs_upgrade_for_subvolumes, 22, 0) \
+ x(check_inodes, 24, PASS_FSCK) \
+ x(check_extents, 25, PASS_FSCK) \
+ x(check_indirect_extents, 26, PASS_FSCK) \
+ x(check_dirents, 27, PASS_FSCK) \
+ x(check_xattrs, 28, PASS_FSCK) \
+ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \
+ x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
+ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
+ x(check_nlinks, 31, PASS_FSCK) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
+ x(delete_dead_inodes, 32, PASS_ALWAYS) \
+ x(fix_reflink_p, 33, 0) \
+ x(set_fs_needs_rebalance, 34, 0)
/* We normally enumerate recovery passes in the order we run them: */
enum bch_recovery_pass {
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index e1911b9b..93ba4f4e 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -482,7 +482,8 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *trans,
static int bch2_make_extent_indirect(struct btree_trans *trans,
struct btree_iter *extent_iter,
- struct bkey_i *orig)
+ struct bkey_i *orig,
+ bool reflink_p_may_update_opts_field)
{
struct bch_fs *c = trans->c;
struct btree_iter reflink_iter = { NULL };
@@ -548,6 +549,9 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k));
+ if (reflink_p_may_update_opts_field)
+ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&r_p->v, true);
+
ret = bch2_trans_update(trans, extent_iter, &r_p->k_i,
BTREE_UPDATE_internal_snapshot_node);
err:
@@ -578,7 +582,8 @@ s64 bch2_remap_range(struct bch_fs *c,
subvol_inum dst_inum, u64 dst_offset,
subvol_inum src_inum, u64 src_offset,
u64 remap_sectors,
- u64 new_i_size, s64 *i_sectors_delta)
+ u64 new_i_size, s64 *i_sectors_delta,
+ bool may_change_src_io_path_opts)
{
struct btree_trans *trans;
struct btree_iter dst_iter, src_iter;
@@ -591,6 +596,8 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bpos src_want;
u64 dst_done = 0;
u32 dst_snapshot, src_snapshot;
+ bool reflink_p_may_update_opts_field =
+ bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts);
int ret = 0, ret2 = 0;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
@@ -672,7 +679,8 @@ s64 bch2_remap_range(struct bch_fs *c,
src_k = bkey_i_to_s_c(new_src.k);
ret = bch2_make_extent_indirect(trans, &src_iter,
- new_src.k);
+ new_src.k,
+ reflink_p_may_update_opts_field);
if (ret)
continue;
@@ -690,6 +698,10 @@ s64 bch2_remap_range(struct bch_fs *c,
bkey_start_offset(src_k.k));
SET_REFLINK_P_IDX(&dst_p->v, offset);
+
+ if (reflink_p_may_update_opts_field &&
+ may_change_src_io_path_opts)
+ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true);
} else {
BUG();
}
diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h
index f119316a..1632780b 100644
--- a/libbcachefs/reflink.h
+++ b/libbcachefs/reflink.h
@@ -78,7 +78,8 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *, struct btree_i
bool, unsigned);
s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64,
- subvol_inum, u64, u64, u64, s64 *);
+ subvol_inum, u64, u64, u64, s64 *,
+ bool);
int bch2_gc_reflink_done(struct bch_fs *);
int bch2_gc_reflink_start(struct bch_fs *);
diff --git a/libbcachefs/reflink_format.h b/libbcachefs/reflink_format.h
index 53502627..92995e4f 100644
--- a/libbcachefs/reflink_format.h
+++ b/libbcachefs/reflink_format.h
@@ -19,6 +19,8 @@ struct bch_reflink_p {
LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56);
LE64_BITMASK(REFLINK_P_ERROR, struct bch_reflink_p, idx_flags, 56, 57);
+LE64_BITMASK(REFLINK_P_MAY_UPDATE_OPTIONS,
+ struct bch_reflink_p, idx_flags, 57, 58);
struct bch_reflink_v {
struct bch_val v;
diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h
index 3bbda181..4248ff3c 100644
--- a/libbcachefs/sb-errors_format.h
+++ b/libbcachefs/sb-errors_format.h
@@ -140,8 +140,8 @@ enum bch_fsck_flags {
x(backpointer_bucket_offset_wrong, 125, 0) \
x(backpointer_level_bad, 294, 0) \
x(backpointer_dev_bad, 297, 0) \
- x(backpointer_to_missing_device, 126, 0) \
- x(backpointer_to_missing_alloc, 127, 0) \
+ x(backpointer_to_missing_device, 126, FSCK_AUTOFIX) \
+ x(backpointer_to_missing_alloc, 127, FSCK_AUTOFIX) \
x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \
x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \
x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 6a086c1c..912c3696 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -42,7 +42,7 @@ static const struct bch2_metadata_version bch2_metadata_versions[] = {
#undef x
};
-void bch2_version_to_text(struct printbuf *out, unsigned v)
+void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v)
{
const char *str = "(unknown version)";
@@ -55,7 +55,7 @@ void bch2_version_to_text(struct printbuf *out, unsigned v)
prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str);
}
-unsigned bch2_latest_compatible_version(unsigned v)
+enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v)
{
if (!BCH_VERSION_MAJOR(v))
return v;
@@ -69,6 +69,16 @@ unsigned bch2_latest_compatible_version(unsigned v)
return v;
}
+void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
+{
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
+ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
const char * const bch2_sb_fields[] = {
#define x(name, nr) #name,
BCH_SB_FIELDS()
@@ -369,6 +379,12 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
return -BCH_ERR_invalid_sb_features;
}
+ if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) ||
+ BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) {
+ prt_printf(out, "Filesystem has incompatible version");
+ return -BCH_ERR_invalid_sb_features;
+ }
+
block_size = le16_to_cpu(sb->block_size);
if (block_size > PAGE_SECTORS) {
@@ -407,6 +423,18 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
return -BCH_ERR_invalid_sb_time_precision;
}
+ /* old versions didn't know to downgrade this field */
+ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version))
+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version));
+
+ if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) {
+ prt_printf(out, "Invalid version_incompat ");
+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
+ prt_str(out, " > incompat_allowed ");
+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
+ return -BCH_ERR_invalid_sb_version;
+ }
+
if (!flags) {
/*
* Been seeing a bug where these are getting inexplicably
@@ -520,6 +548,9 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.uuid = src->uuid;
c->sb.user_uuid = src->user_uuid;
c->sb.version = le16_to_cpu(src->version);
+ c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src);
+ c->sb.version_incompat_allowed
+ = BCH_SB_VERSION_INCOMPAT_ALLOWED(src);
c->sb.version_min = le16_to_cpu(src->version_min);
c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src);
c->sb.nr_devices = src->nr_devices;
@@ -1152,6 +1183,8 @@ bool bch2_check_version_downgrade(struct bch_fs *c)
*/
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
+ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current)
+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current);
if (c->sb.version > bcachefs_metadata_version_current)
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
if (c->sb.version_min > bcachefs_metadata_version_current)
@@ -1160,7 +1193,7 @@ bool bch2_check_version_downgrade(struct bch_fs *c)
return ret;
}
-void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
+void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
{
lockdep_assert_held(&c->sb_lock);
@@ -1170,6 +1203,10 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
c->disk_sb.sb->version = cpu_to_le16(new_version);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
+
+ if (incompat)
+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
+ max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version));
}
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
@@ -1334,6 +1371,14 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
bch2_version_to_text(out, le16_to_cpu(sb->version));
prt_newline(out);
+ prt_printf(out, "Incompatible features allowed:\t");
+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
+ prt_newline(out);
+
+ prt_printf(out, "Incompatible features in use:\t");
+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
+ prt_newline(out);
+
prt_printf(out, "Version upgrade complete:\t");
bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
prt_newline(out);
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index 90e7b176..f1ab4f94 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -18,8 +18,21 @@ static inline bool bch2_version_compatible(u16 version)
version >= bcachefs_metadata_version_min;
}
-void bch2_version_to_text(struct printbuf *, unsigned);
-unsigned bch2_latest_compatible_version(unsigned);
+void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version);
+enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version);
+
+void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version);
+
+static inline bool bch2_request_incompat_feature(struct bch_fs *c,
+ enum bcachefs_metadata_version version)
+{
+ if (unlikely(version > c->sb.version_incompat)) {
+ if (version > c->sb.version_incompat_allowed)
+ return false;
+ bch2_set_version_incompat(c, version);
+ }
+ return true;
+}
static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
{
@@ -94,7 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
}
bool bch2_check_version_downgrade(struct bch_fs *);
-void bch2_sb_upgrade(struct bch_fs *, unsigned);
+void bch2_sb_upgrade(struct bch_fs *, unsigned, bool);
void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
struct bch_sb_field *);