summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2021-05-22 21:43:20 -0400
committerKent Overstreet <kent.overstreet@gmail.com>2021-05-22 22:42:25 -0400
commit75a3eb86f7dad211cfc4617e106efe5899528870 (patch)
tree988f6926759f3998910a16e8b5a3b26e4a2ec4f3
parent8506365949cff1c9e177909360db1f9154d537c3 (diff)
bcachefs: Fix an issue with inconsistent btree writes after unclean shutdown
After unclean shutdown, btree writes may have completed on one device and not others - and this inconsistency could lead us to writing new bsets with a gap in our btree node in one of our replicas. Fortunately, this is only an issue with bsets that are newer than the most recent journal flush, and we already have a mechanism for detecting and blacklisting those. We just need to make sure to start new btree writes after the most recent _non_ blacklisted bset. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--fs/bcachefs/btree_io.c19
1 files changed, 18 insertions, 1 deletions
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 4896187dc67f..857ed00f1a5d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -815,6 +815,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned u64s;
+ unsigned nonblacklisted_written = 0;
int ret, retry_read = 0, write = READ;
b->version_ondisk = U16_MAX;
@@ -934,15 +935,31 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sort_iter_add(iter,
vstruct_idx(i, whiteout_u64s),
vstruct_last(i));
+
+ nonblacklisted_written = b->written;
}
for (bne = write_block(b);
bset_byte_offset(b, bne) < btree_bytes(c);
bne = (void *) bne + block_bytes(c))
- btree_err_on(bne->keys.seq == b->data->keys.seq,
+ btree_err_on(bne->keys.seq == b->data->keys.seq &&
+ !bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(bne->keys.journal_seq),
+ true),
BTREE_ERR_WANT_RETRY, c, ca, b, NULL,
"found bset signature after last bset");
+ /*
+ * Blacklisted bsets are those that were written after the most recent
+ * (flush) journal write. Since there wasn't a flush, they may not have
+ * made it to all devices - which means we shouldn't write new bsets
+ * after them, as that could leave a gap and then reads from that device
+ * wouldn't find all the bsets in that btree node - which means it's
+ * important that we start writing new bsets after the most recent _non_
+ * blacklisted bset:
+ */
+ b->written = nonblacklisted_written;
+
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
sorted->keys.u64s = 0;