author	Kent Overstreet <kent.overstreet@linux.dev>	2025-03-10 13:33:41 -0400
committer	Kent Overstreet <kent.overstreet@linux.dev>	2025-04-06 19:13:43 -0400
commit	482ed8059782d114c9dd7a4088cb7b752d7b4cfd (patch)
tree	8707b0269cda3e7f8e10ff2ad0e139c47663391c
parent	22316fd2a926fda089ec8050844f4e6e339729ec (diff)
bcachefs: Be precise about bch_io_failures
If the extent we're reading from changes, due to it being overwritten or moved (possibly partially), we need to reset bch_io_failures so that we don't accidentally mark a new extent as poisoned prematurely. This means the retry path has to separately track the extent we previously read from.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--	fs/bcachefs/bkey.h	1
-rw-r--r--	fs/bcachefs/io_read.c	52
-rw-r--r--	fs/bcachefs/io_read.h	5
3 files changed, 53 insertions, 5 deletions
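
For orientation, a rough user-space sketch of the idea behind the patch, not the actual bcachefs code: the retry path keeps a copy of the extent key it read on the previous pass, and drops the accumulated failure list as soon as that key no longer matches what the next lookup returns, so failures recorded against the old extent are never applied to the new one. The types and helpers here (extent_key, io_failures, keys_equal, maybe_reset_failures) are hypothetical simplifications; the real patch does this with bkey_buf, the stricter bkey_and_val_eq() and bch_io_failures, as shown in the diff that follows.

/*
 * Minimal stand-alone sketch (assumed names, not bcachefs APIs).
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct extent_key {
	unsigned long long	pos;	/* stand-in for the bkey position */
	unsigned		size;	/* stand-in for k->size */
	unsigned char		val[16];/* stand-in for the key's value */
};

struct io_failures {
	unsigned		nr;	/* number of recorded failures */
};

/* roughly mirrors the stricter bkey_and_val_eq(): position, size and value must all match */
static bool keys_equal(const struct extent_key *l, const struct extent_key *r)
{
	return l->pos == r->pos &&
	       l->size == r->size &&
	       !memcmp(l->val, r->val, sizeof(l->val));
}

/*
 * Called once per retry iteration with the freshly looked-up key: if the
 * extent changed underneath us, the recorded failures describe the old
 * extent and must be forgotten before picking a new read target.
 */
static void maybe_reset_failures(struct io_failures *failed,
				 struct extent_key *prev_read,
				 const struct extent_key *cur)
{
	if (!keys_equal(cur, prev_read))
		failed->nr = 0;
	*prev_read = *cur;	/* remember what we read this time around */
}

int main(void)
{
	struct io_failures failed = { .nr = 1 };	/* one failure from the first attempt */
	struct extent_key prev = { .pos = 100, .size = 8, .val = "old" };
	struct extent_key cur  = { .pos = 100, .size = 8, .val = "new" };	/* extent was rewritten */

	maybe_reset_failures(&failed, &prev, &cur);
	printf("failures after retry: %u\n", failed.nr);	/* prints 0: stale failures dropped */
	return 0;
}
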
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index 054e2d5e8448..082632905649 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -191,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return bpos_eq(l.k->p, r.k->p) &&
+ l.k->size == r.k->size &&
bkey_bytes(l.k) == bkey_bytes(r.k) &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 417bb0c7bbfa..28f4d8f8c60e 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -296,6 +296,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
bool *read_full,
struct bch_io_failures *failed)
{
+ /*
+ * We're in the retry path, but we don't know what to repair yet, and we
+ * don't want to do a promote here:
+ */
+ if (failed && !failed->nr)
+ return NULL;
+
struct bch_fs *c = trans->c;
/*
* if failed != NULL we're not actually doing a promote, we're
@@ -430,6 +437,28 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
bio_endio(&rbio->bio);
}
+static void get_rbio_extent(struct btree_trans *trans,
+ struct bch_read_bio *rbio,
+ struct bkey_buf *sk)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = lockrestart_do(trans,
+ bkey_err(k = bch2_bkey_get_iter(trans, &iter,
+ rbio->data_btree, rbio->data_pos, 0)));
+ if (ret)
+ return;
+
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr)
+ if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) {
+ bch2_bkey_buf_reassemble(sk, trans->c, k);
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+}
+
static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
struct bch_read_bio *rbio,
struct bvec_iter bvec_iter,
@@ -487,13 +516,21 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
+
struct btree_trans *trans = bch2_trans_get(c);
+ struct bkey_buf sk;
+ bch2_bkey_buf_init(&sk);
+ bkey_init(&sk.k->k);
+
trace_io_read_retry(&rbio->bio);
this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
bvec_iter_sectors(rbio->bvec_iter));
- if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
+ get_rbio_extent(trans, rbio, &sk);
+
+ if (!bkey_deleted(&sk.k->k) &&
+ bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
bch2_mark_io_failure(&failed, &rbio->pick,
rbio->ret == -BCH_ERR_data_read_retry_csum_err);
@@ -514,7 +551,7 @@ static void bch2_rbio_retry(struct work_struct *work)
int ret = rbio->data_update
? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
- : __bch2_read(trans, rbio, iter, inum, &failed, flags);
+ : __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags);
if (ret) {
rbio->ret = ret;
@@ -535,6 +572,7 @@ static void bch2_rbio_retry(struct work_struct *work)
}
bch2_rbio_done(rbio);
+ bch2_bkey_buf_exit(&sk, c);
bch2_trans_put(trans);
}
@@ -1260,7 +1298,9 @@ out_read_done:
int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, subvol_inum inum,
- struct bch_io_failures *failed, unsigned flags)
+ struct bch_io_failures *failed,
+ struct bkey_buf *prev_read,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -1308,6 +1348,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
k = bkey_i_to_s_c(sk.k);
+ if (unlikely(flags & BCH_READ_in_retry)) {
+ if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k)))
+ failed->nr = 0;
+ bch2_bkey_buf_copy(prev_read, c, sk.k);
+ }
+
/*
* With indirect extents, the amount of data to read is the min
* of the original extent and the indirect extent:
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index c78025d863e0..1a85b092fd1d 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -144,7 +144,8 @@ static inline void bch2_read_extent(struct btree_trans *trans,
}
int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
- subvol_inum, struct bch_io_failures *, unsigned flags);
+ subvol_inum,
+ struct bch_io_failures *, struct bkey_buf *, unsigned flags);
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
subvol_inum inum)
@@ -154,7 +155,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
rbio->subvol = inum.subvol;
bch2_trans_run(c,
- __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL,
+ __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
BCH_READ_retry_if_stale|
BCH_READ_may_promote|
BCH_READ_user_mapped));
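
One more hedged illustration, this time of why the bkey.h hunk adds the size comparison to bkey_and_val_eq(): the commit message notes the extent may change "possibly partially", and the extra check makes the equality test reject a key whose size changed even when its position and value bytes happen to match. The struct and helpers below are simplified stand-ins, not the real bkey code, and the "same value, different size" case is shown only to contrast the two checks.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct fake_key {
	unsigned long long	pos;	/* key position */
	unsigned		size;	/* extent size in sectors */
	unsigned char		val[8];	/* stand-in for the key's value */
};

/* the old check: position and value only */
static bool eq_without_size(const struct fake_key *l, const struct fake_key *r)
{
	return l->pos == r->pos && !memcmp(l->val, r->val, sizeof(l->val));
}

/* the new, stricter check: position, size and value */
static bool eq_with_size(const struct fake_key *l, const struct fake_key *r)
{
	return l->pos == r->pos && l->size == r->size &&
	       !memcmp(l->val, r->val, sizeof(l->val));
}

int main(void)
{
	/* same position and value bytes, but a different size */
	struct fake_key before = { .pos = 16, .size = 8, .val = "ptrA" };
	struct fake_key after  = { .pos = 16, .size = 4, .val = "ptrA" };

	printf("old check says same extent: %d\n", eq_without_size(&before, &after));	/* 1 */
	printf("new check says same extent: %d\n", eq_with_size(&before, &after));	/* 0 */
	return 0;
}
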