author    Kent Overstreet <kent.overstreet@gmail.com>  2022-02-15 00:06:59 -0500
committer Kent Overstreet <kent.overstreet@gmail.com>  2022-02-17 02:20:00 -0500
commit    e434530ee566e63794b7c37ffdf7a46ad9cfcd7a (patch)
tree      099bc1fcb1778af81a8a2ee2ec51317c30365456
parent    81646e74c26b5927d9854214a487a5b3cfd56e6e (diff)
bcachefs: Check for stale dirty pointer before reads
Since we retry reads when we discover we read from a pointer that went
stale, if a dirty pointer is erroneously stale it would cause us to loop
retrying that read forever - unless we check before issuing the read,
while the btree is still locked, when we know that a dirty pointer
should never be stale.

This patch adds that check, along with printing some helpful debug info.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
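The failure mode is easy to model in plain userspace C. Below is a
minimal sketch, not the kernel code: struct ptr, bucket_gen and
ptr_is_stale() are hypothetical stand-ins for bcachefs's extent
pointers, in-memory bucket generations and ptr_stale(). The point is
the ordering: the staleness of a dirty pointer is checked before the
read is issued, while the btree is still locked, so an inconsistency
is reported once instead of being retried forever.

#include <stdbool.h>
#include <stdio.h>

/* Toy model: a pointer is stale when its generation no longer matches
 * the bucket's current in-memory generation. */
struct ptr {
	unsigned gen;    /* generation recorded in the extent pointer */
	bool     cached; /* cached pointers may legitimately go stale */
};

static unsigned bucket_gen = 7; /* current generation of the bucket */

static bool ptr_is_stale(const struct ptr *p)
{
	return p->gen != bucket_gen;
}

int main(void)
{
	struct ptr dirty = { .gen = 5, .cached = false };

	/* Check before issuing the I/O: a dirty pointer must never be
	 * stale, so finding one is a filesystem inconsistency, not a
	 * condition to retry. */
	if (!dirty.cached && ptr_is_stale(&dirty)) {
		fprintf(stderr, "stale dirty pointer: ptr gen %u, bucket gen %u\n",
			dirty.gen, bucket_gen);
		return 1;
	}
	printf("pointer current, issuing read\n");
	return 0;
}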
-rw-r--r--  fs/bcachefs/fs-io.c |  2 --
-rw-r--r--  fs/bcachefs/io.c    | 53 ++++++++++++++++++++++++++++++++++---------
-rw-r--r--  fs/bcachefs/move.c  |  6 ++++--
3 files changed, 47 insertions(+), 14 deletions(-)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 3b9b96e5a0a2..1d0871f63e4e 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -1062,8 +1062,6 @@ retry:
sectors = min(sectors, k.k->size - offset_into_extent);
- bch2_trans_unlock(trans);
-
if (readpages_iter)
readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
extent_partial_reads_expensive(k));
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index f0e93de4680d..10f8b3aedc3c 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1953,6 +1953,33 @@ err:
return ret;
}
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bch_extent_ptr ptr)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+ struct btree_iter iter;
+ char buf[200];
+ int ret;
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
+ BTREE_ITER_CACHED);
+
+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ if (ret)
+ return;
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch_err(c, "%s", buf);
+ bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+ bch2_trans_iter_exit(trans, &iter);
+}
+
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
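read_from_stale_dirty_pointer() above renders its diagnostics into a
fixed on-stack buffer before logging them - PBUF(buf) wraps char
buf[200] in a printbuf - then looks up the bucket's alloc key and
in-memory generation. A rough userspace analogue of that
buffer-then-log shape, with a hypothetical key layout and made-up
values:

#include <stdio.h>

struct fake_key {
	unsigned long long inode;
	unsigned long long offset;
	unsigned           gen;
};

/* Stand-in for bch2_bkey_val_to_text(): format the key into buf. */
static void key_to_text(char *buf, size_t len, const struct fake_key *k)
{
	snprintf(buf, len, "extent %llu:%llu gen %u",
		 k->inode, k->offset, k->gen);
}

int main(void)
{
	struct fake_key k = { .inode = 4096, .offset = 128, .gen = 5 };
	unsigned memory_gen = 7; /* stand-in for *bucket_gen(ca, bucket) */
	char buf[200];

	key_to_text(buf, sizeof(buf), &k);
	fprintf(stderr, "Attempting to read from stale dirty pointer: %s\n", buf);
	fprintf(stderr, "memory gen: %u\n", memory_gen);
	return 0;
}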
@@ -1962,7 +1989,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
- struct bch_dev *ca;
+ struct bch_dev *ca = NULL;
struct promote_op *promote = NULL;
bool bounce = false, read_full = false, narrow_crcs = false;
struct bpos data_pos = bkey_start_pos(k.k);
@@ -1979,7 +2006,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
zero_fill_bio_iter(&orig->bio, iter);
goto out_read_done;
}
-
+retry_pick:
pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
@@ -1992,8 +2019,20 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
goto err;
}
- if (pick_ret > 0)
- ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+ if (!pick.ptr.cached &&
+ unlikely(ptr_stale(ca, &pick.ptr))) {
+ read_from_stale_dirty_pointer(trans, k, pick.ptr);
+ bch2_mark_io_failure(failed, &pick);
+ goto retry_pick;
+ }
+
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ bch2_trans_unlock(trans);
if (flags & BCH_READ_NODECODE) {
/*
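The retry_pick flow above picks a replica, and if the chosen pointer is
dirty yet stale, reports it, marks it failed and picks again - only
unlocking the transaction once a usable pointer is in hand. A minimal
sketch of that control flow under toy assumptions: pick_read_device(),
the ptr fields and the per-device gens below are simplified stand-ins
for bch2_bkey_pick_read_device(), struct bch_extent_ptr and
bucket_gen().

#include <stdbool.h>
#include <stdio.h>

struct ptr { unsigned dev; unsigned gen; bool cached; bool failed; };

static unsigned bucket_gens[2] = { 7, 3 }; /* in-memory gen per device */

/* Stand-in for bch2_bkey_pick_read_device(): first unfailed replica. */
static struct ptr *pick_read_device(struct ptr *ptrs, int n)
{
	for (int i = 0; i < n; i++)
		if (!ptrs[i].failed)
			return &ptrs[i];
	return NULL;
}

int main(void)
{
	struct ptr replicas[2] = {
		{ .dev = 0, .gen = 5, .cached = false }, /* stale dirty */
		{ .dev = 1, .gen = 3, .cached = false }, /* good        */
	};
	struct ptr *pick;

retry_pick:
	pick = pick_read_device(replicas, 2);
	if (!pick) {
		fprintf(stderr, "no device to read from\n");
		return 1;
	}
	if (!pick->cached && pick->gen != bucket_gens[pick->dev]) {
		/* Report once, mark the failure, try another replica. */
		fprintf(stderr, "stale dirty pointer on dev %u\n", pick->dev);
		pick->failed = true;
		goto retry_pick;
	}
	printf("issuing read from dev %u\n", pick->dev);
	return 0;
}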
@@ -2281,12 +2320,6 @@ retry:
*/
sectors = min(sectors, k.k->size - offset_into_extent);
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(&trans);
-
bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 83536fdc309a..7ca7ce394135 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -751,10 +751,12 @@ static int __bch2_move_data(struct bch_fs *c,
BUG();
}
- /* unlock before doing IO: */
+ /*
+ * The iterator gets unlocked by __bch2_read_extent - need to
+ * save a copy of @k elsewhere:
+ */
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
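The move.c change leans on the rule the new comment states: since
__bch2_read_extent() may now unlock the iterator, @k can end up
pointing into memory the btree no longer pins, so the caller snapshots
it first (bch2_bkey_buf_reassemble() in the real code). A minimal
sketch of that snapshot-before-unlock pattern, with btree_node,
trans_unlock() and the key layout as toy stand-ins:

#include <stdio.h>
#include <string.h>

struct key { char data[32]; };

static struct key btree_node = { "extent 4096:128 gen 7" };

/* Models dropping btree locks: once unlocked, the node's memory may
 * be evicted or rewritten at any time. */
static void trans_unlock(void)
{
	memset(&btree_node, 'X', sizeof(btree_node) - 1);
}

int main(void)
{
	const struct key *k = &btree_node; /* points into btree memory */
	struct key sk;

	memcpy(&sk, k, sizeof(sk)); /* save a copy of @k first */
	trans_unlock();             /* invalidates what k points at */

	printf("copy survives: %s\n", sk.data);
	return 0;
}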