summaryrefslogtreecommitdiff
path: root/libbcachefs/io_read.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/io_read.c')
-rw-r--r--libbcachefs/io_read.c123
1 files changed, 114 insertions, 9 deletions
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index 4fb279f1..a04dffa4 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -295,6 +295,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
bool *read_full,
struct bch_io_failures *failed)
{
+ /*
+ * We're in the retry path, but we don't know what to repair yet, and we
+ * don't want to do a promote here:
+ */
+ if (failed && !failed->nr)
+ return NULL;
+
struct bch_fs *c = trans->c;
/*
* if failed != NULL we're not actually doing a promote, we're
@@ -429,6 +436,71 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
bio_endio(&rbio->bio);
}
+static void get_rbio_extent(struct btree_trans *trans,
+ struct bch_read_bio *rbio,
+ struct bkey_buf *sk)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = lockrestart_do(trans,
+ bkey_err(k = bch2_bkey_get_iter(trans, &iter,
+ rbio->data_btree, rbio->data_pos, 0)));
+ if (ret)
+ return;
+
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr)
+ if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) {
+ bch2_bkey_buf_reassemble(sk, trans->c, k);
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+}
+
+static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
+ enum btree_id btree, struct bkey_s_c read_k)
+{
+ struct bch_fs *c = trans->c;
+
+ struct data_update *u = rbio_data_update(rbio);
+ if (u)
+ read_k = bkey_i_to_s_c(u->k.k);
+
+ u64 flags = bch2_bkey_extent_flags(read_k);
+ if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
+ return 0;
+
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k),
+ BTREE_ITER_intent);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (!bkey_and_val_eq(k, read_k))
+ goto out;
+
+ struct bkey_i *new = bch2_trans_kmalloc(trans,
+ bkey_bytes(k.k) + sizeof(struct bch_extent_flags));
+ ret = PTR_ERR_OR_ZERO(new) ?:
+ (bkey_reassemble(new, k), 0) ?:
+ bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?:
+ bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+
+ /*
+ * Propagate key change back to data update path, in particular so it
+ * knows the extent has been poisoned and it's safe to change the
+ * checksum
+ */
+ if (u && !ret)
+ bch2_bkey_buf_copy(&u->k, c, new);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
struct bch_read_bio *rbio,
struct bvec_iter bvec_iter,
@@ -462,7 +534,8 @@ retry:
err:
bch2_trans_iter_exit(trans, &iter);
- if (bch2_err_matches(ret, BCH_ERR_data_read_retry))
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_data_read_retry))
goto retry;
if (ret) {
@@ -486,13 +559,21 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
+
struct btree_trans *trans = bch2_trans_get(c);
+ struct bkey_buf sk;
+ bch2_bkey_buf_init(&sk);
+ bkey_init(&sk.k->k);
+
trace_io_read_retry(&rbio->bio);
this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
bvec_iter_sectors(rbio->bvec_iter));
- if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
+ get_rbio_extent(trans, rbio, &sk);
+
+ if (!bkey_deleted(&sk.k->k) &&
+ bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
bch2_mark_io_failure(&failed, &rbio->pick,
rbio->ret == -BCH_ERR_data_read_retry_csum_err);
@@ -513,7 +594,7 @@ static void bch2_rbio_retry(struct work_struct *work)
int ret = rbio->data_update
? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
- : __bch2_read(trans, rbio, iter, inum, &failed, flags);
+ : __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags);
if (ret) {
rbio->ret = ret;
@@ -534,6 +615,7 @@ static void bch2_rbio_retry(struct work_struct *work)
}
bch2_rbio_done(rbio);
+ bch2_bkey_buf_exit(&sk, c);
bch2_trans_put(trans);
}
@@ -958,6 +1040,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
bvec_iter_sectors(iter));
goto out_read_done;
}
+
+ if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
+ !orig->data_update)
+ return -BCH_ERR_extent_poisened;
retry_pick:
ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
@@ -966,6 +1052,16 @@ retry_pick:
goto hole;
if (unlikely(ret < 0)) {
+ if (ret == -BCH_ERR_data_read_csum_err) {
+ int ret2 = maybe_poison_extent(trans, orig, data_btree, k);
+ if (ret2) {
+ ret = ret2;
+ goto err;
+ }
+
+ trace_and_count(c, io_read_fail_and_poison, &orig->bio);
+ }
+
struct printbuf buf = PRINTBUF;
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "%s\n ", bch2_err_str(ret));
@@ -1263,12 +1359,15 @@ out_read_done:
int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, subvol_inum inum,
- struct bch_io_failures *failed, unsigned flags)
+ struct bch_io_failures *failed,
+ struct bkey_buf *prev_read,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_buf sk;
struct bkey_s_c k;
+ enum btree_id data_btree;
int ret;
EBUG_ON(rbio->data_update);
@@ -1279,7 +1378,7 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
BTREE_ITER_slots);
while (1) {
- enum btree_id data_btree = BTREE_ID_extents;
+ data_btree = BTREE_ID_extents;
bch2_trans_begin(trans);
@@ -1311,6 +1410,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
k = bkey_i_to_s_c(sk.k);
+ if (unlikely(flags & BCH_READ_in_retry)) {
+ if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k)))
+ failed->nr = 0;
+ bch2_bkey_buf_copy(prev_read, c, sk.k);
+ }
+
/*
* With indirect extents, the amount of data to read is the min
* of the original extent and the indirect extent:
@@ -1326,13 +1431,14 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
data_btree, k,
offset_into_extent, failed, flags, -1);
+ swap(bvec_iter.bi_size, bytes);
+
if (ret)
goto err;
if (flags & BCH_READ_last_fragment)
break;
- swap(bvec_iter.bi_size, bytes);
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
err:
if (ret == -BCH_ERR_data_read_retry_csum_err_maybe_userspace)
@@ -1344,9 +1450,7 @@ err:
break;
}
- bch2_trans_iter_exit(trans, &iter);
-
- if (ret) {
+ if (unlikely(ret)) {
struct printbuf buf = PRINTBUF;
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
@@ -1362,6 +1466,7 @@ err:
bch2_rbio_done(rbio);
}
+ bch2_trans_iter_exit(trans, &iter);
bch2_bkey_buf_exit(&sk, c);
return ret;
}