-rw-r--r--  fs/bcachefs/bcachefs_format.h |   2
-rw-r--r--  fs/bcachefs/btree_io.c        |   2
-rw-r--r--  fs/bcachefs/errcode.h         |   4
-rw-r--r--  fs/bcachefs/extents.c         | 165
-rw-r--r--  fs/bcachefs/extents.h         |   7
-rw-r--r--  fs/bcachefs/extents_types.h   |  11
-rw-r--r--  fs/bcachefs/io_read.c         |  10
-rw-r--r--  fs/bcachefs/opts.h            |   5
-rw-r--r--  fs/bcachefs/super-io.c        |   4
9 files changed, 128 insertions(+), 82 deletions(-)
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 7a5b0d211a82..e96d87767020 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -842,6 +842,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS,	struct bch_sb, flags[3], 28, 29);
 LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
+/* one free bit */
 LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
 LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
 LE64_BITMASK(BCH_SB_NOCOW,		struct bch_sb, flags[4], 33, 34);
@@ -861,6 +862,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
 					struct bch_sb, flags[5], 48, 64);
 LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS,	struct bch_sb, flags[6], 0, 4);
 LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
+LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR,	struct bch_sb, flags[6], 14, 20);
 
 static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
 {
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index cd792fee7ee3..6abc9f17ea3c 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1355,7 +1355,7 @@ start:
 		percpu_ref_put(&ca->io_ref);
 		rb->have_ioref = false;
 
-		bch2_mark_io_failure(&failed, &rb->pick);
+		bch2_mark_io_failure(&failed, &rb->pick, false);
 
 		can_retry = bch2_bkey_pick_read_device(c,
 					bkey_i_to_s_c(&b->key),
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index afa16d58041e..493cae4efc37 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -273,7 +273,6 @@
 	x(EIO,				stripe_reconstruct)		\
 	x(EIO,				key_type_error)			\
 	x(EIO,				extent_poisened)		\
-	x(EIO,				no_device_to_read_from)		\
 	x(EIO,				missing_indirect_extent)	\
 	x(EIO,				invalidate_stripe_to_dev)	\
 	x(EIO,				no_encryption_key)		\
@@ -283,6 +282,9 @@
 	x(EIO,				ec_block_read)			\
 	x(EIO,				ec_block_write)			\
 	x(EIO,				data_read)			\
+	x(BCH_ERR_data_read,		no_device_to_read_from)		\
+	x(BCH_ERR_data_read,		data_read_io_err)		\
+	x(BCH_ERR_data_read,		data_read_csum_err)		\
 	x(BCH_ERR_data_read,		data_read_retry)		\
 	x(BCH_ERR_data_read_retry,	data_read_retry_avoid)		\
 	x(BCH_ERR_data_read_retry_avoid,data_read_retry_device_offline)	\
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 032cd0bda017..04946d9911f5 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -58,7 +58,8 @@ struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
 }
 
 void bch2_mark_io_failure(struct bch_io_failures *failed,
-			  struct extent_ptr_decoded *p)
+			  struct extent_ptr_decoded *p,
+			  bool csum_error)
 {
 	struct bch_dev_io_failures *f =
 		bch2_dev_io_failures(failed, p->ptr.dev);
@@ -66,17 +67,16 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
 		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
 
 		f = &failed->devs[failed->nr++];
-		f->dev = p->ptr.dev;
-		f->idx = p->idx;
-		f->nr_failed = 1;
-		f->nr_retries = 0;
-	} else if (p->idx != f->idx) {
-		f->idx = p->idx;
-		f->nr_failed = 1;
-		f->nr_retries = 0;
-	} else {
-		f->nr_failed++;
+		memset(f, 0, sizeof(*f));
+		f->dev = p->ptr.dev;
 	}
+
+	if (p->do_ec_reconstruct)
+		f->failed_ec = true;
+	else if (!csum_error)
+		f->failed_io = true;
+	else
+		f->failed_csum_nr++;
 }
 
 static inline u64 dev_latency(struct bch_dev *ca)
@@ -94,37 +94,30 @@ static inline int dev_failed(struct bch_dev *ca)
  */
 static inline bool ptr_better(struct bch_fs *c,
 			      const struct extent_ptr_decoded p1,
-			      const struct extent_ptr_decoded p2)
+			      u64 p1_latency,
+			      struct bch_dev *ca1,
+			      const struct extent_ptr_decoded p2,
+			      u64 p2_latency)
 {
-	if (likely(!p1.idx && !p2.idx)) {
-		struct bch_dev *ca1 = bch2_dev_rcu(c, p1.ptr.dev);
-		struct bch_dev *ca2 = bch2_dev_rcu(c, p2.ptr.dev);
-
-		int failed_delta = dev_failed(ca1) - dev_failed(ca2);
-
-		if (failed_delta)
-			return failed_delta < 0;
+	struct bch_dev *ca2 = bch2_dev_rcu(c, p2.ptr.dev);
 
-		u64 l1 = dev_latency(ca1);
-		u64 l2 = dev_latency(ca2);
+	int failed_delta = dev_failed(ca1) - dev_failed(ca2);
+	if (unlikely(failed_delta))
+		return failed_delta < 0;
 
-		/*
-		 * Square the latencies, to bias more in favor of the faster
-		 * device - we never want to stop issuing reads to the slower
-		 * device altogether, so that we can update our latency numbers:
-		 */
-		l1 *= l1;
-		l2 *= l2;
+	if (unlikely(bch2_force_reconstruct_read))
+		return p1.do_ec_reconstruct > p2.do_ec_reconstruct;
 
-		/* Pick at random, biased in favor of the faster device: */
+	if (unlikely(p1.do_ec_reconstruct || p2.do_ec_reconstruct))
+		return p1.do_ec_reconstruct < p2.do_ec_reconstruct;
 
-		return bch2_get_random_u64_below(l1 + l2) > l1;
-	}
+	int crc_retry_delta = (int) p1.crc_retry_nr - (int) p2.crc_retry_nr;
+	if (unlikely(crc_retry_delta))
+		return crc_retry_delta < 0;
 
-	if (bch2_force_reconstruct_read)
-		return p1.idx > p2.idx;
+	/* Pick at random, biased in favor of the faster device: */
 
-	return p1.idx < p2.idx;
+	return bch2_get_random_u64_below(p1_latency + p2_latency) > p1_latency;
 }
 
 /*
@@ -133,73 +126,109 @@ static inline bool ptr_better(struct bch_fs *c,
  * other devices, it will still pick a pointer from avoid.
  */
 int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
-			       struct bch_io_failures *failed,
-			       struct extent_ptr_decoded *pick,
-			       int dev)
+				struct bch_io_failures *failed,
+				struct extent_ptr_decoded *pick,
+				int dev)
 {
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
-	struct bch_dev_io_failures *f;
-	int ret = 0;
+	bool have_csum_errors = false, have_io_errors = false, have_missing_devs = false;
+	bool have_dirty_ptrs = false, have_pick = false;
 
 	if (k.k->type == KEY_TYPE_error)
 		return -BCH_ERR_key_type_error;
 
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
 	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
 		return -BCH_ERR_extent_poisened;
 
 	rcu_read_lock();
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
+	u64 pick_latency;
+
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		have_dirty_ptrs |= !p.ptr.cached;
+
 		/*
 		 * Unwritten extent: no need to actually read, treat it as a
 		 * hole and return 0s:
 		 */
 		if (p.ptr.unwritten) {
-			ret = 0;
-			break;
+			rcu_read_unlock();
+			return 0;
 		}
 
 		/* Are we being asked to read from a specific device? */
 		if (dev >= 0 && p.ptr.dev != dev)
 			continue;
 
-		/*
-		 * If there are any dirty pointers it's an error if we can't
-		 * read:
-		 */
-		if (!ret && !p.ptr.cached)
-			ret = -BCH_ERR_no_device_to_read_from;
-
 		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
 
 		if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
 			continue;
 
-		f = failed ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
-		if (f)
-			p.idx = f->nr_failed < f->nr_retries
-				? f->idx
-				: f->idx + 1;
+		struct bch_dev_io_failures *f =
			unlikely(failed) ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
+		if (unlikely(f)) {
+			p.crc_retry_nr = f->failed_csum_nr;
+			p.has_ec &= ~f->failed_ec;
 
-		if (!p.idx && (!ca || !bch2_dev_is_online(ca)))
-			p.idx++;
+			if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
+				have_io_errors |= f->failed_io;
+				have_io_errors |= f->failed_ec;
+			}
+			have_csum_errors |= !!f->failed_csum_nr;
 
-		if (!p.idx && p.has_ec && bch2_force_reconstruct_read)
-			p.idx++;
+			if (p.has_ec && (f->failed_io || f->failed_csum_nr))
+				p.do_ec_reconstruct = true;
+			else if (f->failed_io ||
+				 f->failed_csum_nr > c->opts.checksum_err_retry_nr)
+				continue;
+		}
 
-		if (p.idx > (unsigned) p.has_ec)
-			continue;
+		have_missing_devs |= ca && !bch2_dev_is_online(ca);
 
-		if (ret > 0 && !ptr_better(c, p, *pick))
-			continue;
+		if (!ca || !bch2_dev_is_online(ca)) {
+			if (!p.has_ec)
+				continue;
+			p.do_ec_reconstruct = true;
+		}
 
-		*pick = p;
-		ret = 1;
+		if (bch2_force_reconstruct_read && p.has_ec)
+			p.do_ec_reconstruct = true;
+
+		u64 p_latency = dev_latency(ca);
+		/*
+		 * Square the latencies, to bias more in favor of the faster
+		 * device - we never want to stop issuing reads to the slower
+		 * device altogether, so that we can update our latency numbers:
+		 */
+		p_latency *= p_latency;
+
+		if (!have_pick ||
+		    ptr_better(c,
			       p, p_latency, ca,
			       *pick, pick_latency)) {
+			*pick = p;
+			pick_latency = p_latency;
+			have_pick = true;
+		}
 	}
 	rcu_read_unlock();
 
-	return ret;
+	if (have_pick)
+		return 1;
+	if (!have_dirty_ptrs)
+		return 0;
+	if (have_missing_devs)
+		return -BCH_ERR_no_device_to_read_from;
+	if (have_csum_errors)
+		return -BCH_ERR_data_read_csum_err;
+	if (have_io_errors)
+		return -BCH_ERR_data_read_io_err;
+
+	WARN_ONCE(1, "unhandled error case in %s\n", __func__);
+	return -EINVAL;
 }
 
 /* KEY_TYPE_btree_ptr: */
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index c50c4f353bab..e78a39e7e18f 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -320,8 +320,9 @@ static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
 ({									\
 	__label__ out;							\
 									\
-	(_ptr).idx = 0;							\
-	(_ptr).has_ec = false;						\
+	(_ptr).has_ec = false;						\
+	(_ptr).do_ec_reconstruct = false;				\
+	(_ptr).crc_retry_nr = 0;					\
 									\
 	__bkey_extent_entry_for_each_from(_entry, _end, _entry)	\
 		switch (__extent_entry_type(_entry)) {			\
@@ -401,7 +402,7 @@ out:									\
 struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
						 unsigned);
 void bch2_mark_io_failure(struct bch_io_failures *,
-			  struct extent_ptr_decoded *);
+			  struct extent_ptr_decoded *, bool);
 int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
			       struct bch_io_failures *,
			       struct extent_ptr_decoded *, int);
diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h
index 43d6c341ecca..e51529dca4c2 100644
--- a/fs/bcachefs/extents_types.h
+++ b/fs/bcachefs/extents_types.h
@@ -20,8 +20,9 @@ struct bch_extent_crc_unpacked {
 };
 
 struct extent_ptr_decoded {
-	unsigned		idx;
 	bool			has_ec;
+	bool			do_ec_reconstruct;
+	u8			crc_retry_nr;
 	struct bch_extent_crc_unpacked crc;
 	struct bch_extent_ptr	ptr;
 	struct bch_extent_stripe_ptr ec;
@@ -31,10 +32,10 @@ struct bch_io_failures {
 	u8			nr;
 	struct bch_dev_io_failures {
 		u8		dev;
-		u8		idx;
-		u8		nr_failed;
-		u8		nr_retries;
-	}			devs[BCH_REPLICAS_MAX];
+		unsigned	failed_csum_nr:6,
+				failed_io:1,
+				failed_ec:1;
+	}			devs[BCH_REPLICAS_MAX + 1];
 };
 
 #endif /* _BCACHEFS_EXTENTS_TYPES_H */
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 887e3c9ac181..aecc645e6516 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -487,7 +487,8 @@ static void bch2_rbio_retry(struct work_struct *work)
		     bvec_iter_sectors(rbio->bvec_iter));
 
 	if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
-		bch2_mark_io_failure(&failed, &rbio->pick);
+		bch2_mark_io_failure(&failed, &rbio->pick,
				     rbio->ret == -BCH_ERR_data_read_retry_csum_err);
 
 	if (!rbio->split) {
 		rbio->bio.bi_status = 0;
@@ -991,7 +992,7 @@ retry_pick:
	    ca &&
	    unlikely(dev_ptr_stale(ca, &pick.ptr))) {
 		read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
-		bch2_mark_io_failure(failed, &pick);
+		bch2_mark_io_failure(failed, &pick, false);
 		percpu_ref_put(&ca->io_ref);
 		goto retry_pick;
 	}
@@ -1154,7 +1155,7 @@ retry_pick:
 	else
 		bch2_trans_unlock_long(trans);
 
-	if (!rbio->pick.idx) {
+	if (likely(!rbio->pick.do_ec_reconstruct)) {
 		if (unlikely(!rbio->have_ioref)) {
 			struct printbuf buf = PRINTBUF;
 			bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
@@ -1215,7 +1216,8 @@ out:
 		rbio = bch2_rbio_free(rbio);
 
 		if (bch2_err_matches(ret, BCH_ERR_data_read_retry_avoid))
-			bch2_mark_io_failure(failed, &pick);
+			bch2_mark_io_failure(failed, &pick,
					     ret == -BCH_ERR_data_read_retry_csum_err);
 
 		return ret;
 	}
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index afb89d318d24..baa9c11acb1a 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -186,6 +186,11 @@ enum fsck_err_opts {
	  OPT_STR(__bch2_csum_opts),					\
	  BCH_SB_DATA_CSUM_TYPE,	BCH_CSUM_OPT_crc32c,		\
	  NULL, NULL)							\
+	x(checksum_err_retry_nr,	u8,				\
+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
+	  OPT_UINT(0, 32),						\
+	  BCH_SB_CSUM_ERR_RETRY_NR,	3,				\
+	  NULL, NULL)							\
 	x(compression,			u8,				\
	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,		\
	  OPT_FN(bch2_opt_compression),					\
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index ee32d043414a..f87e3bf33ec0 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -457,6 +457,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
 
 		if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb))
 			SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30);
+
+		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_extent_flags &&
+		    !BCH_SB_CSUM_ERR_RETRY_NR(sb))
+			SET_BCH_SB_CSUM_ERR_RETRY_NR(sb, 3);
 	}
 
 #ifdef __KERNEL__
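
The new checksum_err_retry_nr option (default 3, settable at format, mount, and runtime per the OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME flags above) bounds how many times a device that returned checksum errors is retried before the read path stops picking it. The sketch below is a self-contained userspace model of the per-pointer decision in bch2_bkey_pick_read_device() after this patch; it is illustrative only — the struct, enum, and helper here are hypothetical stand-ins that mirror the patch, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the new bch_dev_io_failures bitfields from extents_types.h */
struct dev_io_failures {
	unsigned	failed_csum_nr:6,
			failed_io:1,
			failed_ec:1;
};

enum pick_action { PICK_READ, PICK_EC_RECONSTRUCT, PICK_SKIP };

/*
 * Illustrative model of the per-pointer decision: checksum errors are
 * retried up to checksum_err_retry_nr times; I/O errors and exhausted
 * checksum retries fall back to EC reconstruction when a stripe pointer
 * exists, otherwise the device is skipped.
 */
static enum pick_action pick_action(struct dev_io_failures f, bool has_ec,
				    unsigned checksum_err_retry_nr)
{
	if (has_ec && (f.failed_io || f.failed_csum_nr))
		return PICK_EC_RECONSTRUCT;

	if (f.failed_io || f.failed_csum_nr > checksum_err_retry_nr)
		return PICK_SKIP;

	return PICK_READ;
}

int main(void)
{
	struct dev_io_failures f = { .failed_csum_nr = 2 };

	/* With the default of 3 retries, two checksum errors still allow a read: */
	printf("%d\n", pick_action(f, false, 3));	/* PICK_READ (0) */

	f.failed_csum_nr = 4;
	printf("%d\n", pick_action(f, false, 3));	/* PICK_SKIP (2) */
	printf("%d\n", pick_action(f, true, 3));	/* PICK_EC_RECONSTRUCT (1) */
	return 0;
}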