author		Kent Overstreet <kent.overstreet@linux.dev>	2025-03-16 16:08:41 -0400
committer	Kent Overstreet <kent.overstreet@linux.dev>	2025-03-17 14:23:49 -0400
commit		c0836924b19ae84ad95d7ec97455c96f61b81201 (patch)
tree		e48afe4496a7e6ef8c7ec6a1d5d14064f69747ba /libbcachefs/extents.c
parent		f42ee45c6e6409ad7c971aa37aef69b97d761006 (diff)
Update bcachefs sources to 4d28432bcc5f bcachefs: Validate bch_sb.offset field
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'libbcachefs/extents.c')
-rw-r--r--	libbcachefs/extents.c	147
1 file changed, 80 insertions(+), 67 deletions(-)
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index f62ee96b..1da754a8 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -28,6 +28,8 @@
 #include "trace.h"
 #include "util.h"
 
+#include <linux/random.h>
+
 static const char * const bch2_extent_flags_strs[] = {
 #define x(n, v)	[BCH_EXTENT_FLAG_##n] = #n,
 	BCH_EXTENT_FLAGS()
@@ -94,38 +96,30 @@ static inline int dev_failed(struct bch_dev *ca)
  */
 static inline bool ptr_better(struct bch_fs *c,
 			      const struct extent_ptr_decoded p1,
-			      const struct extent_ptr_decoded p2)
+			      u64 p1_latency,
+			      struct bch_dev *ca1,
+			      const struct extent_ptr_decoded p2,
+			      u64 p2_latency)
 {
-	if (likely(!p1.do_ec_reconstruct &&
-		   !p2.do_ec_reconstruct)) {
-		struct bch_dev *ca1 = bch2_dev_rcu(c, p1.ptr.dev);
-		struct bch_dev *ca2 = bch2_dev_rcu(c, p2.ptr.dev);
-
-		int failed_delta = dev_failed(ca1) - dev_failed(ca2);
-
-		if (failed_delta)
-			return failed_delta < 0;
+	struct bch_dev *ca2 = bch2_dev_rcu(c, p2.ptr.dev);
 
-		u64 l1 = dev_latency(ca1);
-		u64 l2 = dev_latency(ca2);
+	int failed_delta = dev_failed(ca1) - dev_failed(ca2);
+	if (unlikely(failed_delta))
+		return failed_delta < 0;
 
-		/*
-		 * Square the latencies, to bias more in favor of the faster
-		 * device - we never want to stop issuing reads to the slower
-		 * device altogether, so that we can update our latency numbers:
-		 */
-		l1 *= l1;
-		l2 *= l2;
+	if (unlikely(bch2_force_reconstruct_read))
+		return p1.do_ec_reconstruct > p2.do_ec_reconstruct;
 
-		/* Pick at random, biased in favor of the faster device: */
+	if (unlikely(p1.do_ec_reconstruct || p2.do_ec_reconstruct))
+		return p1.do_ec_reconstruct < p2.do_ec_reconstruct;
 
-		return bch2_rand_range(l1 + l2) > l1;
-	}
+	int crc_retry_delta = (int) p1.crc_retry_nr - (int) p2.crc_retry_nr;
+	if (unlikely(crc_retry_delta))
+		return crc_retry_delta < 0;
 
-	if (bch2_force_reconstruct_read)
-		return p1.do_ec_reconstruct > p2.do_ec_reconstruct;
+	/* Pick at random, biased in favor of the faster device: */
 
-	return p1.do_ec_reconstruct < p2.do_ec_reconstruct;
+	return get_random_u64_below(p1_latency + p2_latency) > p1_latency;
 }
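
ptr_better() ranks read candidates by a fixed priority: pointers on failed
devices lose first, forced reconstruction (bch2_force_reconstruct_read) wins
next, then plain reads beat erasure-coded reconstruction, then fewer checksum
retries win, and only as a final tie-break does the latency-weighted coin
flip run. Since the caller squares each latency before passing it in,
get_random_u64_below(p1_latency + p2_latency) > p1_latency prefers p1 with
probability l2^2 / (l1^2 + l2^2), so a device with twice the latency still
serves roughly one read in five and keeps its latency statistics fresh. A
minimal user-space sketch of that bias, with rand_below() and prefer_first()
as hypothetical stand-ins for get_random_u64_below() and the tie-break:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* User-space stand-in for the kernel's get_random_u64_below() */
static uint64_t rand_below(uint64_t ceil)
{
	return ceil ? (uint64_t) rand() % ceil : 0;
}

/*
 * Weighted pick between two devices with raw latencies l1 and l2:
 * returns true (prefer device 1) with probability l2^2 / (l1^2 + l2^2),
 * so the slower device keeps seeing occasional reads.
 */
static bool prefer_first(uint64_t l1, uint64_t l2)
{
	uint64_t p1 = l1 * l1, p2 = l2 * l2;

	return rand_below(p1 + p2) > p1;
}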
 
 /*
@@ -138,86 +132,105 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 			       struct extent_ptr_decoded *pick,
 			       int dev)
 {
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
-	struct bch_dev_io_failures *f;
-	unsigned csum_retry = 0;
-	bool have_csum_retries = false;
-	int ret = 0;
+	bool have_csum_errors = false, have_io_errors = false, have_missing_devs = false;
+	bool have_dirty_ptrs = false, have_pick = false;
 
 	if (k.k->type == KEY_TYPE_error)
 		return -BCH_ERR_key_type_error;
 
-	if (bch2_bkey_extent_ptrs_flags(ptrs) & BCH_EXTENT_FLAG_poisoned)
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
 		return -BCH_ERR_extent_poisened;
-again:
+
 	rcu_read_lock();
 
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
+	u64 pick_latency;
+
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		have_dirty_ptrs |= !p.ptr.cached;
+
 		/*
 		 * Unwritten extent: no need to actually read, treat it as a
 		 * hole and return 0s:
 		 */
 		if (p.ptr.unwritten) {
-			ret = 0;
-			break;
+			rcu_read_unlock();
+			return 0;
 		}
 
 		/* Are we being asked to read from a specific device? */
 		if (dev >= 0 && p.ptr.dev != dev)
 			continue;
 
-		/*
-		 * If there are any dirty pointers it's an error if we can't
-		 * read:
-		 */
-		if (!ret && !p.ptr.cached)
-			ret = -BCH_ERR_no_device_to_read_from;
-
 		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
 
 		if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
 			continue;
 
-		if (unlikely(failed) &&
-		    (f = bch2_dev_io_failures(failed, p.ptr.dev))) {
-			have_csum_retries |= !f->failed_io && f->failed_csum_nr < BCH_MAX_CSUM_RETRIES;
+		struct bch_dev_io_failures *f =
+			unlikely(failed) ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
+		if (unlikely(f)) {
+			p.crc_retry_nr = f->failed_csum_nr;
+			p.has_ec &= ~f->failed_ec;
 
-			if (p.has_ec &&
-			    !f->failed_ec &&
-			    (f->failed_io || f->failed_csum_nr))
+			if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
+				have_io_errors |= f->failed_io;
+				have_io_errors |= f->failed_ec;
+			}
+			have_csum_errors |= !!f->failed_csum_nr;
+
+			if (p.has_ec && (f->failed_io || f->failed_csum_nr))
 				p.do_ec_reconstruct = true;
 			else if (f->failed_io ||
-				 f->failed_csum_nr > csum_retry)
+				 f->failed_csum_nr > c->opts.checksum_err_retry_nr)
				continue;
 		}
 
+		have_missing_devs |= ca && !bch2_dev_is_online(ca);
+
 		if (!ca || !bch2_dev_is_online(ca)) {
-			if (p.has_ec)
-				p.do_ec_reconstruct = true;
-			else
+			if (!p.has_ec)
 				continue;
+			p.do_ec_reconstruct = true;
 		}
 
-		if (p.has_ec && bch2_force_reconstruct_read)
+		if (bch2_force_reconstruct_read && p.has_ec)
 			p.do_ec_reconstruct = true;
 
-		if (ret > 0 && !ptr_better(c, p, *pick))
-			continue;
-
-		*pick = p;
-		ret = 1;
+		u64 p_latency = dev_latency(ca);
+		/*
+		 * Square the latencies, to bias more in favor of the faster
+		 * device - we never want to stop issuing reads to the slower
+		 * device altogether, so that we can update our latency numbers:
+		 */
+		p_latency *= p_latency;
+
+		if (!have_pick ||
+		    ptr_better(c,
+			       p, p_latency, ca,
+			       *pick, pick_latency)) {
+			*pick = p;
+			pick_latency = p_latency;
+			have_pick = true;
+		}
 	}
 	rcu_read_unlock();
 
-	if (unlikely(ret == -BCH_ERR_no_device_to_read_from &&
-		     have_csum_retries &&
-		     csum_retry < BCH_MAX_CSUM_RETRIES)) {
-		csum_retry++;
-		goto again;
-	}
-
-	return ret;
+	if (have_pick)
+		return 1;
+	if (!have_dirty_ptrs)
+		return 0;
+	if (have_missing_devs)
+		return -BCH_ERR_no_device_to_read_from;
+	if (have_csum_errors)
+		return -BCH_ERR_data_read_csum_err;
+	if (have_io_errors)
+		return -BCH_ERR_data_read_io_err;
+
+	WARN_ONCE(1, "unhandled error case in %s\n", __func__);
+	return -EINVAL;
 }
 
 /* KEY_TYPE_btree_ptr: */
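
When the scan finds no usable pointer, bch2_bkey_pick_read_device() reports
the most specific condition it observed, in a fixed priority: a successful
pick returns 1; an extent with no dirty (non-cached) pointers has nothing
that must be readable, so it returns 0; otherwise a missing device outranks
checksum errors, which outrank plain I/O errors. A sketch of that cascade
under hypothetical names -- pick_errcode() and the SAW_* bits mirror the
have_* booleans, and generic errno values stand in for the BCH_ERR codes:

#include <errno.h>

/* Hypothetical bits mirroring the have_* booleans in the pointer scan */
enum {
	SAW_DIRTY_PTRS   = 1 << 0,
	SAW_MISSING_DEVS = 1 << 1,
	SAW_CSUM_ERRORS  = 1 << 2,
	SAW_IO_ERRORS    = 1 << 3,
};

/* Collapse what the scan observed into one code, most specific first */
static int pick_errcode(unsigned seen)
{
	if (!(seen & SAW_DIRTY_PTRS))
		return 0;		/* cached-only extent: not an error */
	if (seen & SAW_MISSING_DEVS)
		return -ENODEV;		/* for -BCH_ERR_no_device_to_read_from */
	if (seen & SAW_CSUM_ERRORS)
		return -EBADMSG;	/* for -BCH_ERR_data_read_csum_err */
	if (seen & SAW_IO_ERRORS)
		return -EIO;		/* for -BCH_ERR_data_read_io_err */
	return -EINVAL;			/* unhandled, like the WARN_ONCE() fallback */
}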
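
A related subtlety is the poisoned-extent check: BCH_EXTENT_FLAG_poisoned is
a bit number (the same index the x-macro above uses to build
bch2_extent_flags_strs), so masking the flags word with the raw enum value
would test the wrong bits; BIT_ULL() turns the index into a mask first. The
pattern in miniature, with illustrative flag names rather than the real
extent flag layout:

#include <stdbool.h>
#include <stdint.h>

#define BIT_ULL(nr)	(1ULL << (nr))	/* equivalent to the kernel's BIT_ULL() */

enum { FLAG_POISONED, FLAG_OTHER };	/* bit numbers 0 and 1 */

static bool flag_set(uint64_t flags, unsigned nr)
{
	/*
	 * "flags & nr" would test the bits of the index itself -- for
	 * nr == 0 it is always false. Masking with BIT_ULL(nr) tests
	 * the intended bit:
	 */
	return flags & BIT_ULL(nr);
}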