summaryrefslogtreecommitdiff
path: root/fs/bcachefs/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/extents.c')
-rw-r--r--fs/bcachefs/extents.c196
1 files changed, 137 insertions, 59 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index a3ec1cc9a5d2..4b012ba1f302 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -193,29 +193,41 @@ unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
return nr_ptrs;
}
-unsigned bch2_extent_ptr_durability(struct bch_fs *c,
- const struct bch_extent_ptr *ptr)
+static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
+ struct extent_ptr_decoded p)
{
+ unsigned i, durability = 0;
struct bch_dev *ca;
- if (ptr->cached)
+ if (p.ptr.cached)
return 0;
- ca = bch_dev_bkey_exists(c, ptr->dev);
+ ca = bch_dev_bkey_exists(c, p.ptr.dev);
- if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
- return 0;
+ if (ca->mi.state != BCH_MEMBER_STATE_FAILED)
+ durability = max_t(unsigned, durability, ca->mi.durability);
+
+ for (i = 0; i < p.ec_nr; i++) {
+ struct ec_stripe *s =
+ genradix_ptr(&c->ec_stripes, p.idx);
- return ca->mi.durability;
+ if (WARN_ON(!s))
+ continue;
+
+ durability = max_t(unsigned, durability, s->nr_redundant);
+ }
+
+ return durability;
}
unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
{
- const struct bch_extent_ptr *ptr;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
unsigned durability = 0;
- extent_for_each_ptr(e, ptr)
- durability += bch2_extent_ptr_durability(c, ptr);
+ extent_for_each_ptr_decode(e, p, entry)
+ durability += bch2_extent_ptr_durability(c, p);
return durability;
}
@@ -258,30 +270,46 @@ bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
return false;
}
+static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e,
+ union bch_extent_entry *entry)
+{
+ union bch_extent_entry *i = e.v->start;
+
+ if (i == entry)
+ return NULL;
+
+ while (extent_entry_next(i) != entry)
+ i = extent_entry_next(i);
+ return i;
+}
+
union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
struct bch_extent_ptr *ptr)
{
- union bch_extent_entry *dst;
- union bch_extent_entry *src;
+ union bch_extent_entry *dst, *src, *prev;
+ bool drop_crc = true;
EBUG_ON(ptr < &e.v->start->ptr ||
ptr >= &extent_entry_last(e)->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
- src = to_entry(ptr + 1);
-
+ src = extent_entry_next(to_entry(ptr));
if (src != extent_entry_last(e) &&
- extent_entry_type(src) == BCH_EXTENT_ENTRY_ptr) {
- dst = to_entry(ptr);
- } else {
- extent_for_each_entry(e, dst) {
- if (dst == to_entry(ptr))
- break;
+ !extent_entry_is_crc(src))
+ drop_crc = false;
- if (extent_entry_next(dst) == to_entry(ptr) &&
- extent_entry_is_crc(dst))
- break;
+ dst = to_entry(ptr);
+ while ((prev = extent_entry_prev(e, dst))) {
+ if (extent_entry_is_ptr(prev))
+ break;
+
+ if (extent_entry_is_crc(prev)) {
+ if (drop_crc)
+ dst = prev;
+ break;
}
+
+ dst = prev;
}
memmove_u64s_down(dst, src,
@@ -423,6 +451,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
entry->crc128.csum.lo = (__force __le64)
swab64((__force u64) entry->crc128.csum.lo);
break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ break;
}
}
break;
@@ -470,6 +500,7 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
const union bch_extent_entry *entry;
struct bch_extent_crc_unpacked crc;
const struct bch_extent_ptr *ptr;
+ const struct bch_extent_stripe_ptr *ec;
struct bch_dev *ca;
bool first = true;
@@ -478,6 +509,18 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
pr_buf(out, " ");
switch (__extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ ptr = entry_to_ptr(entry);
+ ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+ ? bch_dev_bkey_exists(c, ptr->dev)
+ : NULL;
+
+ pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
+ (u64) ptr->offset, ptr->gen,
+ ptr->cached ? " cached" : "",
+ ca && ptr_stale(ca, ptr)
+ ? " stale" : "");
+ break;
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
@@ -490,17 +533,11 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
crc.csum_type,
crc.compression_type);
break;
- case BCH_EXTENT_ENTRY_ptr:
- ptr = entry_to_ptr(entry);
- ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
- ? bch_dev_bkey_exists(c, ptr->dev)
- : NULL;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ ec = &entry->stripe_ptr;
- pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
- (u64) ptr->offset, ptr->gen,
- ptr->cached ? " cached" : "",
- ca && ptr_stale(ca, ptr)
- ? " stale" : "");
+ pr_buf(out, "ec: idx %llu block %u",
+ (u64) ec->idx, ec->block);
break;
default:
pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
@@ -536,6 +573,11 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
f = &failed->devs[failed->nr++];
f->dev = p->ptr.dev;
+ f->idx = p->idx;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else if (p->idx != f->idx) {
+ f->idx = p->idx;
f->nr_failed = 1;
f->nr_retries = 0;
} else {
@@ -550,15 +592,22 @@ static inline bool ptr_better(struct bch_fs *c,
const struct extent_ptr_decoded p1,
const struct extent_ptr_decoded p2)
{
- struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
- struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+ if (likely(!p1.idx && !p2.idx)) {
+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
- u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
- u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
+ /* Pick at random, biased in favor of the faster device: */
+
+ return bch2_rand_range(l1 + l2) > l1;
+ }
- /* Pick at random, biased in favor of the faster device: */
+ if (force_reconstruct_read(c))
+ return p1.idx > p2.idx;
- return bch2_rand_range(l1 + l2) > l1;
+ return p1.idx < p2.idx;
}
static int extent_pick_read_device(struct bch_fs *c,
@@ -579,7 +628,20 @@ static int extent_pick_read_device(struct bch_fs *c,
continue;
f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
- if (f && f->nr_failed >= f->nr_retries)
+ if (f)
+ p.idx = f->nr_failed < f->nr_retries
+ ? f->idx
+ : f->idx + 1;
+
+ if (!p.idx &&
+ !bch2_dev_is_readable(ca))
+ p.idx++;
+
+ if (!p.idx && p.ec_nr)
+ p.idx++;
+
+ if (force_reconstruct_read(c) &&
+ p.idx >= p.ec_nr + 1)
continue;
if (ret && !ptr_better(c, p, *pick))
@@ -616,8 +678,8 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
return "invalid extent entry type";
- if (extent_entry_is_crc(entry))
- return "has crc field";
+ if (!extent_entry_is_ptr(entry))
+ return "has non ptr field";
}
extent_for_each_ptr(e, ptr) {
@@ -754,6 +816,8 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
case BCH_EXTENT_ENTRY_crc128:
entry->crc128.offset += e.k->size - len;
break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ break;
}
if (extent_entry_is_crc(entry))
@@ -1512,7 +1576,18 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
return "invalid extent entry type";
- if (extent_entry_is_crc(entry)) {
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ ptr = entry_to_ptr(entry);
+
+ reason = extent_ptr_invalid(c, e, &entry->ptr,
+ size_ondisk, false);
+ if (reason)
+ return reason;
+ break;
+ case BCH_EXTENT_ENTRY_crc32:
+ case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
if (crc.offset + e.k->size >
@@ -1533,13 +1608,9 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
else if (nonce != crc.offset + crc.nonce)
return "incorrect nonce";
}
- } else {
- ptr = entry_to_ptr(entry);
-
- reason = extent_ptr_invalid(c, e, &entry->ptr,
- size_ondisk, false);
- if (reason)
- return reason;
+ break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ break;
}
}
@@ -1744,6 +1815,7 @@ void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
{
struct bch_extent_crc_unpacked crc;
union bch_extent_entry *pos;
+ unsigned i;
extent_for_each_crc(extent_i_to_s(e), crc, pos)
if (!bch2_crc_unpacked_cmp(crc, p->crc))
@@ -1754,6 +1826,11 @@ void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
found:
p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
__extent_entry_insert(e, pos, to_entry(&p->ptr));
+
+ for (i = 0; i < p->ec_nr; i++) {
+ p->ec[i].type = 1 << BCH_EXTENT_ENTRY_stripe_ptr;
+ __extent_entry_insert(e, pos, to_entry(&p->ec[i]));
+ }
}
/*
@@ -1808,26 +1885,27 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
unsigned target,
unsigned nr_desired_replicas)
{
- struct bch_extent_ptr *ptr;
+ union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas;
if (target && extra > 0)
- extent_for_each_ptr(e, ptr) {
- int n = bch2_extent_ptr_durability(c, ptr);
+ extent_for_each_ptr_decode(e, p, entry) {
+ int n = bch2_extent_ptr_durability(c, p);
if (n && n <= extra &&
- !bch2_dev_in_target(c, ptr->dev, target)) {
- ptr->cached = true;
+ !bch2_dev_in_target(c, p.ptr.dev, target)) {
+ entry->ptr.cached = true;
extra -= n;
}
}
if (extra > 0)
- extent_for_each_ptr(e, ptr) {
- int n = bch2_extent_ptr_durability(c, ptr);
+ extent_for_each_ptr_decode(e, p, entry) {
+ int n = bch2_extent_ptr_durability(c, p);
if (n && n <= extra) {
- ptr->cached = true;
+ entry->ptr.cached = true;
extra -= n;
}
}
@@ -1903,7 +1981,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
if ((extent_entry_type(en_l) !=
extent_entry_type(en_r)) ||
- extent_entry_is_crc(en_l))
+ !extent_entry_is_ptr(en_l))
return BCH_MERGE_NOMERGE;
lp = &en_l->ptr;