summaryrefslogtreecommitdiff
path: root/fs/bcachefs/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/io.c')
-rw-r--r--fs/bcachefs/io.c137
1 files changed, 91 insertions, 46 deletions
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 34cab253c86d..12d77ec6de23 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -15,6 +15,7 @@
#include "clock.h"
#include "debug.h"
#include "disk_groups.h"
+#include "ec.h"
#include "error.h"
#include "extents.h"
#include "io.h"
@@ -302,6 +303,7 @@ static void __bch2_write_index(struct bch_write_op *op)
struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct bkey_i *src, *dst = keys->keys, *n, *k;
+ unsigned dev;
int ret;
for (src = keys->keys; src != keys->top; src = n) {
@@ -345,6 +347,10 @@ static void __bch2_write_index(struct bch_write_op *op)
}
}
out:
+ /* If some a bucket wasn't written, we can't erasure code it: */
+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX)
+ bch2_open_bucket_write_error(c, &op->open_buckets, dev);
+
bch2_open_buckets_put(c, &op->open_buckets);
return;
err:
@@ -421,7 +427,8 @@ static void init_append_extent(struct bch_write_op *op,
static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
struct write_point *wp,
struct bio *src,
- bool *page_alloc_failed)
+ bool *page_alloc_failed,
+ void *buf)
{
struct bch_write_bio *wbio;
struct bio *bio;
@@ -431,11 +438,18 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write);
wbio = wbio_init(bio);
- wbio->bounce = true;
wbio->put_bio = true;
/* copy WRITE_SYNC flag */
wbio->bio.bi_opf = src->bi_opf;
+ if (buf) {
+ bio->bi_iter.bi_size = output_available;
+ bch2_bio_map(bio, buf);
+ return bio;
+ }
+
+ wbio->bounce = true;
+
/*
* We can't use mempool for more than c->sb.encoded_extent_max
* worth of pages, but we'd like to allocate more if we can:
@@ -600,14 +614,18 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
struct bio *src = &op->wbio.bio, *dst = src;
struct bvec_iter saved_iter;
struct bkey_i *key_to_write;
+ void *ec_buf;
unsigned key_to_write_offset = op->insert_keys.top_p -
op->insert_keys.keys_p;
- unsigned total_output = 0;
- bool bounce = false, page_alloc_failed = false;
+ unsigned total_output = 0, total_input = 0;
+ bool bounce = false;
+ bool page_alloc_failed = false;
int ret, more = 0;
BUG_ON(!bio_sectors(src));
+ ec_buf = bch2_writepoint_ec_buf(c, wp);
+
switch (bch2_write_prep_encoded_data(op, wp)) {
case PREP_ENCODED_OK:
break;
@@ -617,16 +635,26 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
case PREP_ENCODED_CHECKSUM_ERR:
goto csum_err;
case PREP_ENCODED_DO_WRITE:
+ if (ec_buf) {
+ dst = bch2_write_bio_alloc(c, wp, src,
+ &page_alloc_failed,
+ ec_buf);
+ bio_copy_data(dst, src);
+ bounce = true;
+ }
init_append_extent(op, wp, op->version, op->crc);
goto do_write;
}
- if (op->compression_type ||
+ if (ec_buf ||
+ op->compression_type ||
(op->csum_type &&
!(op->flags & BCH_WRITE_PAGES_STABLE)) ||
(bch2_csum_type_is_encryption(op->csum_type) &&
!(op->flags & BCH_WRITE_PAGES_OWNED))) {
- dst = bch2_write_bio_alloc(c, wp, src, &page_alloc_failed);
+ dst = bch2_write_bio_alloc(c, wp, src,
+ &page_alloc_failed,
+ ec_buf);
bounce = true;
}
@@ -729,7 +757,8 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
if (dst != src)
bio_advance(dst, dst_len);
bio_advance(src, src_len);
- total_output += dst_len;
+ total_output += dst_len;
+ total_input += src_len;
} while (dst->bi_iter.bi_size &&
src->bi_iter.bi_size &&
wp->sectors_free &&
@@ -742,16 +771,20 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
dst->bi_iter = saved_iter;
- if (!bounce && more) {
- dst = bio_split(src, total_output >> 9,
+ if (dst == src && more) {
+ BUG_ON(total_output != total_input);
+
+ dst = bio_split(src, total_input >> 9,
GFP_NOIO, &c->bio_write);
- wbio_init(dst)->put_bio = true;
+ wbio_init(dst)->put_bio = true;
+ /* copy WRITE_SYNC flag */
+ dst->bi_opf = src->bi_opf;
}
dst->bi_iter.bi_size = total_output;
/* Free unneeded pages after compressing: */
- if (bounce)
+ if (to_wbio(dst)->bounce)
while (dst->bi_vcnt > DIV_ROUND_UP(dst->bi_iter.bi_size, PAGE_SIZE))
mempool_free(dst->bi_io_vec[--dst->bi_vcnt].bv_page,
&c->bio_bounce_pages);
@@ -760,6 +793,10 @@ do_write:
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
+ bch2_ec_add_backpointer(c, wp,
+ bkey_start_pos(&key_to_write->k),
+ total_input >> 9);
+
dst->bi_end_io = bch2_write_endio;
dst->bi_private = &op->cl;
bio_set_op_attrs(dst, REQ_OP_WRITE, 0);
@@ -774,10 +811,10 @@ csum_err:
"rewriting existing data (memory corruption?)");
ret = -EIO;
err:
- if (bounce) {
+ if (to_wbio(dst)->bounce)
bch2_bio_free_pages_pool(c, dst);
+ if (to_wbio(dst)->put_bio)
bio_put(dst);
- }
return ret;
}
@@ -789,6 +826,8 @@ static void __bch2_write(struct closure *cl)
struct write_point *wp;
int ret;
again:
+ memset(&op->failed, 0, sizeof(op->failed));
+
do {
/* +1 for possible cache device: */
if (op->open_buckets.nr + op->nr_replicas + 1 >
@@ -803,6 +842,7 @@ again:
wp = bch2_alloc_sectors_start(c,
op->target,
+ op->opts.erasure_code,
op->write_point,
&op->devs_have,
op->nr_replicas,
@@ -882,8 +922,6 @@ void bch2_write(struct closure *cl)
op->start_time = local_clock();
- memset(&op->failed, 0, sizeof(op->failed));
-
bch2_keylist_init(&op->insert_keys, op->inline_keys);
wbio_init(&op->wbio.bio)->put_bio = false;
@@ -1557,8 +1595,10 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
if (!pick_ret)
goto hole;
- if (pick_ret < 0)
- goto no_device;
+ if (pick_ret < 0) {
+ __bcache_io_error(c, "no device to read from");
+ goto err;
+ }
if (pick_ret > 0)
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
@@ -1683,31 +1723,46 @@ noclone:
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
- if (!rbio->have_ioref)
- goto no_device_postclone;
-
percpu_down_read_preempt_disable(&c->usage_lock);
bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
percpu_up_read_preempt_enable(&c->usage_lock);
- this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_USER],
- bio_sectors(&rbio->bio));
+ if (likely(!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT)))) {
+ bio_inc_remaining(&orig->bio);
+ trace_read_split(&orig->bio);
+ }
+
+ if (!rbio->pick.idx) {
+ if (!rbio->have_ioref) {
+ __bcache_io_error(c, "no device to read from");
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+ }
- bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_USER],
+ bio_sectors(&rbio->bio));
+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
- if (likely(!(flags & BCH_READ_IN_RETRY))) {
- if (!(flags & BCH_READ_LAST_FRAGMENT)) {
- bio_inc_remaining(&orig->bio);
- trace_read_split(&orig->bio);
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ submit_bio(&rbio->bio);
+ else
+ submit_bio_wait(&rbio->bio);
+ } else {
+ /* Attempting reconstruct read: */
+ if (bch2_ec_read_extent(c, rbio)) {
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
}
- submit_bio(&rbio->bio);
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ bio_endio(&rbio->bio);
+ }
+out:
+ if (likely(!(flags & BCH_READ_IN_RETRY))) {
return 0;
} else {
int ret;
- submit_bio_wait(&rbio->bio);
-
rbio->context = RBIO_CONTEXT_UNBOUND;
bch2_read_endio(&rbio->bio);
@@ -1722,22 +1777,12 @@ noclone:
return ret;
}
-no_device_postclone:
- if (!rbio->split)
- rbio->bio.bi_end_io = rbio->end_io;
- bch2_rbio_free(rbio);
-no_device:
- __bcache_io_error(c, "no device to read from");
-
- if (likely(!(flags & BCH_READ_IN_RETRY))) {
- orig->bio.bi_status = BLK_STS_IOERR;
-
- if (flags & BCH_READ_LAST_FRAGMENT)
- bch2_rbio_done(orig);
- return 0;
- } else {
+err:
+ if (flags & BCH_READ_IN_RETRY)
return READ_ERR;
- }
+
+ orig->bio.bi_status = BLK_STS_IOERR;
+ goto out_read_done;
hole:
/*
@@ -1749,7 +1794,7 @@ hole:
orig->hole = true;
zero_fill_bio_iter(&orig->bio, iter);
-
+out_read_done:
if (flags & BCH_READ_LAST_FRAGMENT)
bch2_rbio_done(orig);
return 0;