diff options
Diffstat (limited to 'libbcachefs/io_write.c')
-rw-r--r-- | libbcachefs/io_write.c | 297 |
1 files changed, 120 insertions, 177 deletions
diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c index a2e6b305..07b55839 100644 --- a/libbcachefs/io_write.c +++ b/libbcachefs/io_write.c @@ -402,61 +402,36 @@ static int bch2_write_index_default(struct bch_write_op *op) /* Writes */ -void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out, - struct bch_write_op *op, u64 offset, const char *fmt, ...) +void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...) { - if (op->subvol) - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, out, - (subvol_inum) { op->subvol, op->pos.inode, }, - offset << 9)); - else { - struct bpos pos = op->pos; - pos.offset = offset; - lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); - } - - prt_str(out, "write error: "); - - va_list args; - va_start(args, fmt); - prt_vprintf(out, fmt, args); - va_end(args); - - if (op->flags & BCH_WRITE_move) { - struct data_update *u = container_of(op, struct data_update, op); - - prt_printf(out, "\n from internal move "); - bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k)); - } -} + struct printbuf buf = PRINTBUF; -void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64 offset, - const char *fmt, ...) -{ - if (op->subvol) - bch2_inum_offset_err_msg(op->c, out, + if (op->subvol) { + bch2_inum_offset_err_msg(op->c, &buf, (subvol_inum) { op->subvol, op->pos.inode, }, offset << 9); - else { + } else { struct bpos pos = op->pos; pos.offset = offset; - bch2_inum_snap_offset_err_msg(op->c, out, pos); + bch2_inum_snap_offset_err_msg(op->c, &buf, pos); } - prt_str(out, "write error: "); + prt_str(&buf, "write error: "); va_list args; va_start(args, fmt); - prt_vprintf(out, fmt, args); + prt_vprintf(&buf, fmt, args); va_end(args); if (op->flags & BCH_WRITE_move) { struct data_update *u = container_of(op, struct data_update, op); - prt_printf(out, "\n from internal move "); - bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k)); + prt_printf(&buf, "\n from internal move "); + bch2_bkey_val_to_text(&buf, op->c, bkey_i_to_s_c(u->k.k)); } + + bch_err_ratelimited(op->c, "%s", buf.buf); + printbuf_exit(&buf); } void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, @@ -554,7 +529,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) test_bit(ptr->dev, op->failed.d)); if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) - return -EIO; + return -BCH_ERR_data_write_io; } if (dst != src) @@ -598,11 +573,8 @@ static void __bch2_write_index(struct bch_write_op *op) if (unlikely(ret && !bch2_err_matches(ret, EROFS))) { struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - struct printbuf buf = PRINTBUF; - bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k), + bch2_write_op_error(op, bkey_start_offset(&insert->k), "btree update error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } if (ret) @@ -611,7 +583,7 @@ static void __bch2_write_index(struct bch_write_op *op) out: /* If some a bucket wasn't written, we can't erasure code it: */ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) - bch2_open_bucket_write_error(c, &op->open_buckets, dev); + bch2_open_bucket_write_error(c, &op->open_buckets, dev, -BCH_ERR_data_write_io); bch2_open_buckets_put(c, &op->open_buckets); return; @@ -837,7 +809,6 @@ static int bch2_write_rechecksum(struct bch_fs *c, { struct bio *bio = &op->wbio.bio; struct bch_extent_crc_unpacked new_crc; - int ret; /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ @@ -845,10 +816,10 @@ static int bch2_write_rechecksum(struct bch_fs *c, bch2_csum_type_is_encryption(new_csum_type)) new_csum_type = op->crc.csum_type; - ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, - NULL, &new_crc, - op->crc.offset, op->crc.live_size, - new_csum_type); + int ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, + NULL, &new_crc, + op->crc.offset, op->crc.live_size, + new_csum_type); if (ret) return ret; @@ -858,44 +829,12 @@ static int bch2_write_rechecksum(struct bch_fs *c, return 0; } -static int bch2_write_decrypt(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct nonce nonce = extent_nonce(op->version, op->crc); - struct bch_csum csum; - int ret; - - if (!bch2_csum_type_is_encryption(op->crc.csum_type)) - return 0; - - /* - * If we need to decrypt data in the write path, we'll no longer be able - * to verify the existing checksum (poly1305 mac, in this case) after - * it's decrypted - this is the last point we'll be able to reverify the - * checksum: - */ - csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); - if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) - return -EIO; - - ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); - op->crc.csum_type = 0; - op->crc.csum = (struct bch_csum) { 0, 0 }; - return ret; -} - -static enum prep_encoded_ret { - PREP_ENCODED_OK, - PREP_ENCODED_ERR, - PREP_ENCODED_CHECKSUM_ERR, - PREP_ENCODED_DO_WRITE, -} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) +static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) { struct bch_fs *c = op->c; struct bio *bio = &op->wbio.bio; - - if (!(op->flags & BCH_WRITE_data_encoded)) - return PREP_ENCODED_OK; + struct bch_csum csum; + int ret = 0; BUG_ON(bio_sectors(bio) != op->crc.compressed_size); @@ -906,12 +845,13 @@ static enum prep_encoded_ret { (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || op->incompressible)) { if (!crc_is_compressed(op->crc) && - op->csum_type != op->crc.csum_type && - bch2_write_rechecksum(c, op, op->csum_type) && - !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; + op->csum_type != op->crc.csum_type) { + ret = bch2_write_rechecksum(c, op, op->csum_type); + if (ret) + return ret; + } - return PREP_ENCODED_DO_WRITE; + return 1; } /* @@ -919,20 +859,24 @@ static enum prep_encoded_ret { * is, we have to decompress it: */ if (crc_is_compressed(op->crc)) { - struct bch_csum csum; - - if (bch2_write_decrypt(op)) - return PREP_ENCODED_CHECKSUM_ERR; - /* Last point we can still verify checksum: */ - csum = bch2_checksum_bio(c, op->crc.csum_type, - extent_nonce(op->version, op->crc), - bio); + struct nonce nonce = extent_nonce(op->version, op->crc); + csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio); if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; + goto csum_err; + + if (bch2_csum_type_is_encryption(op->crc.csum_type)) { + ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio); + if (ret) + return ret; - if (bch2_bio_uncompress_inplace(op, bio)) - return PREP_ENCODED_ERR; + op->crc.csum_type = 0; + op->crc.csum = (struct bch_csum) { 0, 0 }; + } + + ret = bch2_bio_uncompress_inplace(op, bio); + if (ret) + return ret; } /* @@ -944,22 +888,44 @@ static enum prep_encoded_ret { * If the data is checksummed and we're only writing a subset, * rechecksum and adjust bio to point to currently live data: */ - if ((op->crc.live_size != op->crc.uncompressed_size || - op->crc.csum_type != op->csum_type) && - bch2_write_rechecksum(c, op, op->csum_type) && - !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; + if (op->crc.live_size != op->crc.uncompressed_size || + op->crc.csum_type != op->csum_type) { + ret = bch2_write_rechecksum(c, op, op->csum_type); + if (ret) + return ret; + } /* * If we want to compress the data, it has to be decrypted: */ - if ((op->compression_opt || - bch2_csum_type_is_encryption(op->crc.csum_type) != - bch2_csum_type_is_encryption(op->csum_type)) && - bch2_write_decrypt(op)) - return PREP_ENCODED_CHECKSUM_ERR; + if (bch2_csum_type_is_encryption(op->crc.csum_type) && + (op->compression_opt || op->crc.csum_type != op->csum_type)) { + struct nonce nonce = extent_nonce(op->version, op->crc); + csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio); + if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) + goto csum_err; + + ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio); + if (ret) + return ret; - return PREP_ENCODED_OK; + op->crc.csum_type = 0; + op->crc.csum = (struct bch_csum) { 0, 0 }; + } + + return 0; +csum_err: + bch2_write_op_error(op, op->pos.offset, + "error verifying existing checksum while moving existing data (memory corruption?)\n" + " expected %0llx:%0llx got %0llx:%0llx type %s", + op->crc.csum.hi, + op->crc.csum.lo, + csum.hi, + csum.lo, + op->crc.csum_type < BCH_CSUM_NR + ? __bch2_csum_types[op->crc.csum_type] + : "(unknown)"); + return -BCH_ERR_data_write_csum; } static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, @@ -974,29 +940,28 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, bool page_alloc_failed = false; int ret, more = 0; + if (op->incompressible) + op->compression_opt = 0; + BUG_ON(!bio_sectors(src)); ec_buf = bch2_writepoint_ec_buf(c, wp); - switch (bch2_write_prep_encoded_data(op, wp)) { - case PREP_ENCODED_OK: - break; - case PREP_ENCODED_ERR: - ret = -EIO; - goto err; - case PREP_ENCODED_CHECKSUM_ERR: - goto csum_err; - case PREP_ENCODED_DO_WRITE: - /* XXX look for bug here */ - if (ec_buf) { - dst = bch2_write_bio_alloc(c, wp, src, - &page_alloc_failed, - ec_buf); - bio_copy_data(dst, src); - bounce = true; + if (unlikely(op->flags & BCH_WRITE_data_encoded)) { + ret = bch2_write_prep_encoded_data(op, wp); + if (ret < 0) + goto err; + if (ret) { + if (ec_buf) { + dst = bch2_write_bio_alloc(c, wp, src, + &page_alloc_failed, + ec_buf); + bio_copy_data(dst, src); + bounce = true; + } + init_append_extent(op, wp, op->version, op->crc); + goto do_write; } - init_append_extent(op, wp, op->version, op->crc); - goto do_write; } if (ec_buf || @@ -1089,12 +1054,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, * data can't be modified (by userspace) while it's in * flight. */ - if (bch2_rechecksum_bio(c, src, version, op->crc, + ret = bch2_rechecksum_bio(c, src, version, op->crc, &crc, &op->crc, src_len >> 9, bio_sectors(src) - (src_len >> 9), - op->csum_type)) - goto csum_err; + op->csum_type); + if (ret) + goto err; /* * rchecksum_bio sets compression_type on crc from op->crc, * this isn't always correct as sometimes we're changing @@ -1104,12 +1070,12 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, crc.nonce = nonce; } else { if ((op->flags & BCH_WRITE_data_encoded) && - bch2_rechecksum_bio(c, src, version, op->crc, + (ret = bch2_rechecksum_bio(c, src, version, op->crc, NULL, &op->crc, src_len >> 9, bio_sectors(src) - (src_len >> 9), - op->crc.csum_type)) - goto csum_err; + op->crc.csum_type))) + goto err; crc.compressed_size = dst_len >> 9; crc.uncompressed_size = src_len >> 9; @@ -1168,16 +1134,6 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, do_write: *_dst = dst; return more; -csum_err: - { - struct printbuf buf = PRINTBUF; - bch2_write_op_error(&buf, op, op->pos.offset, - "error verifying existing checksum while rewriting existing data (memory corruption?)"); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - } - - ret = -EIO; err: if (to_wbio(dst)->bounce) bch2_bio_free_pages_pool(c, dst); @@ -1255,38 +1211,35 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) { struct bch_fs *c = op->c; struct btree_trans *trans = bch2_trans_get(c); + int ret = 0; for_each_keylist_key(&op->insert_keys, orig) { - int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, bkey_start_pos(&orig->k), orig->k.p, BTREE_ITER_intent, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); })); - - if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - - struct printbuf buf = PRINTBUF; - bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k), - "btree update error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - } - - if (ret) { - op->error = ret; + if (ret) break; - } } bch2_trans_put(trans); + + if (ret && !bch2_err_matches(ret, EROFS)) { + struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); + bch2_write_op_error(op, bkey_start_offset(&insert->k), + "btree update error: %s", bch2_err_str(ret)); + } + + if (ret) + op->error = ret; } static void __bch2_nocow_write_done(struct bch_write_op *op) { if (unlikely(op->flags & BCH_WRITE_io_error)) { - op->error = -EIO; + op->error = -BCH_ERR_data_write_io; } else if (unlikely(op->flags & BCH_WRITE_convert_unwritten)) bch2_nocow_write_convert_unwritten(op); } @@ -1436,11 +1389,8 @@ err: darray_exit(&buckets); if (ret) { - struct printbuf buf = PRINTBUF; - bch2_write_op_error(&buf, op, op->pos.offset, + bch2_write_op_error(op, op->pos.offset, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); op->error = ret; op->flags |= BCH_WRITE_submitted; } @@ -1480,7 +1430,7 @@ err_bucket_stale: "pointer to invalid bucket in nocow path on device %llu\n %s", stale_at->b.inode, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + ret = -BCH_ERR_data_write_invalid_ptr; } else { /* We can retry this: */ ret = -BCH_ERR_transaction_restart; @@ -1558,13 +1508,9 @@ err: op->flags |= BCH_WRITE_submitted; if (unlikely(ret < 0)) { - if (!(op->flags & BCH_WRITE_alloc_nowait)) { - struct printbuf buf = PRINTBUF; - bch2_write_op_error(&buf, op, op->pos.offset, + if (!(op->flags & BCH_WRITE_alloc_nowait)) + bch2_write_op_error(op, op->pos.offset, "%s(): %s", __func__, bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - } op->error = ret; break; } @@ -1691,11 +1637,8 @@ CLOSURE_CALLBACK(bch2_write) wbio_init(bio)->put_bio = false; if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) { - struct printbuf buf = PRINTBUF; - bch2_write_op_error(&buf, op, op->pos.offset, - "misaligned write"); - printbuf_exit(&buf); - op->error = -EIO; + bch2_write_op_error(op, op->pos.offset, "misaligned write"); + op->error = -BCH_ERR_data_write_misaligned; goto err; } |