author      Kent Overstreet <kent.overstreet@gmail.com>    2020-06-24 11:05:50 -0400
committer   Kent Overstreet <kent.overstreet@gmail.com>    2020-06-28 18:12:46 -0400
commit      dc618b78628105f479bbab1d649c5a301d51f2c3
tree        0f02e5f9497f9c9365659610462356e9b75893d5
parent      ad83e28c5be20e9afcd9c16bafc556f4ff3433e3
bcachefs: Add support for passing through reads to block device

bch2_read() now takes its read flags from the caller instead of
hardcoding them, and gains a new flag, BCH_READ_PASSTHROUGH_BLOCK_DEV:
when set, reads of key ranges with no extent pointer (holes and
reservations) are submitted unmodified to the underlying block device
instead of being zero filled. A new read_from_ptr bit in struct
bch_read_bio records whether the read went through an extent pointer,
so the endio path can skip stale pointer checks and
checksum/decompression handling for passthrough bios.
-rw-r--r--   fs/bcachefs/fs-common.h  |   3
-rw-r--r--   fs/bcachefs/fs-io.c      |   5
-rw-r--r--   fs/bcachefs/io.c         | 136
-rw-r--r--   fs/bcachefs/io.h         |  11
-rw-r--r--   fs/bcachefs/io_types.h   |   1

5 files changed, 101 insertions, 55 deletions
diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h
index 2273b7961c9b..67c56eb59c3d 100644
--- a/fs/bcachefs/fs-common.h
+++ b/fs/bcachefs/fs-common.h
@@ -2,6 +2,9 @@
 #ifndef _BCACHEFS_FS_COMMON_H
 #define _BCACHEFS_FS_COMMON_H
 
+#include "inode.h"
+#include "dirent.h"
+
 struct posix_acl;
 
 int bch2_create_trans(struct btree_trans *, u64,
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index b53eaa8243a6..da3293e92bda 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -1741,7 +1741,10 @@ start:
 		if (iter->count)
 			closure_get(&dio->cl);
 
-		bch2_read(c, rbio_init(bio, opts), inode->v.i_ino);
+		bch2_read(c, rbio_init(bio, opts), inode->v.i_ino,
+			  BCH_READ_RETRY_IF_STALE|
+			  BCH_READ_MAY_PROMOTE|
+			  BCH_READ_USER_MAPPED);
 	}
 
 	iter->count += shorten;
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 71eede585701..70c28bf6380a 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1159,7 +1159,8 @@ flush_io:
 	 * However, if we're running out of a workqueue, we can't block here
 	 * because we'll be blocking other work items from completing:
 	 */
-	if (current->flags & PF_WQ_WORKER) {
+	if ((current->flags & PF_WQ_WORKER) ||
+	    current->bio_list) {
 		continue_at(cl, bch2_write_index, index_update_wq(op));
 		return;
 	}
@@ -1632,7 +1633,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
 			    struct bch_io_failures *failed, unsigned flags)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter *iter = NULL;
 	struct bkey_on_stack sk;
 	struct bkey_s_c k;
 	int ret;
@@ -1673,7 +1674,8 @@ retry:
 					 offset_into_extent, failed, flags);
 		switch (ret) {
 		case READ_RETRY:
-			goto retry;
+			ret = -EINTR;
+			break;
 		case READ_ERR:
 			goto err;
 		};
@@ -1684,6 +1686,7 @@ retry:
 		swap(bvec_iter.bi_size, bytes);
 		bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
 	}
+	bch2_trans_iter_put(&trans, iter);
 
 	if (ret == -EINTR)
 		goto retry;
@@ -1931,29 +1934,37 @@ static void bch2_read_endio(struct bio *bio)
 	if (!rbio->split)
 		rbio->bio.bi_end_io = rbio->end_io;
 
-	if (bch2_dev_io_err_on(bio->bi_status, ca, "data read")) {
-		bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+	if (bio->bi_status) {
+		if (rbio->have_ioref)
+			bch2_dev_io_error(ca, "data read");
+
+		bch2_rbio_error(rbio,
+				rbio->read_from_ptr
+				? READ_RETRY_AVOID
+				: READ_ERR, bio->bi_status);
 		return;
 	}
 
-	if (rbio->pick.ptr.cached &&
-	    (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
-	     ptr_stale(ca, &rbio->pick.ptr))) {
-		atomic_long_inc(&c->read_realloc_races);
+	if (rbio->read_from_ptr) {
+		if (rbio->pick.ptr.cached &&
+		    (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+		     ptr_stale(ca, &rbio->pick.ptr))) {
+			atomic_long_inc(&c->read_realloc_races);
 
-		if (rbio->flags & BCH_READ_RETRY_IF_STALE)
-			bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
-		else
-			bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
-		return;
-	}
+			if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+				bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
+			else
+				bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
+			return;
+		}
 
-	if (rbio->narrow_crcs ||
-	    crc_is_compressed(rbio->pick.crc) ||
-	    bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
-		context = RBIO_CONTEXT_UNBOUND,	wq = system_unbound_wq;
-	else if (rbio->pick.crc.csum_type)
-		context = RBIO_CONTEXT_HIGHPRI,	wq = system_highpri_wq;
+		if (rbio->narrow_crcs ||
+		    crc_is_compressed(rbio->pick.crc) ||
+		    bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
+			context = RBIO_CONTEXT_UNBOUND,	wq = system_unbound_wq;
+		else if (rbio->pick.crc.csum_type)
+			context = RBIO_CONTEXT_HIGHPRI,	wq = system_highpri_wq;
+	}
 
 	bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
 }
@@ -2001,9 +2012,9 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		       unsigned offset_into_extent,
 		       struct bch_io_failures *failed, unsigned flags)
 {
-	struct extent_ptr_decoded pick;
+	struct extent_ptr_decoded pick = { 0 };
 	struct bch_read_bio *rbio = NULL;
-	struct bch_dev *ca;
+	struct bch_dev *ca = NULL;
 	struct promote_op *promote = NULL;
 	bool bounce = false, read_full = false, narrow_crcs = false;
 	struct bpos pos = bkey_start_pos(k.k);
@@ -2017,22 +2028,33 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		swap(iter.bi_size, bytes);
 		memcpy_to_bio(&orig->bio, iter, d.v->data);
 		swap(iter.bi_size, bytes);
+		/*
+		 * The inline data may be less than the size of the key - zero
+		 * out the rest:
+		 */
 		bio_advance_iter(&orig->bio, &iter, bytes);
 		zero_fill_bio_iter(&orig->bio, iter);
 		goto out_read_done;
 	}
 
 	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
-
-	/* hole or reservation - just zero fill: */
-	if (!pick_ret)
-		goto hole;
-
 	if (pick_ret < 0) {
 		__bcache_io_error(c, "no device to read from");
 		goto err;
 	}
 
+	/* hole or reservation - just zero fill: */
+	if (!pick_ret) {
+		if (!(flags & BCH_READ_PASSTHROUGH_BLOCK_DEV))
+			goto hole;
+
+		pick.crc.compressed_size	= bvec_iter_sectors(iter);
+		pick.crc.uncompressed_size	= bvec_iter_sectors(iter);
+		pick.crc.live_size		= bvec_iter_sectors(iter);
+		offset_into_extent		= 0;
+		goto get_bio;
+	}
+
 	if (pick_ret > 0)
 		ca = bch_dev_bkey_exists(c, pick.ptr.dev);
@@ -2044,14 +2066,10 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
 			goto hole;
 
-		iter.bi_size = pick.crc.compressed_size << 9;
+		iter.bi_size	= pick.crc.compressed_size << 9;
 		goto get_bio;
 	}
 
-	if (!(flags & BCH_READ_LAST_FRAGMENT) ||
-	    bio_flagged(&orig->bio, BIO_CHAIN))
-		flags |= BCH_READ_MUST_CLONE;
-
 	narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
 		bch2_can_narrow_extent_crcs(k, pick.crc);
@@ -2074,6 +2092,12 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		promote = promote_alloc(c, iter, k, &pick, orig->opts,
 					flags, &rbio, &bounce, &read_full);
 
+	/*
+	 * the endio path expects rbio->pick.crc to describe what was actually
+	 * read, because for checksummed/compressed extents we have to read the
+	 * entire extent: if we're not reading the entire extent, adjust
+	 * pick.crc to match:
+	 */
 	if (!read_full) {
 		EBUG_ON(crc_is_compressed(pick.crc));
 		EBUG_ON(pick.crc.csum_type &&
@@ -2093,6 +2117,10 @@
 		offset_into_extent = 0;
 	}
 get_bio:
+	if (!(flags & BCH_READ_LAST_FRAGMENT) ||
+	    bio_flagged(&orig->bio, BIO_CHAIN))
+		flags |= BCH_READ_MUST_CLONE;
+
 	if (rbio) {
 		/*
 		 * promote already allocated bounce rbio:
@@ -2101,7 +2129,7 @@ get_bio:
 		 * here:
 		 */
 		EBUG_ON(rbio->bio.bi_iter.bi_size <
-		       pick.crc.compressed_size << 9);
+			pick.crc.compressed_size << 9);
 		rbio->bio.bi_iter.bi_size =
 			pick.crc.compressed_size << 9;
 	} else if (bounce) {
@@ -2113,8 +2141,14 @@ get_bio:
 				     orig->opts);
 		bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
 
-		rbio->bounce	= true;
-		rbio->split	= true;
+		rbio->bounce			= true;
+		rbio->split			= true;
+		rbio->bio.bi_disk		= orig->bio.bi_disk;
+		rbio->bio.bi_partno		= orig->bio.bi_partno;
+		rbio->bio.bi_opf		= orig->bio.bi_opf;
+		rbio->bio.bi_ioprio		= orig->bio.bi_ioprio;
+		rbio->bio.bi_write_hint		= orig->bio.bi_write_hint;
+		rbio->bio.bi_iter.bi_sector	= orig->bio.bi_iter.bi_sector;
 	} else if (flags & BCH_READ_MUST_CLONE) {
 		/*
 		 * Have to clone if there were any splits, due to error
@@ -2146,6 +2180,7 @@ get_bio:
 	rbio->bvec_iter		= iter;
 	rbio->offset_into_extent= offset_into_extent;
 	rbio->flags		= flags;
+	rbio->read_from_ptr	= pick_ret > 0;
 	rbio->have_ioref	= pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
 	rbio->narrow_crcs	= narrow_crcs;
 	rbio->hole		= 0;
@@ -2160,23 +2195,29 @@ get_bio:
 	INIT_WORK(&rbio->work, NULL);
 
 	rbio->bio.bi_opf	= orig->bio.bi_opf;
-	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
 	rbio->bio.bi_end_io	= bch2_read_endio;
 
+	if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
+		bio_inc_remaining(&orig->bio);
+		trace_read_split(&orig->bio);
+	}
+
 	if (rbio->bounce)
 		trace_read_bounce(&rbio->bio);
 
+	if (!rbio->read_from_ptr) {
+		BUG_ON(!(flags & BCH_READ_PASSTHROUGH_BLOCK_DEV));
+		goto submit;
+	}
+
+	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
+
 	bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
 
 	rcu_read_lock();
 	bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
 	rcu_read_unlock();
 
-	if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
-		bio_inc_remaining(&orig->bio);
-		trace_read_split(&orig->bio);
-	}
-
 	if (!rbio->pick.idx) {
 		if (!rbio->have_ioref) {
 			__bcache_io_error(c, "no device to read from");
@@ -2187,7 +2228,7 @@ get_bio:
 	this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_USER],
 		     bio_sectors(&rbio->bio));
 	bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
-
+submit:
 	if (likely(!(flags & BCH_READ_IN_RETRY)))
 		submit_bio(&rbio->bio);
 	else
@@ -2200,7 +2241,7 @@ get_bio:
 		}
 
 		if (likely(!(flags & BCH_READ_IN_RETRY)))
-			bio_endio(&rbio->bio);
+			bch2_rbio_done(rbio);
 	}
 out:
 	if (likely(!(flags & BCH_READ_IN_RETRY))) {
@@ -2221,7 +2262,6 @@ out:
 
 		return ret;
 	}
-
 err:
 	if (flags & BCH_READ_IN_RETRY)
 		return READ_ERR;
@@ -2245,15 +2285,13 @@ out_read_done:
 	return 0;
 }
 
-void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
+void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+	       u64 inode, unsigned flags)
 {
 	struct btree_trans trans;
 	struct btree_iter *iter;
 	struct bkey_on_stack sk;
 	struct bkey_s_c k;
-	unsigned flags = BCH_READ_RETRY_IF_STALE|
-		BCH_READ_MAY_PROMOTE|
-		BCH_READ_USER_MAPPED;
 	int ret;
 
 	BUG_ON(rbio->_state);
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index 8814a8fb260f..c12a4bac7568 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -127,12 +127,13 @@ enum bch_read_flags {
 	BCH_READ_MAY_PROMOTE		= 1 << 1,
 	BCH_READ_USER_MAPPED		= 1 << 2,
 	BCH_READ_NODECODE		= 1 << 3,
-	BCH_READ_LAST_FRAGMENT		= 1 << 4,
+	BCH_READ_PASSTHROUGH_BLOCK_DEV	= 1 << 4,
+	BCH_READ_LAST_FRAGMENT		= 1 << 5,
 
 	/* internal: */
-	BCH_READ_MUST_BOUNCE		= 1 << 5,
-	BCH_READ_MUST_CLONE		= 1 << 6,
-	BCH_READ_IN_RETRY		= 1 << 7,
+	BCH_READ_MUST_BOUNCE		= 1 << 6,
+	BCH_READ_MUST_CLONE		= 1 << 7,
+	BCH_READ_IN_RETRY		= 1 << 8,
 };
 
 int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *,
@@ -149,7 +150,7 @@ static inline void bch2_read_extent(struct bch_fs *c,
 			 offset_into_extent, NULL, flags);
 }
 
-void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
+void bch2_read(struct bch_fs *, struct bch_read_bio *, u64, unsigned);
 
 static inline struct bch_read_bio *rbio_init(struct bio *bio,
 					     struct bch_io_opts opts)
diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h
index 684e4c9a5d98..19b14835bdf2 100644
--- a/fs/bcachefs/io_types.h
+++ b/fs/bcachefs/io_types.h
@@ -46,6 +46,7 @@ struct bch_read_bio {
 	u16			bounce:1,
 				split:1,
 				kmalloc:1,
+				read_from_ptr:1,
 				have_ioref:1,
 				narrow_crcs:1,
 				hole:1,
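
For context, a minimal sketch of how the reworked interface is driven. bch2_read() now takes its read flags from the caller; the wrapper names below are hypothetical and not part of this patch, they only illustrate the two intended call patterns:

/*
 * Sketch, not part of the patch: the filesystem read path keeps its old
 * behaviour by passing the flags bch2_read() used to hardcode (this is
 * exactly what the fs-io.c hunk above does):
 */
static void example_fs_read(struct bch_fs *c, struct bio *bio,
			    struct bch_io_opts opts, u64 inum)
{
	bch2_read(c, rbio_init(bio, opts), inum,
		  BCH_READ_RETRY_IF_STALE|
		  BCH_READ_MAY_PROMOTE|
		  BCH_READ_USER_MAPPED);
}

/*
 * A hypothetical block device frontend passes the new flag instead: for
 * holes/reservations, __bch2_read_extent() synthesizes a pick.crc sized
 * to the request (bvec_iter_sectors(iter)), leaves read_from_ptr clear
 * and jumps straight to submit, so the bio goes to the backing device
 * unmodified and the endio path skips the stale-pointer and
 * checksum/decompression handling:
 */
static void example_blkdev_read(struct bch_fs *c, struct bio *bio,
				struct bch_io_opts opts, u64 inum)
{
	bch2_read(c, rbio_init(bio, opts), inum,
		  BCH_READ_PASSTHROUGH_BLOCK_DEV);
}

Note that BCH_READ_PASSTHROUGH_BLOCK_DEV takes bit 4, so BCH_READ_LAST_FRAGMENT and the internal flags each shift up one bit; these flags are in-memory only, so renumbering them is safe.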