author    Kent Overstreet <kent.overstreet@gmail.com>    2017-03-13 11:55:46 -0800
committer Kent Overstreet <kent.overstreet@gmail.com>    2017-03-17 19:49:28 -0800
commit    4399d160b1a688dfafdd3a470e16b34192bb70ee (patch)
tree      23b73cba456308e6ea0bc10f23bc8057c88ed155
parent    dcc012886b8874822a1aeb6ce67665812c5a7ca7 (diff)
bcachefs: improve buffered read performance
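
Make buffered reads extent-aware: bchfs_read() now takes a btree
iterator (shared across all the bios issued by a single ->readpages()
call) and an optional readpages_iter, and the new readpage_bio_extend()
grows each read bio towards the end of the current extent - first with
pages still on the readahead list, then, when the extent is checksummed
or compressed, with newly allocated pages past the readahead window -
so a whole encoded extent can be read in one IO.

Also:
 - size readahead bios for BCH_ENCODED_EXTENT_MAX, not just the number
   of pages requested
 - clear BCH_READ_MAY_REUSE_BIO after the first extent of each bio, so
   only the first read may take over the original bio
 - move bch_increment_clock() out of the callers and into
   bch_read_extent_iter()
 - add bcache_read_split and bcache_read_bounce tracepoints
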
-rw-r--r--  fs/bcachefs/fs-io.c              280
-rw-r--r--  fs/bcachefs/io.c                  10
-rw-r--r--  fs/bcachefs/request.c              2
-rw-r--r--  include/trace/events/bcachefs.h   10
4 files changed, 196 insertions(+), 106 deletions(-)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index af9936f23c67..8a73d4a3b07c 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -522,6 +522,17 @@ static bool bio_can_add_page_contig(struct bio *bio, struct page *page)
bio_end_sector(bio) == offset;
}
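+/*
+ * Append a full page to @bio with no bounds checking - the caller must
+ * ensure the bio has room:
+ */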
+static void __bio_add_page(struct bio *bio, struct page *page)
+{
+ bio->bi_io_vec[bio->bi_vcnt++] = (struct bio_vec) {
+ .bv_page = page,
+ .bv_len = PAGE_SIZE,
+ .bv_offset = 0,
+ };
+
+ bio->bi_iter.bi_size += PAGE_SIZE;
+}
+
static int bio_add_page_contig(struct bio *bio, struct page *page)
{
sector_t offset = (sector_t) page->index << (PAGE_SHIFT - 9);
@@ -533,14 +544,7 @@ static int bio_add_page_contig(struct bio *bio, struct page *page)
else if (!bio_can_add_page_contig(bio, page))
return -1;
- bio->bi_io_vec[bio->bi_vcnt++] = (struct bio_vec) {
- .bv_page = page,
- .bv_len = PAGE_SIZE,
- .bv_offset = 0,
- };
-
- bio->bi_iter.bi_size += PAGE_SIZE;
-
+ __bio_add_page(bio, page);
return 0;
}
@@ -564,36 +568,48 @@ static void bch_readpages_end_io(struct bio *bio)
bio_put(bio);
}
-static inline struct page *__readpage_next_page(struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
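+/* state for walking the list of pages the VFS passed to ->readpages(): */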
+struct readpages_iter {
+ struct address_space *mapping;
+ struct list_head pages;
+ unsigned nr_pages;
+};
+
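+/*
+ * Mark @page as fully allocated and insert it into the page cache,
+ * dropping the readahead list's ref; nonzero return means the page was
+ * already present and should be skipped:
+ */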
+static int readpage_add_page(struct readpages_iter *iter, struct page *page)
{
- struct page *page;
+ struct bch_page_state *s = page_state(page);
int ret;
- while (*nr_pages) {
- page = list_entry(pages->prev, struct page, lru);
- prefetchw(&page->flags);
- list_del(&page->lru);
+ BUG_ON(s->reserved);
+ s->allocated = 1;
+ s->sectors = 0;
- ret = add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS);
+ prefetchw(&page->flags);
+ ret = add_to_page_cache_lru(page, iter->mapping,
+ page->index, GFP_NOFS);
+ put_page(page);
+ return ret;
+}
- /* if add_to_page_cache_lru() succeeded, page is locked: */
- put_page(page);
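+/*
+ * Pop pages off the readahead list until one can be added to the page
+ * cache:
+ */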
+static inline struct page *readpage_iter_next(struct readpages_iter *iter)
+{
+ while (iter->nr_pages) {
+ struct page *page =
+ list_last_entry(&iter->pages, struct page, lru);
- if (!ret)
- return page;
+ prefetchw(&page->flags);
+ list_del(&page->lru);
+ iter->nr_pages--;
- (*nr_pages)--;
+ if (!readpage_add_page(iter, page))
+ return page;
}
return NULL;
}
-#define for_each_readpage_page(_mapping, _pages, _nr_pages, _page) \
+#define for_each_readpage_page(_iter, _page) \
for (; \
- ((_page) = __readpage_next_page(_mapping, _pages, &(_nr_pages)));\
- (_nr_pages)--)
+ ((_page) = readpage_iter_next(&(_iter)));)
static void bch_mark_pages_unalloc(struct bio *bio)
{
@@ -628,52 +644,87 @@ static void bch_add_page_sectors(struct bio *bio, struct bkey_s_c k)
}
}
-static void bchfs_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
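+/*
+ * Opportunistically extend @bio towards @offset (the end of the current
+ * extent): first with pages still on the readahead list, then - if
+ * @get_more, i.e. the extent is checksummed or compressed - with newly
+ * allocated pages, so the whole encoded extent can be read in one IO:
+ */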
+static void readpage_bio_extend(struct readpages_iter *iter,
+ struct bio *bio, u64 offset,
+ bool get_more)
{
- struct bio *bio = &rbio->bio;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bio_vec *bv;
- unsigned i;
+ struct page *page;
+ pgoff_t page_offset;
int ret;
- bch_increment_clock(c, bio_sectors(bio), READ);
+ while (bio_end_sector(bio) < offset &&
+ bio->bi_vcnt < bio->bi_max_vecs) {
+ page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT;
- /*
- * Initialize page state:
- * If a page is partly allocated and partly a hole, we want it to be
- * marked BCH_PAGE_UNALLOCATED - so we initially mark all pages
- * allocated and then mark them unallocated as we find holes:
- *
- * Note that the bio hasn't been split yet - it's the only bio that
- * points to these pages. As we walk extents and split @bio, that
- * necessarily be true, the splits won't necessarily be on page
- * boundaries:
- */
- bio_for_each_segment_all(bv, bio, i) {
- struct bch_page_state *s = page_state(bv->bv_page);
+ if (iter->nr_pages) {
+ page = list_last_entry(&iter->pages, struct page, lru);
+ if (page->index != page_offset)
+ break;
+
+ list_del(&page->lru);
+ iter->nr_pages--;
+ } else if (get_more) {
+ rcu_read_lock();
+ page = radix_tree_lookup(&iter->mapping->page_tree, page_offset);
+ rcu_read_unlock();
- EBUG_ON(s->reserved);
+ if (page && !radix_tree_exceptional_entry(page))
+ break;
+
+ page = __page_cache_alloc(readahead_gfp_mask(iter->mapping));
+ if (!page)
+ break;
- s->allocated = 1;
- s->sectors = 0;
+ page->index = page_offset;
+ ClearPageReadahead(bio->bi_io_vec[bio->bi_vcnt - 1].bv_page);
+ } else {
+ break;
+ }
+
+ ret = readpage_add_page(iter, page);
+ if (ret)
+ break;
+
+ __bio_add_page(bio, page);
}
- for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
- POS(inode, bio->bi_iter.bi_sector), k) {
- BKEY_PADDED(k) tmp;
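+ /*
+ * Readahead pages are used up - flag the last page so that
+ * reaching it triggers the next batch of readahead:
+ */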
+ if (!iter->nr_pages)
+ SetPageReadahead(bio->bi_io_vec[bio->bi_vcnt - 1].bv_page);
+}
+
+static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
+ struct bch_read_bio *rbio, u64 inode,
+ struct readpages_iter *readpages_iter)
+{
+ struct bio *bio = &rbio->bio;
+ int flags = BCH_READ_RETRY_IF_STALE|
+ BCH_READ_PROMOTE|
+ BCH_READ_MAY_REUSE_BIO;
+
+ while (1) {
struct extent_pick_ptr pick;
- unsigned bytes, sectors;
+ BKEY_PADDED(k) tmp;
+ struct bkey_s_c k;
+ unsigned bytes;
bool is_last;
+ bch_btree_iter_set_pos(iter, POS(inode, bio->bi_iter.bi_sector));
+
+ k = bch_btree_iter_peek_with_holes(iter);
+ BUG_ON(!k.k);
+
+ if (IS_ERR(k.k)) {
+ int ret = bch_btree_iter_unlock(iter);
+ BUG_ON(!ret);
+ bcache_io_error(c, bio, "btree IO error %i", ret);
+ bio_endio(bio);
+ return;
+ }
+
bkey_reassemble(&tmp.k, k);
- bch_btree_iter_unlock(&iter);
+ bch_btree_iter_unlock(iter);
k = bkey_i_to_s_c(&tmp.k);
- if (!bkey_extent_is_allocation(k.k) ||
- bkey_extent_is_compressed(k))
- bch_mark_pages_unalloc(bio);
-
bch_extent_pick_ptr(c, k, &pick);
if (IS_ERR(pick.ca)) {
bcache_io_error(c, bio, "no device to read from");
@@ -681,25 +732,36 @@ static void bchfs_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
return;
}
- sectors = min_t(u64, k.k->p.offset, bio_end_sector(bio)) -
- bio->bi_iter.bi_sector;
- bytes = sectors << 9;
+ if (readpages_iter)
+ readpage_bio_extend(readpages_iter,
+ bio, k.k->p.offset,
+ pick.ca &&
+ (pick.crc.csum_type ||
+ pick.crc.compression_type));
+
+ bytes = (min_t(u64, k.k->p.offset, bio_end_sector(bio)) -
+ bio->bi_iter.bi_sector) << 9;
is_last = bytes == bio->bi_iter.bi_size;
swap(bio->bi_iter.bi_size, bytes);
if (bkey_extent_is_allocation(k.k))
bch_add_page_sectors(bio, k);
+ if (!bkey_extent_is_allocation(k.k) ||
+ bkey_extent_is_compressed(k))
+ bch_mark_pages_unalloc(bio);
+
+ if (is_last)
+ flags |= BCH_READ_IS_LAST;
+
if (pick.ca) {
PTR_BUCKET(pick.ca, &pick.ptr)->read_prio =
c->prio_clock[READ].hand;
- bch_read_extent(c, rbio, k, &pick,
- BCH_READ_RETRY_IF_STALE|
- BCH_READ_PROMOTE|
- (is_last ? BCH_READ_IS_LAST : 0));
+ bch_read_extent(c, rbio, k, &pick, flags);
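+ /* only the first extent may reuse the original bio: */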
+ flags &= ~BCH_READ_MAY_REUSE_BIO;
} else {
- zero_fill_bio_iter(bio, bio->bi_iter);
+ zero_fill_bio(bio);
if (is_last)
bio_endio(bio);
@@ -711,15 +773,6 @@ static void bchfs_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
swap(bio->bi_iter.bi_size, bytes);
bio_advance(bio, bytes);
}
-
- /*
- * If we get here, it better have been because there was an error
- * reading a btree node
- */
- ret = bch_btree_iter_unlock(&iter);
- BUG_ON(!ret);
- bcache_io_error(c, bio, "btree IO error %i", ret);
- bio_endio(bio);
}
int bch_readpages(struct file *file, struct address_space *mapping,
@@ -727,43 +780,71 @@ int bch_readpages(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct bch_fs *c = inode->i_sb->s_fs_info;
- struct bch_read_bio *rbio = NULL;
+ struct btree_iter iter;
struct page *page;
+ struct readpages_iter readpages_iter = {
+ .mapping = mapping, .nr_pages = nr_pages
+ };
+
+ bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
- pr_debug("reading %u pages", nr_pages);
+ INIT_LIST_HEAD(&readpages_iter.pages);
+ list_add(&readpages_iter.pages, pages);
+ list_del_init(pages);
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_get(&mapping->add_lock);
- for_each_readpage_page(mapping, pages, nr_pages, page) {
-again:
- if (!rbio) {
- rbio = container_of(bio_alloc_bioset(GFP_NOFS,
- min_t(unsigned, nr_pages,
- BIO_MAX_PAGES),
- &c->bio_read),
- struct bch_read_bio, bio);
+ while ((page = readpage_iter_next(&readpages_iter))) {
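+ /*
+ * Size the bio for a full encoded extent, even when readahead
+ * asked for fewer pages:
+ */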
+ unsigned n = max(min_t(unsigned, readpages_iter.nr_pages + 1,
+ BIO_MAX_PAGES),
+ BCH_ENCODED_EXTENT_MAX >> PAGE_SECTOR_SHIFT);
- rbio->bio.bi_end_io = bch_readpages_end_io;
- }
+ struct bch_read_bio *rbio =
+ container_of(bio_alloc_bioset(GFP_NOFS, n,
+ &c->bio_read),
+ struct bch_read_bio, bio);
- if (bio_add_page_contig(&rbio->bio, page)) {
- bchfs_read(c, rbio, inode->i_ino);
- rbio = NULL;
- goto again;
- }
+ rbio->bio.bi_end_io = bch_readpages_end_io;
+ bio_add_page_contig(&rbio->bio, page);
+ bchfs_read(c, &iter, rbio, inode->i_ino, &readpages_iter);
}
- if (rbio)
- bchfs_read(c, rbio, inode->i_ino);
-
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_put(&mapping->add_lock);
- pr_debug("success");
return 0;
}
+static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
+ u64 inode, struct page *page)
+{
+ struct btree_iter iter;
+
+ /*
+ * Initialize page state:
+ * If a page is partly allocated and partly a hole, we want it to be
+ * marked BCH_PAGE_UNALLOCATED - so we initially mark all pages
+ * allocated and then mark them unallocated as we find holes:
+ *
+ * Note that the bio hasn't been split yet - it's the only bio that
+ * points to these pages. As we walk extents and split @bio, that
+ * won't necessarily be true - the splits won't necessarily be on page
+ * boundaries:
+ */
+ struct bch_page_state *s = page_state(page);
+
+ EBUG_ON(s->reserved);
+ s->allocated = 1;
+ s->sectors = 0;
+
+ bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
+ bio_add_page_contig(&rbio->bio, page);
+
+ bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+ bchfs_read(c, &iter, rbio, inode, NULL);
+}
+
int bch_readpage(struct file *file, struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -774,12 +855,9 @@ int bch_readpage(struct file *file, struct page *page)
rbio = container_of(bio_alloc_bioset(GFP_NOFS, 1,
&c->bio_read),
struct bch_read_bio, bio);
- bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
rbio->bio.bi_end_io = bch_readpages_end_io;
- bio_add_page_contig(&rbio->bio, page);
- bchfs_read(c, rbio, inode->i_ino);
-
+ __bchfs_readpage(c, rbio, inode->i_ino, page);
return 0;
}
@@ -1163,12 +1241,10 @@ static int bch_read_single_page(struct page *page,
rbio = container_of(bio_alloc_bioset(GFP_NOFS, 1,
&c->bio_read),
struct bch_read_bio, bio);
- bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
rbio->bio.bi_private = &done;
rbio->bio.bi_end_io = bch_read_single_page_end_io;
- bio_add_page_contig(&rbio->bio, page);
- bchfs_read(c, rbio, inode->i_ino);
+ __bchfs_readpage(c, rbio, inode->i_ino, page);
wait_for_completion(&done);
ret = rbio->bio.bi_error;
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 2b2bae52e85a..f4ad2d194413 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1138,6 +1138,8 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
unsigned skip = iter.bi_sector - bkey_start_offset(k.k);
bool bounce = false, split, read_full = false;
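+ /* do clock accounting here, in the common path for all reads: */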
+ bch_increment_clock(c, bio_sectors(&orig->bio), READ);
+
EBUG_ON(bkey_start_offset(k.k) > iter.bi_sector ||
k.k->p.offset < bvec_iter_end_sector(iter));
@@ -1297,6 +1299,12 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
rbio->submit_time_us = local_clock_us();
+ if (bounce)
+ trace_bcache_read_bounce(&rbio->bio);
+
+ if (!(flags & BCH_READ_IS_LAST))
+ trace_bcache_read_split(&rbio->bio);
+
generic_make_request(&rbio->bio);
}
@@ -1375,8 +1383,6 @@ static void bch_read_iter(struct bch_fs *c, struct bch_read_bio *rbio,
void bch_read(struct bch_fs *c, struct bch_read_bio *bio, u64 inode)
{
- bch_increment_clock(c, bio_sectors(&bio->bio), READ);
-
bch_read_iter(c, bio, bio->bio.bi_iter, inode,
BCH_READ_RETRY_IF_STALE|
BCH_READ_PROMOTE|
diff --git a/fs/bcachefs/request.c b/fs/bcachefs/request.c
index e5670646c165..8d7627499988 100644
--- a/fs/bcachefs/request.c
+++ b/fs/bcachefs/request.c
@@ -467,8 +467,6 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
struct bkey_s_c k;
int ret;
- bch_increment_clock(c, bio_sectors(bio), READ);
-
for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
POS(s->inode, bio->bi_iter.bi_sector), k) {
BKEY_PADDED(k) tmp;
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index feceaf6132b1..24e6478015f2 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -131,6 +131,16 @@ DEFINE_EVENT(bkey, bcache_promote_collision,
TP_ARGS(k)
);
+DEFINE_EVENT(bcache_bio, bcache_read_split,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bcache_bio, bcache_read_bounce,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
TRACE_EVENT(bcache_read,
TP_PROTO(struct bio *bio, bool hit, bool bypass),
TP_ARGS(bio, hit, bypass),