diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-11-17 11:42:17 -0900 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-11-23 14:09:23 -0900 |
commit | 53e32f70166b9d19b910c4cf79b3c13b5af50296 (patch) | |
tree | 4698571230bc880b190ae96542282a9d711dc92f | |
parent | 49b02a1ff0c18c0a2facc7d981bde42efead0fed (diff) |
bcache: btree write path improvements, kill struct bbio
-rw-r--r-- | drivers/md/bcache/blockdev.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/btree_cache.c | 7 | ||||
-rw-r--r-- | drivers/md/bcache/btree_gc.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/btree_io.c | 182 | ||||
-rw-r--r-- | drivers/md/bcache/btree_io.h | 11 | ||||
-rw-r--r-- | drivers/md/bcache/btree_types.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.c | 21 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/error.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/fs-io.c | 26 | ||||
-rw-r--r-- | drivers/md/bcache/fs.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 126 | ||||
-rw-r--r-- | drivers/md/bcache/io.h | 13 | ||||
-rw-r--r-- | drivers/md/bcache/io_types.h | 24 | ||||
-rw-r--r-- | drivers/md/bcache/move.c | 8 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 14 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 6 |
17 files changed, 220 insertions(+), 234 deletions(-)
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c index 81fc5c8dd360..cd231f5ee737 100644 --- a/drivers/md/bcache/blockdev.c +++ b/drivers/md/bcache/blockdev.c @@ -205,7 +205,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, } if (!(d->disk = alloc_disk(1)) || - bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio))) { + bioset_init(&d->bio_split, 4, offsetof(struct bch_read_bio, bio))) { pr_err("cannot allocate disk"); ida_simple_remove(&bch_blockdev_minor, minor); return -ENOMEM; diff --git a/drivers/md/bcache/btree_cache.c b/drivers/md/bcache/btree_cache.c index 3ae92ef40039..b1c51e71304b 100644 --- a/drivers/md/bcache/btree_cache.c +++ b/drivers/md/bcache/btree_cache.c @@ -144,9 +144,6 @@ static struct btree *mca_find(struct cache_set *c, */ static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush) { - struct closure cl; - - closure_init_stack(&cl); lockdep_assert_held(&c->btree_cache_lock); if (!six_trylock_intent(&b->lock)) @@ -171,9 +168,7 @@ static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush) * after the write, since this node is about to be evicted: */ if (btree_node_dirty(b)) - __bch_btree_node_write(b, &cl, -1); - - closure_sync(&cl); + __bch_btree_node_write(c, b, NULL, -1); /* wait for any in flight btree write */ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c index d577f1fd0824..ec97a3679902 100644 --- a/drivers/md/bcache/btree_gc.c +++ b/drivers/md/bcache/btree_gc.c @@ -591,7 +591,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES], bch_btree_build_aux_trees(n); six_unlock_write(&n->lock); - bch_btree_node_write(n, &as->cl, NULL); + bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, NULL, -1); } /* diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c index 53993699874a..1a551b4b319b 100644 --- a/drivers/md/bcache/btree_io.c 
+++ b/drivers/md/bcache/btree_io.c @@ -670,7 +670,7 @@ err: static void btree_node_read_endio(struct bio *bio) { - bch_bbio_endio(to_bbio(bio)); + closure_put(bio->bi_private); } void bch_btree_node_read(struct cache_set *c, struct btree *b) @@ -691,9 +691,9 @@ void bch_btree_node_read(struct cache_set *c, struct btree *b) return; } - percpu_ref_get(&pick.ca->ref); - bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio); + bio->bi_bdev = pick.ca->disk_sb.bdev; + bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_iter.bi_size = btree_bytes(c); bio->bi_end_io = btree_node_read_endio; bio->bi_private = &cl; @@ -701,9 +701,8 @@ void bch_btree_node_read(struct cache_set *c, struct btree *b) bch_bio_map(bio, b->data); - bio_get(bio); - bch_submit_bbio(to_bbio(bio), pick.ca, &pick.ptr, true); - + closure_get(&cl); + bch_generic_make_request(bio, c); closure_sync(&cl); if (cache_fatal_io_err_on(bio->bi_error, @@ -764,18 +763,9 @@ void bch_btree_complete_write(struct cache_set *c, struct btree *b, closure_wake_up(&w->wait); } -static void btree_node_write_unlock(struct closure *cl) +static void btree_node_write_done(struct cache_set *c, struct btree *b) { - struct btree *b = container_of(cl, struct btree, io); - - btree_node_io_unlock(b); -} - -static void btree_node_write_done(struct closure *cl) -{ - struct btree *b = container_of(cl, struct btree, io); struct btree_write *w = btree_prev_write(b); - struct cache_set *c = b->c; /* * Before calling bch_btree_complete_write() - if the write errored, we @@ -786,34 +776,47 @@ static void btree_node_write_done(struct closure *cl) bch_journal_halt(&c->journal); bch_btree_complete_write(c, b, w); - closure_return_with_destructor(cl, btree_node_write_unlock); + btree_node_io_unlock(b); } static void btree_node_write_endio(struct bio *bio) { - struct closure *cl = bio->bi_private; - struct btree *b = container_of(cl, struct btree, io); + struct btree *b = bio->bi_private; + struct cache_set *c = b->c; struct 
bch_write_bio *wbio = to_wbio(bio); + struct bio *orig = wbio->split ? wbio->orig : NULL; + struct closure *cl = !wbio->split ? wbio->cl : NULL; + struct cache *ca = wbio->ca; - if (cache_fatal_io_err_on(bio->bi_error, wbio->bio.ca, "btree write") || + if (cache_fatal_io_err_on(bio->bi_error, ca, "btree write") || bch_meta_write_fault("btree")) set_btree_node_write_error(b); - if (wbio->orig) - bio_endio(wbio->orig); - else if (wbio->bounce) - bch_bio_free_pages_pool(b->c, bio); + if (wbio->bounce) + bch_bio_free_pages_pool(c, bio); + + if (wbio->put_bio) + bio_put(bio); - bch_bbio_endio(to_bbio(bio)); + if (orig) { + bio_endio(orig); + } else { + btree_node_write_done(c, b); + if (cl) + closure_put(cl); + } + + if (ca) + percpu_ref_put(&ca->ref); } -static void do_btree_node_write(struct closure *cl) +void __bch_btree_node_write(struct cache_set *c, struct btree *b, + struct closure *parent, + int idx_to_write) { - struct btree *b = container_of(cl, struct btree, io); struct bio *bio; struct bch_write_bio *wbio; - struct cache_set *c = b->c; - struct bset *i = btree_bset_last(b); + struct bset *i; BKEY_PADDED(key) k; struct bkey_s_extent e; struct bch_extent_ptr *ptr; @@ -821,8 +824,36 @@ static void do_btree_node_write(struct closure *cl) size_t sectors_to_write; void *data; + /* + * We may only have a read lock on the btree node - the dirty bit is our + * "lock" against racing with other threads that may be trying to start + * a write, we do a write iff we clear the dirty bit. 
Since setting the + * dirty bit requires a write lock, we can't race with other threads + * redirtying it: + */ + if (!test_and_clear_bit(BTREE_NODE_dirty, &b->flags)) + return; + + btree_node_io_lock(b); + + BUG_ON(!list_empty(&b->write_blocked)); +#if 0 + /* + * This is an optimization for when journal flushing races with the + * btree node being written for some other reason, and the write the + * journal wanted to flush has already happened - in that case we'd + * prefer not to write a mostly empty bset. It seemed to be buggy, + * though: + */ + if (idx_to_write != -1 && + idx_to_write != btree_node_write_idx(b)) { + btree_node_io_unlock(b); + return; + } +#endif trace_bcache_btree_write(b); + i = btree_bset_last(b); BUG_ON(b->written >= c->sb.btree_node_size); BUG_ON(b->written && !i->u64s); BUG_ON(btree_bset_first(b)->seq != i->seq); @@ -870,25 +901,26 @@ static void do_btree_node_write(struct closure *cl) * break: */ if (bch_journal_error(&c->journal)) { - struct btree_write *w = btree_prev_write(b); - set_btree_node_write_error(b); b->written += sectors_to_write; - bch_btree_complete_write(c, b, w); - closure_return_with_destructor(cl, btree_node_write_unlock); + btree_node_write_done(c, b); + return; } bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->bio_write); - wbio = to_wbio(bio); - wbio->orig = NULL; - wbio->bounce = true; - + wbio = to_wbio(bio); + wbio->cl = parent; + wbio->bounce = true; + wbio->put_bio = true; bio->bi_end_io = btree_node_write_endio; - bio->bi_private = cl; + bio->bi_private = b; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA); + if (parent) + closure_get(parent); + bch_bio_alloc_pages_pool(c, bio, sectors_to_write << 9); memcpy_to_bio(bio, bio->bi_iter, data); @@ -920,67 +952,36 @@ static void do_btree_node_write(struct closure *cl) b->written += sectors_to_write; - bch_submit_bbio_replicas(wbio, c, &k.key, true); - continue_at(cl, btree_node_write_done, NULL); + bch_submit_wbio_replicas(wbio, c, &k.key, 
true); } -/* - * Only requires a read lock: - */ -void __bch_btree_node_write(struct btree *b, struct closure *parent, - int idx_to_write) +void bch_btree_node_write(struct cache_set *c, struct btree *b, + struct closure *parent, + enum six_lock_type lock_type_held, + struct btree_iter *iter, + int idx_to_write) { - /* - * We may only have a read lock on the btree node - the dirty bit is our - * "lock" against racing with other threads that may be trying to start - * a write, we do a write iff we clear the dirty bit. Since setting the - * dirty bit requires a write lock, we can't race with other threads - * redirtying it: - */ - if (!test_and_clear_bit(BTREE_NODE_dirty, &b->flags)) - return; - - btree_node_io_lock(b); + BUG_ON(lock_type_held == SIX_LOCK_write); - BUG_ON(!list_empty(&b->write_blocked)); -#if 0 - /* - * This is an optimization for when journal flushing races with the - * btree node being written for some other reason, and the write the - * journal wanted to flush has already happened - in that case we'd - * prefer not to write a mostly empty bset. 
It seemed to be buggy, - * though: - */ - if (idx_to_write != -1 && - idx_to_write != btree_node_write_idx(b)) { - btree_node_io_unlock(b); - return; - } -#endif - /* - * do_btree_node_write() must not run asynchronously (NULL is passed for - * workqueue) - it needs the lock we have on the btree node - */ - closure_call(&b->io, do_btree_node_write, NULL, parent ?: &b->c->cl); -} + __bch_btree_node_write(c, b, parent, -1); -/* - * Use this one if the node is intent locked: - */ -void bch_btree_node_write(struct btree *b, struct closure *parent, - struct btree_iter *iter) -{ - __bch_btree_node_write(b, parent, -1); + if (lock_type_held == SIX_LOCK_intent || + six_trylock_convert(&b->lock, SIX_LOCK_read, + SIX_LOCK_intent)) { + bch_btree_init_next(c, b, iter); - bch_btree_init_next(b->c, b, iter); + if (lock_type_held == SIX_LOCK_read) + six_lock_downgrade(&b->lock); + } } -static void bch_btree_node_write_dirty(struct btree *b, struct closure *parent) +static void bch_btree_node_write_dirty(struct cache_set *c, struct btree *b, + struct closure *parent) { six_lock_read(&b->lock); BUG_ON(b->level); - __bch_btree_node_write(b, parent, -1); + bch_btree_node_write(c, b, parent, SIX_LOCK_read, NULL, -1); six_unlock_read(&b->lock); } @@ -990,7 +991,8 @@ static void bch_btree_node_write_dirty(struct btree *b, struct closure *parent) /* * Write leaf nodes if the unwritten bset is getting too big: */ -void bch_btree_node_write_lazy(struct btree *b, struct btree_iter *iter) +void bch_btree_node_write_lazy(struct cache_set *c, struct btree *b, + struct btree_iter *iter) { struct btree_node_entry *bne = container_of(btree_bset_last(b), @@ -1001,7 +1003,7 @@ void bch_btree_node_write_lazy(struct btree *b, struct btree_iter *iter) PAGE_SIZE) - bytes < 48 || bytes > BTREE_WRITE_SET_BUFFER) && !btree_node_write_in_flight(b)) - bch_btree_node_write(b, NULL, iter); + bch_btree_node_write(c, b, NULL, SIX_LOCK_intent, iter, -1); } /* @@ -1035,7 +1037,7 @@ restart: */ if (!b->level && 
btree_node_dirty(b)) { rcu_read_unlock(); - bch_btree_node_write_dirty(b, &cl); + bch_btree_node_write_dirty(c, b, &cl); dropped_lock = true; rcu_read_lock(); goto restart; diff --git a/drivers/md/bcache/btree_io.h b/drivers/md/bcache/btree_io.h index 75e514105e85..701e86d484a4 100644 --- a/drivers/md/bcache/btree_io.h +++ b/drivers/md/bcache/btree_io.h @@ -34,10 +34,13 @@ int bch_btree_root_read(struct cache_set *, enum btree_id, void bch_btree_complete_write(struct cache_set *, struct btree *, struct btree_write *); -void __bch_btree_node_write(struct btree *, struct closure *, int); -void bch_btree_node_write(struct btree *, struct closure *, - struct btree_iter *); -void bch_btree_node_write_lazy(struct btree *, struct btree_iter *); +void __bch_btree_node_write(struct cache_set *, struct btree *, + struct closure *, int); +void bch_btree_node_write(struct cache_set *, struct btree *, + struct closure *, enum six_lock_type, + struct btree_iter *, int); +void bch_btree_node_write_lazy(struct cache_set *, struct btree *, + struct btree_iter *); void bch_btree_flush(struct cache_set *); void bch_btree_node_flush_journal_entries(struct cache_set *, struct btree *, diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h index fc6bfe0dad46..0f2603fba344 100644 --- a/drivers/md/bcache/btree_types.h +++ b/drivers/md/bcache/btree_types.h @@ -75,7 +75,6 @@ struct btree { /* lru list */ struct list_head list; - struct closure io; struct btree_write writes[2]; }; diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index d9a0d5bf3b41..8b40e74805cf 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -605,7 +605,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id, b = __btree_root_alloc(c, 0, id, reserve); - bch_btree_node_write(b, writes, NULL); + bch_btree_node_write(c, b, writes, SIX_LOCK_intent, NULL, -1); bch_btree_set_root_initial(c, b, reserve); 
btree_open_bucket_put(c, b); @@ -700,6 +700,7 @@ overwrite: static void btree_node_flush(struct journal *j, struct journal_entry_pin *pin) { + struct cache_set *c = container_of(j, struct cache_set, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[w->index]); @@ -718,7 +719,7 @@ static void btree_node_flush(struct journal *j, struct journal_entry_pin *pin) * shouldn't: */ if (!b->level) - __bch_btree_node_write(b, NULL, w->index); + __bch_btree_node_write(c, b, NULL, w->index); six_unlock_read(&b->lock); } @@ -875,7 +876,8 @@ retry: list_del(&as->write_blocked_list); if (list_empty(&b->write_blocked)) - __bch_btree_node_write(b, NULL, -1); + bch_btree_node_write(c, b, NULL, SIX_LOCK_read, + NULL, -1); six_unlock_read(&b->lock); break; @@ -1334,7 +1336,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter, six_unlock_write(&n2->lock); six_unlock_write(&n1->lock); - bch_btree_node_write(n2, &as->cl, NULL); + bch_btree_node_write(c, n2, &as->cl, SIX_LOCK_intent, NULL, -1); /* * Note that on recursive parent_keys == insert_keys, so we @@ -1354,7 +1356,8 @@ static void btree_split(struct btree *b, struct btree_iter *iter, btree_split_insert_keys(iter, n3, &as->parent_keys, reserve); - bch_btree_node_write(n3, &as->cl, NULL); + bch_btree_node_write(c, n3, &as->cl, SIX_LOCK_intent, + NULL, -1); } } else { trace_bcache_btree_node_compact(b, b->keys.nr.live_u64s); @@ -1365,7 +1368,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter, bch_keylist_add(&as->parent_keys, &n1->key); } - bch_btree_node_write(n1, &as->cl, NULL); + bch_btree_node_write(c, n1, &as->cl, SIX_LOCK_intent, NULL, -1); /* New nodes all written, now make them visible: */ @@ -1664,7 +1667,7 @@ retry: bch_keylist_add(&as->parent_keys, &delete); bch_keylist_add(&as->parent_keys, &n->key); - bch_btree_node_write(n, &as->cl, NULL); + bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, NULL, 
-1); bch_btree_insert_node(parent, iter, &as->parent_keys, reserve, as); @@ -1901,7 +1904,7 @@ unlock: foreground_maybe_merge(i->iter, btree_next_sib); if (btree_node_relock(i->iter, 0)) - bch_btree_node_write_lazy(i->iter->nodes[0], + bch_btree_node_write_lazy(c, i->iter->nodes[0], i->iter); } out: @@ -2193,7 +2196,7 @@ int bch_btree_node_rewrite(struct btree_iter *iter, struct btree *b, trace_bcache_btree_gc_rewrite_node(b); - bch_btree_node_write(n, &as->cl, NULL); + bch_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, NULL, -1); if (parent) { bch_btree_insert_node(parent, iter, diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index abca3398afe2..95ad2ad2e3d7 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -67,15 +67,17 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b) bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio); bio->bi_bdev = pick.ca->disk_sb.bdev; + bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_iter.bi_size = btree_bytes(c); bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); bio->bi_private = &cl; bio->bi_end_io = btree_verify_endio; bch_bio_map(bio, n_sorted); - bch_submit_bbio(to_bbio(bio), pick.ca, &pick.ptr, true); - + closure_get(&cl); + bch_generic_make_request(bio, c); closure_sync(&cl); + bio_put(bio); memcpy(n_ondisk, n_sorted, btree_bytes(c)); diff --git a/drivers/md/bcache/error.h b/drivers/md/bcache/error.h index 5fb052a70b53..48fccbe0d2de 100644 --- a/drivers/md/bcache/error.h +++ b/drivers/md/bcache/error.h @@ -5,7 +5,6 @@ struct cache; struct cache_set; -struct bbio; /* * XXX: separate out errors that indicate on disk data is inconsistent, and flag diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c index 92d7a9bf8dcd..a76f519082ec 100644 --- a/drivers/md/bcache/fs-io.c +++ b/drivers/md/bcache/fs-io.c @@ -770,7 +770,7 @@ static void bch_writepage_io_free(struct closure *cl) { struct bch_writepage_io *io = container_of(cl, struct 
bch_writepage_io, cl); - struct bio *bio = &io->bio.bio.bio; + struct bio *bio = &io->bio.bio; bio_put(bio); } @@ -780,7 +780,7 @@ static void bch_writepage_io_done(struct closure *cl) struct bch_writepage_io *io = container_of(cl, struct bch_writepage_io, cl); struct cache_set *c = io->op.op.c; - struct bio *bio = &io->bio.bio.bio; + struct bio *bio = &io->bio.bio; struct bio_vec *bvec; unsigned i; @@ -841,9 +841,9 @@ static void bch_writepage_do_io(struct bch_writepage_state *w) struct bch_writepage_io *io = w->io; w->io = NULL; - atomic_add(io->bio.bio.bio.bi_vcnt, &io->op.op.c->writeback_pages); + atomic_add(io->bio.bio.bi_vcnt, &io->op.op.c->writeback_pages); - io->op.op.pos.offset = io->bio.bio.bio.bi_iter.bi_sector; + io->op.op.pos.offset = io->bio.bio.bi_iter.bi_sector; closure_call(&io->op.op.cl, bch_write, NULL, &io->cl); continue_at(&io->cl, bch_writepage_io_done, NULL); @@ -865,7 +865,7 @@ alloc_io: w->io = container_of(bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, bch_writepage_bioset), - struct bch_writepage_io, bio.bio.bio); + struct bch_writepage_io, bio.bio); closure_init(&w->io->cl, NULL); w->io->op.ei = ei; @@ -881,7 +881,7 @@ alloc_io: w->io->op.op.index_update_fn = bchfs_write_index_update; } - if (bio_add_page_contig(&w->io->bio.bio.bio, page)) { + if (bio_add_page_contig(&w->io->bio.bio, page)) { bch_writepage_do_io(w); goto alloc_io; } @@ -932,7 +932,7 @@ do_io: w->io->op.new_i_size = i_size; if (wbc->sync_mode == WB_SYNC_ALL) - w->io->bio.bio.bio.bi_opf |= WRITE_SYNC; + w->io->bio.bio.bi_opf |= WRITE_SYNC; /* Before unlocking the page, transfer reservation to w->io: */ old = page_state_cmpxchg(page_state(page), new, { @@ -1002,7 +1002,7 @@ get_pages: done_index = page->index; if (w.io && - !bio_can_add_page_contig(&w.io->bio.bio.bio, page)) + !bio_can_add_page_contig(&w.io->bio.bio, page)) bch_writepage_do_io(&w); if (!w.io && @@ -1389,7 +1389,7 @@ static long __bch_dio_write_complete(struct dio_write *dio) if (dio->iovec && dio->iovec != 
dio->inline_vecs) kfree(dio->iovec); - bio_put(&dio->bio.bio.bio); + bio_put(&dio->bio.bio); return ret; } @@ -1411,11 +1411,11 @@ static void bch_dio_write_done(struct dio_write *dio) if (dio->iop.op.error) dio->error = dio->iop.op.error; - bio_for_each_segment_all(bv, &dio->bio.bio.bio, i) + bio_for_each_segment_all(bv, &dio->bio.bio, i) put_page(bv->bv_page); if (dio->iter.count) - bio_reset(&dio->bio.bio.bio); + bio_reset(&dio->bio.bio); } static void bch_do_direct_IO_write(struct dio_write *dio) @@ -1423,7 +1423,7 @@ static void bch_do_direct_IO_write(struct dio_write *dio) struct file *file = dio->req->ki_filp; struct inode *inode = file->f_inode; struct bch_inode_info *ei = to_bch_ei(inode); - struct bio *bio = &dio->bio.bio.bio; + struct bio *bio = &dio->bio.bio; unsigned flags = 0; int ret; @@ -1513,7 +1513,7 @@ static int bch_direct_IO_write(struct cache_set *c, struct kiocb *req, bio = bio_alloc_bioset(GFP_KERNEL, iov_iter_npages(iter, BIO_MAX_PAGES), bch_dio_write_bioset); - dio = container_of(bio, struct dio_write, bio.bio.bio); + dio = container_of(bio, struct dio_write, bio.bio); dio->req = req; dio->c = c; dio->written = 0; diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c index 024459719e39..2b732c71db7a 100644 --- a/drivers/md/bcache/fs.c +++ b/drivers/md/bcache/fs.c @@ -1490,7 +1490,7 @@ int __init bch_fs_init(void) goto err; bch_writepage_bioset = - bioset_create(4, offsetof(struct bch_writepage_io, bio.bio.bio)); + bioset_create(4, offsetof(struct bch_writepage_io, bio.bio)); if (!bch_writepage_bioset) goto err; @@ -1498,7 +1498,7 @@ int __init bch_fs_init(void) if (!bch_dio_read_bioset) goto err; - bch_dio_write_bioset = bioset_create(4, offsetof(struct dio_write, bio.bio.bio)); + bch_dio_write_bioset = bioset_create(4, offsetof(struct dio_write, bio.bio)); if (!bch_dio_write_bioset) goto err; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 9e750e67ce22..c8dc3cb900a8 100644 --- a/drivers/md/bcache/io.c +++ 
b/drivers/md/bcache/io.c @@ -115,31 +115,24 @@ void bch_bio_alloc_pages_pool(struct cache_set *c, struct bio *bio, /* Bios with headers */ -static void bch_bbio_prep(struct bbio *b, struct cache *ca) +static void bch_submit_wbio(struct cache_set *c, struct bch_write_bio *wbio, + struct cache *ca, const struct bch_extent_ptr *ptr, + bool punt) { - b->ca = ca; - b->bio.bi_iter.bi_sector = b->ptr.offset; - b->bio.bi_bdev = ca ? ca->disk_sb.bdev : NULL; -} - -void bch_submit_bbio(struct bbio *b, struct cache *ca, - const struct bch_extent_ptr *ptr, bool punt) -{ - struct bio *bio = &b->bio; - - b->ptr = *ptr; - bch_bbio_prep(b, ca); - b->submit_time_us = local_clock_us(); - - if (!ca) { - bcache_io_error(ca->set, bio, "device has been removed"); - } else if (punt) - closure_bio_submit_punt(bio, bio->bi_private, ca->set); + wbio->ca = ca; + wbio->submit_time_us = local_clock_us(); + wbio->bio.bi_iter.bi_sector = ptr->offset; + wbio->bio.bi_bdev = ca ? ca->disk_sb.bdev : NULL; + + if (!ca) + bcache_io_error(c, &wbio->bio, "device has been removed"); + else if (punt) + bch_generic_make_request(&wbio->bio, c); else - closure_bio_submit(bio, bio->bi_private); + generic_make_request(&wbio->bio); } -void bch_submit_bbio_replicas(struct bch_write_bio *bio, struct cache_set *c, +void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c, const struct bkey_i *k, bool punt) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); @@ -147,7 +140,7 @@ void bch_submit_bbio_replicas(struct bch_write_bio *bio, struct cache_set *c, struct bch_write_bio *n; struct cache *ca; - BUG_ON(bio->orig); + wbio->split = false; extent_for_each_ptr(e, ptr) { rcu_read_lock(); @@ -157,44 +150,34 @@ void bch_submit_bbio_replicas(struct bch_write_bio *bio, struct cache_set *c, rcu_read_unlock(); if (!ca) { - bch_submit_bbio(&bio->bio, ca, ptr, punt); + bch_submit_wbio(c, wbio, ca, ptr, punt); break; } if (ptr + 1 < &extent_entry_last(e)->ptr) { - n = 
to_wbio(bio_clone_fast(&bio->bio.bio, GFP_NOIO, + n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO, &ca->replica_set)); - n->bio.bio.bi_end_io = bio->bio.bio.bi_end_io; - n->bio.bio.bi_private = bio->bio.bio.bi_private; - n->orig = &bio->bio.bio; + n->bio.bi_end_io = wbio->bio.bi_end_io; + n->bio.bi_private = wbio->bio.bi_private; + n->orig = &wbio->bio; + n->bounce = false; + n->split = true; + n->bio.bi_opf = wbio->bio.bi_opf; __bio_inc_remaining(n->orig); } else { - n = bio; + n = wbio; } if (!journal_flushes_device(ca)) - n->bio.bio.bi_opf |= REQ_FUA; + n->bio.bi_opf |= REQ_FUA; - bch_submit_bbio(&n->bio, ca, ptr, punt); + bch_submit_wbio(c, n, ca, ptr, punt); } } /* IO errors */ -void bch_bbio_endio(struct bbio *bio) -{ - struct closure *cl = bio->bio.bi_private; - struct cache *ca = bio->ca; - - bch_account_io_completion_time(ca, bio->submit_time_us, - bio_op(&bio->bio)); - bio_put(&bio->bio); - if (ca) - percpu_ref_put(&ca->ref); - closure_put(cl); -} - /* Writes */ static void __bch_write(struct closure *); @@ -295,7 +278,7 @@ static void bch_write_index(struct closure *cl) static void bch_write_discard(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct bio *bio = &op->bio->bio.bio; + struct bio *bio = &op->bio->bio; struct bpos end = op->pos; end.offset += bio_sectors(bio); @@ -356,28 +339,31 @@ static void bch_write_endio(struct bio *bio) struct closure *cl = bio->bi_private; struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); struct bch_write_bio *wbio = to_wbio(bio); - struct cache *ca = wbio->bio.ca; + struct bio *orig = wbio->orig; + struct cache *ca = wbio->ca; if (cache_nonfatal_io_err_on(bio->bi_error, ca, "data write")) set_closure_fn(cl, bch_write_io_error, op->c->wq); - if (wbio->orig) { - if (bio->bi_error) - wbio->orig->bi_error = bio->bi_error; - bio_endio(wbio->orig); - } else if (wbio->bounce) { - bch_bio_free_pages_pool(op->c, bio); - } - - bch_account_io_completion_time(ca, 
- wbio->bio.submit_time_us, + bch_account_io_completion_time(ca, wbio->submit_time_us, REQ_OP_WRITE); - if (wbio->split) - bio_put(&wbio->bio.bio); if (ca) percpu_ref_put(&ca->ref); - closure_put(cl); + + if (bio->bi_error && orig) + orig->bi_error = bio->bi_error; + + if (wbio->bounce) + bch_bio_free_pages_pool(op->c, bio); + + if (wbio->put_bio) + bio_put(bio); + + if (orig) + bio_endio(orig); + else + closure_put(cl); } static int bch_write_extent(struct bch_write_op *op, @@ -423,7 +409,7 @@ static int bch_write_extent(struct bch_write_op *op, wbio = to_wbio(bio); wbio->orig = NULL; wbio->bounce = false; - wbio->split = false; + wbio->put_bio = false; ret = 0; } else if (csum_type != BCH_CSUM_NONE || compression_type != BCH_COMPRESSION_NONE) { @@ -458,7 +444,7 @@ static int bch_write_extent(struct bch_write_op *op, wbio = to_wbio(bio); wbio->orig = NULL; wbio->bounce = true; - wbio->split = true; + wbio->put_bio = true; /* * Set the (uncompressed) size of the key we're creating to the @@ -510,7 +496,7 @@ static int bch_write_extent(struct bch_write_op *op, wbio = to_wbio(bio); wbio->orig = NULL; wbio->bounce = false; - wbio->split = bio != orig; + wbio->put_bio = bio != orig; ret = bio != orig; } @@ -519,11 +505,11 @@ static int bch_write_extent(struct bch_write_op *op, bio->bi_private = &op->cl; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + closure_get(bio->bi_private); #ifndef CONFIG_BCACHE_NO_IO - bch_submit_bbio_replicas(wbio, op->c, &e->k_i, false); + bch_submit_wbio_replicas(wbio, op->c, &e->k_i, false); #else - bch_bbio_prep(&wbio->bio, NULL); - closure_get(bio->bi_private); + wbio->ca = NULL; bio_endio(bio); #endif return ret; @@ -532,7 +518,7 @@ static int bch_write_extent(struct bch_write_op *op, static void __bch_write(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct bio *bio = &op->bio->bio.bio; + struct bio *bio = &op->bio->bio; unsigned open_bucket_nr = 0; struct open_bucket *b; int ret; @@ -718,7 
+704,7 @@ void bch_wake_delayed_writes(unsigned long data) void bch_write(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct bio *bio = &op->bio->bio.bio; + struct bio *bio = &op->bio->bio; struct cache_set *c = op->c; u64 inode = op->pos.inode; @@ -982,7 +968,7 @@ static void cache_promote_done(struct closure *cl) struct cache_promote_op *op = container_of(cl, struct cache_promote_op, cl); - bch_bio_free_pages_pool(op->write.op.c, &op->write.wbio.bio.bio); + bch_bio_free_pages_pool(op->write.op.c, &op->write.wbio.bio); kfree(op); } @@ -1006,7 +992,7 @@ static void __bch_read_endio(struct cache_set *c, struct bch_read_bio *rbio) BUG_ON(!rbio->split || !rbio->bounce); /* we now own pages: */ - swap(promote->write.wbio.bio.bio.bi_vcnt, rbio->bio.bi_vcnt); + swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt); rbio->promote = NULL; bch_rbio_done(c, rbio); @@ -1099,7 +1085,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig, promote_op = kmalloc(sizeof(*promote_op) + sizeof(struct bio_vec) * pages, GFP_NOIO); if (promote_op) { - struct bio *promote_bio = &promote_op->write.wbio.bio.bio; + struct bio *promote_bio = &promote_op->write.wbio.bio; bio_init(promote_bio); promote_bio->bi_max_vecs = pages; @@ -1186,7 +1172,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig, rbio->bio.bi_end_io = bch_read_endio; if (promote_op) { - struct bio *promote_bio = &promote_op->write.wbio.bio.bio; + struct bio *promote_bio = &promote_op->write.wbio.bio; promote_bio->bi_iter = rbio->bio.bi_iter; memcpy(promote_bio->bi_io_vec, rbio->bio.bi_io_vec, diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h index 6f9148c37230..b7668b4e7194 100644 --- a/drivers/md/bcache/io.h +++ b/drivers/md/bcache/io.h @@ -3,10 +3,11 @@ #include "io_types.h" -#define to_bbio(_bio) container_of((_bio), struct bbio, bio) - #define to_wbio(_bio) \ - container_of((_bio), struct bch_write_bio, bio.bio) 
+ container_of((_bio), struct bch_write_bio, bio) + +#define to_rbio(_bio) \ + container_of((_bio), struct bch_read_bio, bio) void bch_bio_free_pages_pool(struct cache_set *, struct bio *); void bch_bio_alloc_pages_pool(struct cache_set *, struct bio *, size_t); @@ -72,13 +73,9 @@ enum bch_read_flags { void bch_read(struct cache_set *, struct bch_read_bio *, u64); -void bch_bbio_endio(struct bbio *); - void bch_generic_make_request(struct bio *, struct cache_set *); void bch_bio_submit_work(struct work_struct *); -void bch_submit_bbio(struct bbio *, struct cache *, - const struct bch_extent_ptr *, bool); -void bch_submit_bbio_replicas(struct bch_write_bio *, struct cache_set *, +void bch_submit_wbio_replicas(struct bch_write_bio *, struct cache_set *, const struct bkey_i *, bool); int bch_discard(struct cache_set *, struct bpos, struct bpos, diff --git a/drivers/md/bcache/io_types.h b/drivers/md/bcache/io_types.h index 28365dc40c24..b8c47978a9e9 100644 --- a/drivers/md/bcache/io_types.h +++ b/drivers/md/bcache/io_types.h @@ -8,14 +8,6 @@ #include <linux/llist.h> #include <linux/workqueue.h> -/* XXX kill kill kill */ -struct bbio { - struct cache *ca; - struct bch_extent_ptr ptr; - unsigned submit_time_us; - struct bio bio; -}; - struct bch_read_bio { /* * Reads will often have to be split, and if the extent being read from @@ -70,10 +62,18 @@ bch_rbio_parent(struct bch_read_bio *rbio) } struct bch_write_bio { - struct bio *orig; - unsigned bounce:1, - split:1; - struct bbio bio; + struct cache *ca; + union { + struct bio *orig; + struct closure *cl; + }; + + unsigned submit_time_us; + unsigned split:1, + bounce:1, + put_bio:1; + + struct bio bio; }; struct bch_replace_info { diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c index cf87fde10a23..03e36e2fecc9 100644 --- a/drivers/md/bcache/move.c +++ b/drivers/md/bcache/move.c @@ -186,7 +186,7 @@ static void moving_io_destructor(struct closure *cl) atomic_sub(io->write.key.k.size, 
&ctxt->sectors_in_flight); wake_up(&ctxt->wait); - bch_bio_free_pages(&io->write.wbio.bio.bio); + bch_bio_free_pages(&io->write.wbio.bio); kfree(io); } @@ -296,9 +296,9 @@ int bch_data_move(struct cache_set *c, return -ENOMEM; } - migrate_bio_init(io, &io->write.wbio.bio.bio, k.k->size); - bio_get(&io->write.wbio.bio.bio); - io->write.wbio.bio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); + migrate_bio_init(io, &io->write.wbio.bio, k.k->size); + bio_get(&io->write.wbio.bio); + io->write.wbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); bch_migrate_write_init(c, &io->write, wp, k, move_ptr, 0); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index c8ed80f58bf8..66bca3ee71da 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -212,7 +212,7 @@ static void bio_complete(struct search *s) static void do_bio_hook(struct search *s, struct bio *orig_bio) { int rw = bio_data_dir(orig_bio); - struct bio *bio = rw ? &s->wbio.bio.bio : &s->rbio.bio; + struct bio *bio = rw ? &s->wbio.bio : &s->rbio.bio; bio_init(bio); __bio_clone_fast(bio, orig_bio); @@ -229,7 +229,7 @@ static void search_free(struct closure *cl) bio_complete(s); if (s->iop.bio) - bio_put(&s->iop.bio->bio.bio); + bio_put(&s->iop.bio->bio); closure_debug_destroy(cl); mempool_free(s, &s->d->c->search); @@ -331,7 +331,7 @@ static void cached_dev_read_done_bh(struct closure *cl) * * @orig_bio must actually be a bbio with a valid key. 
*/ -void __cache_promote(struct cache_set *c, struct bbio *orig_bio, +void __cache_promote(struct cache_set *c, struct bch_read_bio *orig_bio, struct bkey_s_c old, struct bkey_s_c new, unsigned write_flags) @@ -437,10 +437,10 @@ static int cached_dev_cache_miss(struct btree_iter *iter, struct search *s, //to_bbio(miss)->key.k = KEY(s->inode, // bio_end_sector(miss), // bio_sectors(miss)); - to_bbio(miss)->ca = NULL; + to_rbio(miss)->ca = NULL; closure_get(&s->cl); - __cache_promote(s->iop.c, to_bbio(miss), + __cache_promote(s->iop.c, to_rbio(miss), bkey_i_to_s_c(&replace.key), bkey_to_s_c(&KEY(replace.key.k.p.inode, replace.key.k.p.offset, @@ -548,7 +548,7 @@ static void cached_dev_write_complete(struct closure *cl) static void cached_dev_write(struct cached_dev *dc, struct search *s) { struct closure *cl = &s->cl; - struct bio *bio = &s->wbio.bio.bio; + struct bio *bio = &s->wbio.bio; bool writeback = false; bool bypass = s->bypass; struct bkey insert_key = KEY(s->inode, @@ -656,7 +656,7 @@ static void __cached_dev_make_request(struct request_queue *q, struct bio *bio) s = search_alloc(bio, d); trace_bcache_request_start(s->d, bio); - clone = rw ? &s->wbio.bio.bio : &s->rbio.bio; + clone = rw ? 
&s->wbio.bio : &s->rbio.bio; if (!bio->bi_iter.bi_size) { if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 23a6ccdecf77..3c5a207196b6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1150,10 +1150,10 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, sizeof(struct btree_interior_update)) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || - bioset_init(&c->btree_read_bio, 1, offsetof(struct bbio, bio)) || + bioset_init(&c->btree_read_bio, 1, 0) || bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio)) || bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio)) || - bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio.bio)) || + bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio)) || mempool_init_page_pool(&c->bio_bounce_pages, max_t(unsigned, c->sb.btree_node_size, @@ -1976,7 +1976,7 @@ static const char *cache_alloc(struct bcache_superblock *sb, sizeof(u64), GFP_KERNEL)) || !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) || bioset_init(&ca->replica_set, 4, - offsetof(struct bch_write_bio, bio.bio)) || + offsetof(struct bch_write_bio, bio)) || !(ca->sectors_written = alloc_percpu(*ca->sectors_written))) goto err; |