diff options
-rw-r--r-- | drivers/md/bcache/Makefile | 6 | ||||
-rw-r--r-- | drivers/md/bcache/backingdev.h | 39 | ||||
-rw-r--r-- | drivers/md/bcache/bch2.c | 540 | ||||
-rw-r--r-- | drivers/md/bcache/bch2.h | 11 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 18 | ||||
-rw-r--r-- | drivers/md/bcache/io.h | 4 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 110 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 14 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.h | 26 |
10 files changed, 680 insertions, 94 deletions
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile index 0fb1b6009da3..c45be858d941 100644 --- a/drivers/md/bcache/Makefile +++ b/drivers/md/bcache/Makefile @@ -2,6 +2,6 @@ obj-$(CONFIG_BCACHE) += bcache.o -bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\ - journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ - util.o writeback.o +bcache-y := alloc.o bch2.o bset.o btree.o debug.o extents.o\ + io.o journal.o movinggc.o request.o stats.o super.o sysfs.o\ + trace.o util.o writeback.o diff --git a/drivers/md/bcache/backingdev.h b/drivers/md/bcache/backingdev.h index 58362eb7902a..038b532e91d8 100644 --- a/drivers/md/bcache/backingdev.h +++ b/drivers/md/bcache/backingdev.h @@ -22,7 +22,11 @@ struct bcache_device { struct kobject kobj; struct cache_set *c; - unsigned int id; + struct bch_fs *c2; + + u64 id; + struct inode *inode; + #define BCACHEDEVNAME_SIZE 12 char name[BCACHEDEVNAME_SIZE]; @@ -49,6 +53,11 @@ struct bcache_device { unsigned int cmd, unsigned long arg); }; +static inline bool bcache_dev_is_attached(struct bcache_device *d) +{ + return d->c != NULL || d->c2 != NULL; +} + enum stop_on_failure { BCH_CACHED_DEV_STOP_AUTO = 0, BCH_CACHED_DEV_STOP_ALWAYS, @@ -87,6 +96,9 @@ struct cached_dev { */ atomic_t running; + struct bio_set bch2_bio_read; + mempool_t bch2_io_write; + /* * Writes take a shared lock from start to finish; scanning for dirty * data to refill the rb tree requires an exclusive lock. @@ -225,6 +237,31 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, } } +static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, + unsigned int cache_mode, bool would_skip, + unsigned int in_use) +{ + if (cache_mode != CACHE_MODE_WRITEBACK || + test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || + in_use > bch_cutoff_writeback_sync) + return false; + + if (bio_op(bio) == REQ_OP_DISCARD) + return false; + + if (dc->partial_stripes_expensive && + bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector, + bio_sectors(bio))) + return true; + + if (would_skip) + return false; + + return (op_is_sync(bio->bi_opf) || + bio->bi_opf & (REQ_META|REQ_PRIO) || + in_use <= bch_cutoff_writeback); +} + static inline void bch_writeback_queue(struct cached_dev *dc) { if (!IS_ERR_OR_NULL(dc->writeback_thread)) diff --git a/drivers/md/bcache/bch2.c b/drivers/md/bcache/bch2.c new file mode 100644 index 000000000000..b0ada5a61564 --- /dev/null +++ b/drivers/md/bcache/bch2.c @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Main bcache entry point - handle a read or a write request and decide what to + * do with it; the make_request functions are called by the block layer. + * + * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> + * Copyright 2012 Google, Inc. + */ + +#include "backingdev.h" +#include "bch2.h" + +#include "../../../fs/bcachefs/bcachefs.h" +#include "../../../fs/bcachefs/alloc_foreground.h" +#include "../../../fs/bcachefs/btree_update.h" +#include "../../../fs/bcachefs/buckets.h" +#include "../../../fs/bcachefs/io.h" +#include "../../../fs/bcachefs/fs.h" +#include "../../../fs/bcachefs/fs-common.h" +#include "../../../fs/bcachefs/str_hash.h" + +#include "io.h" + +#include <linux/kthread.h> +//#include <trace/events/bcache.h> + +static unsigned fs_used_percent(struct bch_fs *c) +{ + struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); + + return div64_u64(usage.used * 100, usage.capacity); +} + +static inline bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, + struct bkey *end) +{ + return false; +} + +/* Reads: */ + +struct bch_cached_dev_rbio { + struct bio *orig; + struct cached_dev *dc; + struct bch_read_bio rbio; +}; + +static void cached_dev_read_endio(struct bio *bio) +{ + struct bch_cached_dev_rbio *c_rbio = + container_of(bio, struct bch_cached_dev_rbio, rbio.bio); + struct bio *orig = c_rbio->orig; + struct cached_dev *dc = c_rbio->dc; + + bio_put(bio); + cached_dev_put(dc); + bio_endio(orig); +} + +static void cached_dev_read(struct cached_dev *dc, struct bio *bio) +{ + struct bch_fs *c = dc->disk.c2; + struct bch_read_bio *rbio; + struct bch_cached_dev_rbio *c_rbio; + struct bch_io_opts opts = { 0 }; + unsigned flags = BCH_READ_RETRY_IF_STALE| + BCH_READ_USER_MAPPED| + BCH_READ_PASSTHROUGH_BLOCK_DEV; + + if (!bch_check_should_bypass(dc, bio, c->opts.block_size, 0)) { + /* XXX: implement promotes from block devices in bch2: + flags |= BCH_READ_MAY_PROMOTE; + */ + } + + /* XXX: plumb through write point for promotes: + unsigned write_point = writepoint_hashed((unsigned long) current); + */ + + rbio = rbio_init(bio_clone_fast(bio, GFP_NOIO, &dc->bch2_bio_read), opts); + rbio->bio.bi_end_io = cached_dev_read_endio; + c_rbio = container_of(rbio, struct bch_cached_dev_rbio, rbio); + c_rbio->orig = bio; + c_rbio->dc = dc; + + bch2_read(c, rbio, dc->disk.id, flags); +} + +/* Writes: */ + +struct bch_write { + struct closure cl; + + struct bcache_device *d; + struct bio *orig_bio; + struct bio backingdev_bio; + + blk_status_t status; + unsigned long start_time; + + unsigned int bypass:1; + unsigned int writeback:1; + struct bch_write_op op; +}; + +static void cached_dev_bio_complete(struct closure *cl) +{ + struct bch_write *io = container_of(cl, struct bch_write, cl); + struct cached_dev *dc = container_of(io->d, struct cached_dev, disk); + + generic_end_io_acct(io->d->disk->queue, bio_op(io->orig_bio), + &io->d->disk->part0, io->start_time); + + //trace_bcache_request_end(s->d, s->orig_bio); + io->orig_bio->bi_status = io->status; + bio_endio(io->orig_bio); + + closure_debug_destroy(cl); + mempool_free(io, &dc->bch2_io_write); + + cached_dev_put(dc); +} + +static void cached_dev_write_complete(struct closure *cl) +{ + struct bch_write *s = container_of(cl, struct bch_write, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + up_read_non_owner(&dc->writeback_lock); + cached_dev_bio_complete(cl); +} + +static void backingdev_endio(struct bio *bio) +{ + struct bch_write *io = container_of(bio, struct bch_write, backingdev_bio); + struct cached_dev *dc = container_of(io->d, struct cached_dev, disk); + + if (bio->bi_status) { + io->status = bio->bi_status; + bch_count_backing_io_errors(dc, bio); + } + + closure_put(&io->cl); +} + +static void submit_backingdev_io(struct bch_write *io) +{ + struct cached_dev *dc = container_of(io->d, struct cached_dev, disk); + + /* + * If it's a discard and the backing device doesn't support discards, no + * need to submit it: + */ + if (bio_op(io->orig_bio) == REQ_OP_DISCARD && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + return; + + bio_init(&io->backingdev_bio, NULL, 0); + __bio_clone_fast(&io->backingdev_bio, io->orig_bio); + io->backingdev_bio.bi_end_io = backingdev_endio; + + closure_get(&io->cl); + generic_make_request(&io->backingdev_bio); +} + +static void cached_dev_write(struct cached_dev *dc, struct bio *orig_bio) +{ + struct bch_fs *c = dc->disk.c2; + struct bch_write *io; + struct bkey start = KEY(dc->disk.id, orig_bio->bi_iter.bi_sector, 0); + struct bkey end = KEY(dc->disk.id, bio_end_sector(orig_bio), 0); + struct bch_io_opts opts = { 0 }; + unsigned in_use = fs_used_percent(c); + + io = mempool_alloc(&dc->bch2_io_write, GFP_NOIO); + closure_init(&io->cl, NULL); + io->d = &dc->disk; + io->orig_bio = orig_bio; + io->status = 0; + io->start_time = jiffies; + io->bypass = bch_check_should_bypass(dc, orig_bio, + c->opts.block_size, in_use); + io->writeback = false; + + down_read_non_owner(&dc->writeback_lock); + if (bch_keybuf_check_overlapping(dc->writeback_keys, &start, &end)) { + /* + * We overlap with some dirty data undergoing background + * writeback, force this write to writeback + */ + io->bypass = false; + io->writeback = true; + } + + /* + * Discards aren't _required_ to do anything, so skipping if + * check_overlapping returned true is ok + * + * But check_overlapping drops dirty keys for which io hasn't started, + * so we still want to call it. + */ + if (bio_op(orig_bio) == REQ_OP_DISCARD) { + io->bypass = true; + io->writeback = false; + } + + if (should_writeback(dc, io->orig_bio, cache_mode(dc), + io->bypass, in_use)) { + io->bypass = false; + io->writeback = true; + } + + /* + * Submit IO to backing device, if we're not doing a writeback write: + * + * If it's a discard and the backing device doesn't support discards, no + * need to submit to the backing device: + */ + if (!io->writeback) + submit_backingdev_io(io); + + /* If we're bypassing, delete the range we're writing to from the cache: */ + if (io->bypass) { + u64 journal_seq = 0; + + bch2_btree_delete_range(c, BTREE_ID_EXTENTS, + POS(dc->disk.id, orig_bio->bi_iter.bi_sector), + POS(dc->disk.id, bio_end_sector(orig_bio)), + &journal_seq); + + if ((orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) && + !(c->opts.journal_flush_disabled)) + bch2_journal_flush_seq_async(&c->journal, journal_seq, &io->cl); + } else { + bch2_write_op_init(&io->op, c, opts); + bio_init(&io->op.wbio.bio, NULL, 0); + __bio_clone_fast(&io->op.wbio.bio, orig_bio); + io->op.nr_replicas = 1; + io->op.write_point = writepoint_hashed((unsigned long) current); + io->op.new_i_size = U64_MAX; + io->op.pos = POS(dc->disk.id, orig_bio->bi_iter.bi_sector); + + if (orig_bio->bi_opf & (REQ_FUA|REQ_PREFLUSH)) + io->op.flags |= BCH_WRITE_FLUSH; + + if (io->writeback) { + int ret = bch2_disk_reservation_get(c, &io->op.res, bio_sectors(orig_bio), + io->op.nr_replicas, 0); + if (ret) { + io->status = BLK_STS_RESOURCE; + goto err; + } + + /* Mark superblock dirty, if necessary: */ + bch_writeback_add(dc); + } else { + io->op.flags |= BCH_WRITE_CACHED; + } + + closure_call(&io->op.cl, bch2_write, NULL, &io->cl); + } +err: + continue_at(&io->cl, cached_dev_write_complete, NULL); +} + +static void cached_dev_nodata(struct cached_dev *dc, struct bio *orig_bio) +{ + struct bch_fs *c = dc->disk.c2; + bool flush_backingdev = cache_mode(dc) != CACHE_MODE_WRITEBACK; + bool flush_cache = !c->opts.journal_flush_disabled; + struct bch_write *io; + + if (!(orig_bio->bi_opf & REQ_PREFLUSH)) { + generic_make_request(orig_bio); + return; + } + + if (!flush_backingdev && !flush_cache) { + bio_endio(orig_bio); + return; + } + + if (!flush_cache) { + generic_make_request(orig_bio); + return; + } + + io = mempool_alloc(&dc->bch2_io_write, GFP_NOIO); + closure_init(&io->cl, NULL); + io->d = &dc->disk; + io->orig_bio = orig_bio; + io->status = 0; + io->start_time = jiffies; + io->bypass = false; + io->writeback = false; + + if (flush_backingdev) + submit_backingdev_io(io); + + bch2_journal_flush_async(&c->journal, &io->cl); + continue_at(&io->cl, cached_dev_bio_complete, NULL); +} + +void bch2_cached_dev_make_request(struct cached_dev *dc, struct bio *bio) +{ + //trace_bcache_request_start(d, bio); + + if (!bio->bi_iter.bi_size) + cached_dev_nodata(dc, bio); + else if (bio_data_dir(bio) == WRITE) + cached_dev_write(dc, bio); + else + cached_dev_read(dc, bio); + +} + +static int bch2_dev_attach_trans(struct btree_trans *trans, + struct qstr *name, + u64 *inum, + bool must_exist) +{ + struct bch_fs *c = trans->c; + struct bch_inode_unpacked root_inode; + struct bch_inode_unpacked dev_inode; + struct bch_hash_info root_hash_info; + struct btree_iter *iter; + int ret; + + ret = bch2_inode_find_by_inum_trans(trans, BCACHEFS_ROOT_INO, &root_inode); + if (ret) + return ret; + + root_hash_info = bch2_hash_info_init(c, &root_inode); + + iter = __bch2_dirent_lookup_trans(trans, BCACHEFS_ROOT_INO, + &root_hash_info, name, 0); + ret = PTR_ERR_OR_ZERO(iter); + if (ret && ret != -ENOENT) + return ret; + + if (!ret) { + struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + *inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); + return 0; + } + + if (must_exist) + return ret; + + /* Doesn't exist, create it: */ + bch2_inode_init_early(c, &dev_inode); + + ret = bch2_create_trans(trans, BCACHEFS_ROOT_INO, + &root_inode, &dev_inode, + name, 0, 0, S_IFREG, 0, NULL, NULL) ?: + bch2_trans_commit(trans, NULL, NULL, 0); + *inum = dev_inode.bi_inum; + return ret; +} + +static int bch2_cached_dev_attach_one(struct cached_dev *dc, struct bch_fs *c, + uint8_t *fs_uuid) +{ + char backingdev_filename[80]; + struct qstr backingdev_qstr; + struct inode *inode = NULL; + u64 inum; + int ret = 0; + + snprintf(backingdev_filename, sizeof(backingdev_filename), + "backing-device-%pU", dc->sb.uuid); + backingdev_qstr = (struct qstr) QSTR_INIT(backingdev_filename, + strlen(backingdev_filename)); + + if (bcache_dev_is_attached(&dc->disk)) { + pr_err("Can't attach %s: already attached", + dc->backing_dev_name); + return -EINVAL; + } +#if 0 + if (test_bit(CACHE_SET_STOPPING, &c->flags)) { + pr_err("Can't attach %s: shutting down", + dc->backing_dev_name); + return -EINVAL; + } +#endif + if (dc->sb.block_size < c->opts.block_size) { + /* Will die */ + pr_err("Couldn't attach %s: block size less than set's block size", + dc->backing_dev_name); + return -EINVAL; + } + + ret = bch2_trans_do(c, NULL, NULL, 0, + bch2_dev_attach_trans(&trans, &backingdev_qstr, &inum, + BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY)); + if (ret) { + pr_err("Error attaching %s: %i\n", + dc->backing_dev_name, ret); + return ret; + } + + inode = bch2_vfs_inode_get(c, inum); + if (IS_ERR(inode)) { + pr_err("Can't attach %s: error getting inode %li", + dc->backing_dev_name, PTR_ERR(inode)); + return PTR_ERR(inode); + } + + ret = get_write_access(inode); + if (ret) { + pr_err("Can't attach %s: error getting inode %i", + dc->backing_dev_name, ret); + iput(inode); + return ret; + } + + /* XXX should we be calling __mnt_want_write() too? */ + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { + struct closure cl; + + closure_init_stack(&cl); + + ret = bch2_fpunch(c, inum, 0, U64_MAX, NULL, NULL); + if (ret) { + pr_err("Error attaching %s: error deleting existing data %i\n", + dc->backing_dev_name, ret); + return ret; + } + + SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + } + + /* + * XXX: set inode size + */ + + dc->disk.id = inum; + dc->disk.inode = inode; + dc->disk.c2 = c; +#if 0 + bcache_device_attach(&dc->disk, c, inum); + list_move(&dc->list, &c->cached_devs); + calc_cached_dev_sectors(c); +#endif + /* + * dc->c must be set before dc->count != 0 - paired with the mb in + * cached_dev_get() + */ + smp_wmb(); + refcount_set(&dc->count, 1); +#if 0 + /* Block writeback thread, but spawn it */ + down_write(&dc->writeback_lock); + if (bch_cached_dev_writeback_start(dc)) { + up_write(&dc->writeback_lock); + pr_err("Couldn't start writeback facilities for %s", + dc->disk.disk->disk_name); + return -ENOMEM; + } + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { + atomic_set(&dc->has_dirty, 1); + bch_writeback_queue(dc); + } + + bch_sectors_dirty_init(&dc->disk); + + ret = bch_cached_dev_run(dc); + if (ret && (ret != -EBUSY)) { + up_write(&dc->writeback_lock); + /* + * bch_register_lock is held, bcache_device_stop() is not + * able to be directly called. The kthread and kworker + * created previously in bch_cached_dev_writeback_start() + * have to be stopped manually here. + */ + kthread_stop(dc->writeback_thread); + cancel_writeback_rate_update_dwork(dc); + pr_err("Couldn't run cached device %s", + dc->backing_dev_name); + return ret; + } + + /* Allow the writeback thread to proceed */ + up_write(&dc->writeback_lock); +#endif + +#if 0 + bcache_device_link(&dc->disk, c, "bdev"); + atomic_inc(&c->attached_dev_nr); +#endif + + pr_info("Caching %s as %s on set %pU", + dc->backing_dev_name, + dc->disk.disk->disk_name, + &dc->disk.c2->sb.uuid); + return 0; + +} + +int bch2_cached_dev_attach(struct cached_dev *dc, uint8_t *fs_uuid) +{ + struct bch_fs *c; + int ret; + + mutex_lock(&bch2_fs_list_lock); + list_for_each_entry(c, &bch2_fs_list, list) { + if (fs_uuid + ? !memcmp(fs_uuid, &c->sb.user_uuid, 16) + : !memcmp(dc->sb.set_uuid, &c->sb.uuid, 16)) { + closure_get(&c->cl); + mutex_unlock(&bch2_fs_list_lock); + goto found; + } + } + mutex_unlock(&bch2_fs_list_lock); + return -ENOENT; +found: + ret = bch2_cached_dev_attach_one(dc, c, fs_uuid); + closure_put(&c->cl); + return ret; +} + +void bch2_request_exit(struct cached_dev *dc) +{ + mempool_exit(&dc->bch2_io_write); + bioset_exit(&dc->bch2_bio_read); +} + +int bch2_request_init(struct cached_dev *dc) +{ + return bioset_init(&dc->bch2_bio_read, 1, + offsetof(struct bch_cached_dev_rbio, rbio.bio), + BIOSET_NEED_RESCUER) ?: + mempool_init_kmalloc_pool(&dc->bch2_io_write, 1, sizeof(struct bch_write)); +} diff --git a/drivers/md/bcache/bch2.h b/drivers/md/bcache/bch2.h new file mode 100644 index 000000000000..40d30db8a43a --- /dev/null +++ b/drivers/md/bcache/bch2.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHE_BCH2_H +#define _BCACHE_BCH2_H + +void bch2_cached_dev_make_request(struct cached_dev *dc, struct bio *bio); +int bch2_cached_dev_attach(struct cached_dev *dc, uint8_t *fs_uuid); + +void bch2_request_exit(struct cached_dev *dc); +int bch2_request_init(struct cached_dev *dc); + +#endif /* _BCACHE_BCH2_H_ */ diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 8ba675b8a2da..90758649eb11 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -59,25 +59,17 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; } -bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio) +bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio, + unsigned int block_size, + unsigned int in_use) { unsigned int mode = cache_mode(dc); - unsigned int sectors, congested, dirty_percentage, block_size; + unsigned int sectors, congested; struct task_struct *task = current; struct io *i; - if (dc->disk.c) { - dirty_percentage = dc->disk.c->gc_stats.in_use; - block_size = dc->disk.c->sb.block_size; - } else { - /* XXX bcache2: */ - dirty_percentage = 0; - block_size = 0; - //block_size = dc->disk.c2->sb.block_size; - } - if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || - dirty_percentage > CUTOFF_CACHE_ADD || + in_use > CUTOFF_CACHE_ADD || (bio_op(bio) == REQ_OP_DISCARD)) goto skip; diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h index 59bc40c8b107..08bfbd521e96 100644 --- a/drivers/md/bcache/io.h +++ b/drivers/md/bcache/io.h @@ -7,7 +7,9 @@ struct cache; struct cache_set; unsigned int bch_get_congested(const struct cache_set *c); -bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio); +bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio, + unsigned int block_size, + unsigned int dirty_percentage); void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio); void bch_count_io_errors(struct cache *ca, blk_status_t error, diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 755bc6a448cb..ac4d1c545d61 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -8,11 +8,11 @@ */ #include "bcache.h" +#include "bch2.h" #include "btree.h" #include "debug.h" #include "io.h" #include "request.h" -#include "request2.h" #include "writeback.h" #include <linux/module.h> @@ -851,7 +851,8 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) if (should_writeback(dc, s->orig_bio, cache_mode(dc), - s->iop.bypass)) { + s->iop.bypass, + dc->disk.c->gc_stats.in_use)) { s->iop.bypass = false; s->iop.writeback = true; } @@ -1013,34 +1014,57 @@ static void quit_max_writeback_rate(struct cache_set *c, /* Cached devices - read & write stuff */ -blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) +static void bch1_cached_dev_make_request(struct cached_dev *dc, struct bio *bio) { + struct bcache_device *d = &dc->disk; + struct cache_set *c = d->c; struct search *s; - struct bcache_device *d = bio->bi_disk->private_data; - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - int rw = bio_data_dir(bio); - if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) || - dc->io_disable)) { + if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) { + cached_dev_put(dc); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; + } + + s = search_alloc(bio, d); + trace_bcache_request_start(s->d, bio); + + if (atomic_read(&c->idle_counter)) + atomic_set(&c->idle_counter, 0); + /* + * If at_max_writeback_rate of cache set is true and new I/O + * comes, quit max writeback rate of all cached devices + * attached to this cache set, and set at_max_writeback_rate + * to false. + */ + if (unlikely(atomic_read(&c->at_max_writeback_rate) == 1)) { + atomic_set(&c->at_max_writeback_rate, 0); + quit_max_writeback_rate(c, dc); } - if (likely(d->c)) { - if (atomic_read(&d->c->idle_counter)) - atomic_set(&d->c->idle_counter, 0); + if (!bio->bi_iter.bi_size) { /* - * If at_max_writeback_rate of cache set is true and new I/O - * comes, quit max writeback rate of all cached devices - * attached to this cache set, and set at_max_writeback_rate - * to false. + * can't call bch_journal_meta from under + * generic_make_request */ - if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) { - atomic_set(&d->c->at_max_writeback_rate, 0); - quit_max_writeback_rate(d->c, dc); - } + continue_at_nobarrier(&s->cl, cached_dev_nodata, bcache_wq); + } else { + s->iop.bypass = bch_check_should_bypass(dc, bio, + c->sb.block_size, + c->gc_stats.in_use); + + if (bio_data_dir(bio) == WRITE) + cached_dev_write(dc, s); + else + cached_dev_read(dc, s); } +} + +blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct bcache_device *d = bio->bi_disk->private_data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); generic_start_io_acct(q, bio_op(bio), @@ -1050,29 +1074,21 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; - if (cached_dev_get(dc)) { - s = search_alloc(bio, d); - trace_bcache_request_start(s->d, bio); - - if (!bio->bi_iter.bi_size) { - /* - * can't call bch_journal_meta from under - * generic_make_request - */ - continue_at_nobarrier(&s->cl, - cached_dev_nodata, - bcache_wq); - } else { - s->iop.bypass = check_should_bypass(dc, bio); + if (unlikely(dc->io_disable)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_QC_T_NONE; + } - if (rw) - cached_dev_write(dc, s); - else - cached_dev_read(dc, s); - } - } else + if (cached_dev_get(dc)) { + if (d->c) + bch1_cached_dev_make_request(dc, bio); + else + bch2_cached_dev_make_request(dc, bio); + } else { /* I/O request sent to backing device */ detached_dev_do_request(d, bio); + } return BLK_QC_T_NONE; } @@ -1099,12 +1115,16 @@ static int cached_dev_congested(void *data, int bits) return 1; if (cached_dev_get(dc)) { - unsigned int i; - struct cache *ca; + if (d->c) { + unsigned int i; + struct cache *ca; - for_each_cache(ca, d->c, i) { - q = bdev_get_queue(ca->bdev); - ret |= bdi_congested(q->backing_dev_info, bits); + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(q->backing_dev_info, bits); + } + } else { + /* bcache2: */ } cached_dev_put(dc); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f3f42862acc4..c2a7b8ae64d7 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -8,6 +8,7 @@ */ #include "bcache.h" +#include "bch2.h" #include "btree.h" #include "debug.h" #include "extents.h" @@ -726,7 +727,7 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, bd_link_disk_holder(ca->bdev, d->disk); snprintf(d->name, BCACHEDEVNAME_SIZE, - "%s%u", name, d->id); + "%s%llu", name, d->id); ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); if (ret < 0) @@ -948,7 +949,6 @@ static int cached_dev_status_update(void *arg) return 0; } - int bch_cached_dev_run(struct cached_dev *dc) { struct bcache_device *d = &dc->disk; @@ -978,7 +978,7 @@ int bch_cached_dev_run(struct cached_dev *dc) return -EBUSY; } - if (!d->c && + if (!bcache_dev_is_attached(d) && BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { struct closure cl; @@ -1118,7 +1118,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) return -ENOENT; - if (dc->disk.c) { + if (bcache_dev_is_attached(&dc->disk)) { pr_err("Can't attach %s: already attached", dc->backing_dev_name); return -EINVAL; @@ -1292,6 +1292,7 @@ static void cached_dev_free(struct closure *cl) wake_up(&unregister_wait); kfree(dc->writeback_keys); + bch2_request_exit(dc); kobject_put(&dc->disk.kobj); } @@ -1357,7 +1358,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO; bch_cached_dev_request_init(dc); - return bch_cached_dev_writeback_init(dc); + return bch_cached_dev_writeback_init(dc) ?: + bch2_request_init(dc); } /* Cached device - bcache superblock */ @@ -1393,6 +1395,8 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, list_for_each_entry(c, &bch_cache_sets, list) bch_cached_dev_attach(dc, c, NULL); + bch2_cached_dev_attach(dc, NULL); + if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { err = "failed to run cached device"; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 34d09d94e95d..3b3939523708 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -7,6 +7,7 @@ */ #include "bcache.h" +#include "bch2.h" #include "sysfs.h" #include "btree.h" #include "io.h" @@ -407,6 +408,11 @@ STORE(__cached_dev) if (!v) return size; } + + v = bch2_cached_dev_attach(dc, set_uuid); + if (!v) + return size; + if (v == -ENOENT) pr_err("Can't attach %s: cache set not found", buf); return v; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index a83700acc4ce..2a83ed3d4ba0 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -42,32 +42,6 @@ struct bch_dirty_init_state { struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX]; }; -static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, - unsigned int cache_mode, bool would_skip) -{ - unsigned int in_use = dc->disk.c->gc_stats.in_use; - - if (cache_mode != CACHE_MODE_WRITEBACK || - test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || - in_use > bch_cutoff_writeback_sync) - return false; - - if (bio_op(bio) == REQ_OP_DISCARD) - return false; - - if (dc->partial_stripes_expensive && - bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector, - bio_sectors(bio))) - return true; - - if (would_skip) - return false; - - return (op_is_sync(bio->bi_opf) || - bio->bi_opf & (REQ_META|REQ_PRIO) || - in_use <= bch_cutoff_writeback); -} - void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, uint64_t offset, int nr_sectors); |