author     Kent Overstreet <kent.overstreet@gmail.com>  2020-06-22 17:35:56 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>  2020-06-28 18:12:46 -0400
commit     8aa6e6e10ce712bcb10be9a206ec13d00ad0e7e6
tree       347feb718da57de42ae6f2e5367df3e2e5ad6466
parent     7b42d65c0607af212b9ef3aa834ea449e3f846b1

Initial bcache/bcachefs integration (branch: bcache2)
-rw-r--r--  drivers/md/bcache/Makefile       |   6
-rw-r--r--  drivers/md/bcache/backingdev.h   |  39
-rw-r--r--  drivers/md/bcache/bch2.c         | 540
-rw-r--r--  drivers/md/bcache/bch2.h         |  11
-rw-r--r--  drivers/md/bcache/io.c           |  18
-rw-r--r--  drivers/md/bcache/io.h           |   4
-rw-r--r--  drivers/md/bcache/request.c      | 110
-rw-r--r--  drivers/md/bcache/super.c        |  14
-rw-r--r--  drivers/md/bcache/sysfs.c        |   6
-rw-r--r--  drivers/md/bcache/writeback.h    |  26
10 files changed, 680 insertions(+), 94 deletions(-)
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 0fb1b6009da3..c45be858d941 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -2,6 +2,6 @@
obj-$(CONFIG_BCACHE) += bcache.o
-bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\
- journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
- util.o writeback.o
+bcache-y := alloc.o bch2.o bset.o btree.o debug.o extents.o\
+ io.o journal.o movinggc.o request.o stats.o super.o sysfs.o\
+ trace.o util.o writeback.o
diff --git a/drivers/md/bcache/backingdev.h b/drivers/md/bcache/backingdev.h
index 58362eb7902a..038b532e91d8 100644
--- a/drivers/md/bcache/backingdev.h
+++ b/drivers/md/bcache/backingdev.h
@@ -22,7 +22,11 @@ struct bcache_device {
struct kobject kobj;
struct cache_set *c;
- unsigned int id;
+ struct bch_fs *c2;
+
+ u64 id;
+ struct inode *inode;
+
#define BCACHEDEVNAME_SIZE 12
char name[BCACHEDEVNAME_SIZE];
@@ -49,6 +53,11 @@ struct bcache_device {
unsigned int cmd, unsigned long arg);
};
+static inline bool bcache_dev_is_attached(struct bcache_device *d)
+{
+ return d->c != NULL || d->c2 != NULL;
+}
+
enum stop_on_failure {
BCH_CACHED_DEV_STOP_AUTO = 0,
BCH_CACHED_DEV_STOP_ALWAYS,
@@ -87,6 +96,9 @@ struct cached_dev {
*/
atomic_t running;
+ struct bio_set bch2_bio_read;
+ mempool_t bch2_io_write;
+
/*
* Writes take a shared lock from start to finish; scanning for dirty
* data to refill the rb tree requires an exclusive lock.
@@ -225,6 +237,31 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
}
}
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+ unsigned int cache_mode, bool would_skip,
+ unsigned int in_use)
+{
+ if (cache_mode != CACHE_MODE_WRITEBACK ||
+ test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
+ in_use > bch_cutoff_writeback_sync)
+ return false;
+
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ return false;
+
+ if (dc->partial_stripes_expensive &&
+ bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
+ bio_sectors(bio)))
+ return true;
+
+ if (would_skip)
+ return false;
+
+ return (op_is_sync(bio->bi_opf) ||
+ bio->bi_opf & (REQ_META|REQ_PRIO) ||
+ in_use <= bch_cutoff_writeback);
+}
+
static inline void bch_writeback_queue(struct cached_dev *dc)
{
if (!IS_ERR_OR_NULL(dc->writeback_thread))
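
[Aside - illustrative, not part of the patch: should_writeback() above is the old helper moved out of writeback.h, with the occupancy figure hoisted into the in_use parameter so the bcachefs path can pass fs_used_percent() where bcache passed gc_stats.in_use. A minimal user-space model of the decision cascade; the cutoff values mirror bcache's defaults (40/70) and all names here are placeholders:]

#include <stdbool.h>
#include <stdio.h>

#define CUTOFF_WRITEBACK        40      /* stands in for bch_cutoff_writeback */
#define CUTOFF_WRITEBACK_SYNC   70      /* stands in for bch_cutoff_writeback_sync */

static bool model_should_writeback(bool writeback_mode, bool detaching,
                                   bool is_discard, bool partial_stripe_dirty,
                                   bool would_skip, bool is_sync_or_prio,
                                   unsigned int in_use)
{
        /* hard gates: wrong cache mode, detaching, or too full even for sync writes */
        if (!writeback_mode || detaching || in_use > CUTOFF_WRITEBACK_SYNC)
                return false;

        /* discards never go through writeback */
        if (is_discard)
                return false;

        /* touching a dirty partial stripe forces writeback, even for would-be bypasses */
        if (partial_stripe_dirty)
                return true;

        /* a write already marked for bypass stays bypassed */
        if (would_skip)
                return false;

        /* otherwise: sync/priority writes, or a cache that still has room */
        return is_sync_or_prio || in_use <= CUTOFF_WRITEBACK;
}

int main(void)
{
        /* sync write, writeback mode, cache 50% full: goes to writeback */
        printf("%d\n", model_should_writeback(true, false, false, false,
                                              false, true, 50));        /* 1 */
        return 0;
}
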
diff --git a/drivers/md/bcache/bch2.c b/drivers/md/bcache/bch2.c
new file mode 100644
index 000000000000..b0ada5a61564
--- /dev/null
+++ b/drivers/md/bcache/bch2.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Main bcache entry point - handle a read or a write request and decide what to
+ * do with it; the make_request functions are called by the block layer.
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "backingdev.h"
+#include "bch2.h"
+
+#include "../../../fs/bcachefs/bcachefs.h"
+#include "../../../fs/bcachefs/alloc_foreground.h"
+#include "../../../fs/bcachefs/btree_update.h"
+#include "../../../fs/bcachefs/buckets.h"
+#include "../../../fs/bcachefs/io.h"
+#include "../../../fs/bcachefs/fs.h"
+#include "../../../fs/bcachefs/fs-common.h"
+#include "../../../fs/bcachefs/str_hash.h"
+
+#include "io.h"
+
+#include <linux/kthread.h>
+//#include <trace/events/bcache.h>
+
+static unsigned fs_used_percent(struct bch_fs *c)
+{
+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+
+ return div64_u64(usage.used * 100, usage.capacity);
+}
+
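+/* XXX: stub for the bcachefs path - always reports "no overlap" for now
+ * (the bcache path checks dc->writeback_keys here): */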
+static inline bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
+ struct bkey *end)
+{
+ return false;
+}
+
+/* Reads: */
+
+struct bch_cached_dev_rbio {
+ struct bio *orig;
+ struct cached_dev *dc;
+ struct bch_read_bio rbio;
+};
+
+static void cached_dev_read_endio(struct bio *bio)
+{
+ struct bch_cached_dev_rbio *c_rbio =
+ container_of(bio, struct bch_cached_dev_rbio, rbio.bio);
+ struct bio *orig = c_rbio->orig;
+ struct cached_dev *dc = c_rbio->dc;
+
+ bio_put(bio);
+ cached_dev_put(dc);
+ bio_endio(orig);
+}
+
+static void cached_dev_read(struct cached_dev *dc, struct bio *bio)
+{
+ struct bch_fs *c = dc->disk.c2;
+ struct bch_read_bio *rbio;
+ struct bch_cached_dev_rbio *c_rbio;
+ struct bch_io_opts opts = { 0 };
+ unsigned flags = BCH_READ_RETRY_IF_STALE|
+ BCH_READ_USER_MAPPED|
+ BCH_READ_PASSTHROUGH_BLOCK_DEV;
+
+ if (!bch_check_should_bypass(dc, bio, c->opts.block_size, 0)) {
+ /* XXX: implement promotes from block devices in bch2:
+ flags |= BCH_READ_MAY_PROMOTE;
+ */
+ }
+
+ /* XXX: plumb through write point for promotes:
+ unsigned write_point = writepoint_hashed((unsigned long) current);
+ */
+
+ rbio = rbio_init(bio_clone_fast(bio, GFP_NOIO, &dc->bch2_bio_read), opts);
+ rbio->bio.bi_end_io = cached_dev_read_endio;
+ c_rbio = container_of(rbio, struct bch_cached_dev_rbio, rbio);
+ c_rbio->orig = bio;
+ c_rbio->dc = dc;
+
+ bch2_read(c, rbio, dc->disk.id, flags);
+}
+
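
[Aside - illustrative, not part of the patch: the read path above leans on the bioset front-pad plus container_of() pattern. bch2_request_init() at the bottom of this file creates dc->bch2_bio_read with a front pad of offsetof(struct bch_cached_dev_rbio, rbio.bio), so every bio cloned from it already sits inside a bch_cached_dev_rbio, and cached_dev_read_endio() can walk back from the embedded bio to the wrapper. A self-contained user-space model of that recovery:]

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct inner {
        int done;               /* stands in for the embedded struct bio */
};

struct wrapper {
        const char *orig;       /* stands in for c_rbio->orig */
        struct inner member;    /* stands in for c_rbio->rbio.bio */
};

static void endio(struct inner *i)      /* completion only sees the member... */
{
        struct wrapper *w = container_of(i, struct wrapper, member);

        printf("completing: %s\n", w->orig);    /* ...but recovers the wrapper */
}

int main(void)
{
        struct wrapper w = { .orig = "original bio", .member = { 0 } };

        endio(&w.member);
        return 0;
}
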
+/* Writes: */
+
+struct bch_write {
+ struct closure cl;
+
+ struct bcache_device *d;
+ struct bio *orig_bio;
+ struct bio backingdev_bio;
+
+ blk_status_t status;
+ unsigned long start_time;
+
+ unsigned int bypass:1;
+ unsigned int writeback:1;
+ struct bch_write_op op;
+};
+
+static void cached_dev_bio_complete(struct closure *cl)
+{
+ struct bch_write *io = container_of(cl, struct bch_write, cl);
+ struct cached_dev *dc = container_of(io->d, struct cached_dev, disk);
+
+ generic_end_io_acct(io->d->disk->queue, bio_op(io->orig_bio),
+ &io->d->disk->part0, io->start_time);
+
+ //trace_bcache_request_end(s->d, s->orig_bio);
+ io->orig_bio->bi_status = io->status;
+ bio_endio(io->orig_bio);
+
+ closure_debug_destroy(cl);
+ mempool_free(io, &dc->bch2_io_write);
+
+ cached_dev_put(dc);
+}
+
+static void cached_dev_write_complete(struct closure *cl)
+{
+ struct bch_write *s = container_of(cl, struct bch_write, cl);
+ struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
+
+ up_read_non_owner(&dc->writeback_lock);
+ cached_dev_bio_complete(cl);
+}
+
+static void backingdev_endio(struct bio *bio)
+{
+ struct bch_write *io = container_of(bio, struct bch_write, backingdev_bio);
+ struct cached_dev *dc = container_of(io->d, struct cached_dev, disk);
+
+ if (bio->bi_status) {
+ io->status = bio->bi_status;
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ closure_put(&io->cl);
+}
+
+static void submit_backingdev_io(struct bch_write *io)
+{
+ struct cached_dev *dc = container_of(io->d, struct cached_dev, disk);
+
+ /*
+ * If it's a discard and the backing device doesn't support discards, no
+ * need to submit it:
+ */
+ if (bio_op(io->orig_bio) == REQ_OP_DISCARD &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ return;
+
+ bio_init(&io->backingdev_bio, NULL, 0);
+ __bio_clone_fast(&io->backingdev_bio, io->orig_bio);
+ io->backingdev_bio.bi_end_io = backingdev_endio;
+
+ closure_get(&io->cl);
+ generic_make_request(&io->backingdev_bio);
+}
+
+static void cached_dev_write(struct cached_dev *dc, struct bio *orig_bio)
+{
+ struct bch_fs *c = dc->disk.c2;
+ struct bch_write *io;
+ struct bkey start = KEY(dc->disk.id, orig_bio->bi_iter.bi_sector, 0);
+ struct bkey end = KEY(dc->disk.id, bio_end_sector(orig_bio), 0);
+ struct bch_io_opts opts = { 0 };
+ unsigned in_use = fs_used_percent(c);
+
+ io = mempool_alloc(&dc->bch2_io_write, GFP_NOIO);
+ closure_init(&io->cl, NULL);
+ io->d = &dc->disk;
+ io->orig_bio = orig_bio;
+ io->status = 0;
+ io->start_time = jiffies;
+ io->bypass = bch_check_should_bypass(dc, orig_bio,
+ c->opts.block_size, in_use);
+ io->writeback = false;
+
+ down_read_non_owner(&dc->writeback_lock);
+ if (bch_keybuf_check_overlapping(dc->writeback_keys, &start, &end)) {
+ /*
+ * We overlap with some dirty data undergoing background
+ * writeback, force this write to writeback
+ */
+ io->bypass = false;
+ io->writeback = true;
+ }
+
+ /*
+ * Discards aren't _required_ to do anything, so skipping if
+ * check_overlapping returned true is ok
+ *
+ * But check_overlapping drops dirty keys for which io hasn't started,
+ * so we still want to call it.
+ */
+ if (bio_op(orig_bio) == REQ_OP_DISCARD) {
+ io->bypass = true;
+ io->writeback = false;
+ }
+
+ if (should_writeback(dc, io->orig_bio, cache_mode(dc),
+ io->bypass, in_use)) {
+ io->bypass = false;
+ io->writeback = true;
+ }
+
+ /*
+ * Submit IO to the backing device, unless this is a writeback write
+ * (submit_backingdev_io() itself skips discards the backing device
+ * doesn't support):
+ */
+ if (!io->writeback)
+ submit_backingdev_io(io);
+
+ /* If we're bypassing, delete the range we're writing to from the cache: */
+ if (io->bypass) {
+ u64 journal_seq = 0;
+
+ bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+ POS(dc->disk.id, orig_bio->bi_iter.bi_sector),
+ POS(dc->disk.id, bio_end_sector(orig_bio)),
+ &journal_seq);
+
+ if ((orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) &&
+ !(c->opts.journal_flush_disabled))
+ bch2_journal_flush_seq_async(&c->journal, journal_seq, &io->cl);
+ } else {
+ bch2_write_op_init(&io->op, c, opts);
+ bio_init(&io->op.wbio.bio, NULL, 0);
+ __bio_clone_fast(&io->op.wbio.bio, orig_bio);
+ io->op.nr_replicas = 1;
+ io->op.write_point = writepoint_hashed((unsigned long) current);
+ io->op.new_i_size = U64_MAX;
+ io->op.pos = POS(dc->disk.id, orig_bio->bi_iter.bi_sector);
+
+ if (orig_bio->bi_opf & (REQ_FUA|REQ_PREFLUSH))
+ io->op.flags |= BCH_WRITE_FLUSH;
+
+ if (io->writeback) {
+ int ret = bch2_disk_reservation_get(c, &io->op.res, bio_sectors(orig_bio),
+ io->op.nr_replicas, 0);
+ if (ret) {
+ io->status = BLK_STS_RESOURCE;
+ goto err;
+ }
+
+ /* Mark superblock dirty, if necessary: */
+ bch_writeback_add(dc);
+ } else {
+ io->op.flags |= BCH_WRITE_CACHED;
+ }
+
+ closure_call(&io->op.cl, bch2_write, NULL, &io->cl);
+ }
+err:
+ continue_at(&io->cl, cached_dev_write_complete, NULL);
+}
+
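
[Aside - illustrative sketch, not the kernel's closure implementation: the write path above fans out up to two child IOs (the backing-device submission and the cache write or journal flush), each holding a reference on io->cl; cached_dev_write_complete() runs only when the last reference drops. A minimal user-space model of the refcount-plus-continuation idea:]

#include <stdatomic.h>
#include <stdio.h>

struct mini_closure {
        atomic_int remaining;
        void (*fn)(struct mini_closure *);      /* continuation, runs at ref 0 */
};

static void mini_get(struct mini_closure *cl)   /* closure_get() */
{
        atomic_fetch_add(&cl->remaining, 1);
}

static void mini_put(struct mini_closure *cl)   /* closure_put() */
{
        if (atomic_fetch_sub(&cl->remaining, 1) == 1)
                cl->fn(cl);                     /* last ref: run continuation */
}

static void write_complete(struct mini_closure *cl)
{
        (void)cl;
        puts("all child IOs done");
}

int main(void)
{
        struct mini_closure cl = { 1, write_complete }; /* closure_init() */

        mini_get(&cl);  /* child 1: backing device bio */
        mini_get(&cl);  /* child 2: cache write op */
        mini_put(&cl);  /* child 1 endio */
        mini_put(&cl);  /* child 2 completion */
        mini_put(&cl);  /* continue_at(): drop our own ref -> write_complete() */
        return 0;
}
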
+static void cached_dev_nodata(struct cached_dev *dc, struct bio *orig_bio)
+{
+ struct bch_fs *c = dc->disk.c2;
+ bool flush_backingdev = cache_mode(dc) != CACHE_MODE_WRITEBACK;
+ bool flush_cache = !c->opts.journal_flush_disabled;
+ struct bch_write *io;
+
+ if (!(orig_bio->bi_opf & REQ_PREFLUSH)) {
+ generic_make_request(orig_bio);
+ return;
+ }
+
+ if (!flush_backingdev && !flush_cache) {
+ bio_endio(orig_bio);
+ return;
+ }
+
+ if (!flush_cache) {
+ generic_make_request(orig_bio);
+ return;
+ }
+
+ io = mempool_alloc(&dc->bch2_io_write, GFP_NOIO);
+ closure_init(&io->cl, NULL);
+ io->d = &dc->disk;
+ io->orig_bio = orig_bio;
+ io->status = 0;
+ io->start_time = jiffies;
+ io->bypass = false;
+ io->writeback = false;
+
+ if (flush_backingdev)
+ submit_backingdev_io(io);
+
+ bch2_journal_flush_async(&c->journal, &io->cl);
+ continue_at(&io->cl, cached_dev_bio_complete, NULL);
+}
+
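
[Aside - not part of the patch: cached_dev_nodata() only has real work for empty REQ_PREFLUSH bios, and resolves them from two bits - flush_backingdev (cache mode is not writeback, so the backing device may hold data the flush must cover) and flush_cache (journal flushes enabled):]

  not REQ_PREFLUSH                 -> forward to the backing device unchanged
  !flush_backingdev, !flush_cache  -> complete immediately, nothing to flush
   flush_backingdev, !flush_cache  -> forward the flush to the backing device
   flush_cache (either way)        -> async journal flush, plus a backing-device
                                      flush when flush_backingdev; completes via
                                      cached_dev_bio_complete() when all refs drop
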
+void bch2_cached_dev_make_request(struct cached_dev *dc, struct bio *bio)
+{
+ //trace_bcache_request_start(d, bio);
+
+ if (!bio->bi_iter.bi_size)
+ cached_dev_nodata(dc, bio);
+ else if (bio_data_dir(bio) == WRITE)
+ cached_dev_write(dc, bio);
+ else
+ cached_dev_read(dc, bio);
+}
+
+static int bch2_dev_attach_trans(struct btree_trans *trans,
+ struct qstr *name,
+ u64 *inum,
+ bool must_exist)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked root_inode;
+ struct bch_inode_unpacked dev_inode;
+ struct bch_hash_info root_hash_info;
+ struct btree_iter *iter;
+ int ret;
+
+ ret = bch2_inode_find_by_inum_trans(trans, BCACHEFS_ROOT_INO, &root_inode);
+ if (ret)
+ return ret;
+
+ root_hash_info = bch2_hash_info_init(c, &root_inode);
+
+ iter = __bch2_dirent_lookup_trans(trans, BCACHEFS_ROOT_INO,
+ &root_hash_info, name, 0);
+ ret = PTR_ERR_OR_ZERO(iter);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ if (!ret) {
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+ *inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
+ return 0;
+ }
+
+ if (must_exist)
+ return ret;
+
+ /* Doesn't exist, create it: */
+ bch2_inode_init_early(c, &dev_inode);
+
+ ret = bch2_create_trans(trans, BCACHEFS_ROOT_INO,
+ &root_inode, &dev_inode,
+ name, 0, 0, S_IFREG, 0, NULL, NULL) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+ *inum = dev_inode.bi_inum;
+ return ret;
+}
+
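+/*
+ * Note on must_exist (reading of the call site below): it is driven by
+ * BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY. A backing device with dirty data
+ * must find its existing "backing-device-%pU" inode - creating a fresh one
+ * would orphan the dirty cached data - while a clean device may have its
+ * inode created on first attach.
+ */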
+static int bch2_cached_dev_attach_one(struct cached_dev *dc, struct bch_fs *c,
+ uint8_t *fs_uuid)
+{
+ char backingdev_filename[80];
+ struct qstr backingdev_qstr;
+ struct inode *inode = NULL;
+ u64 inum;
+ int ret = 0;
+
+ snprintf(backingdev_filename, sizeof(backingdev_filename),
+ "backing-device-%pU", dc->sb.uuid);
+ backingdev_qstr = (struct qstr) QSTR_INIT(backingdev_filename,
+ strlen(backingdev_filename));
+
+ if (bcache_dev_is_attached(&dc->disk)) {
+ pr_err("Can't attach %s: already attached",
+ dc->backing_dev_name);
+ return -EINVAL;
+ }
+#if 0
+ if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
+ pr_err("Can't attach %s: shutting down",
+ dc->backing_dev_name);
+ return -EINVAL;
+ }
+#endif
+ if (dc->sb.block_size < c->opts.block_size) {
+ /* Will die */
+ pr_err("Couldn't attach %s: block size less than set's block size",
+ dc->backing_dev_name);
+ return -EINVAL;
+ }
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_dev_attach_trans(&trans, &backingdev_qstr, &inum,
+ BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY));
+ if (ret) {
+ pr_err("Error attaching %s: %i\n",
+ dc->backing_dev_name, ret);
+ return ret;
+ }
+
+ inode = bch2_vfs_inode_get(c, inum);
+ if (IS_ERR(inode)) {
+ pr_err("Can't attach %s: error getting inode %li",
+ dc->backing_dev_name, PTR_ERR(inode));
+ return PTR_ERR(inode);
+ }
+
+ ret = get_write_access(inode);
+ if (ret) {
+ pr_err("Can't attach %s: error getting inode %i",
+ dc->backing_dev_name, ret);
+ iput(inode);
+ return ret;
+ }
+
+ /* XXX should we be calling __mnt_want_write() too? */
+
+ if (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+
+ ret = bch2_fpunch(c, inum, 0, U64_MAX, NULL, NULL);
+ if (ret) {
+ pr_err("Error attaching %s: error deleting existing data %i\n",
+ dc->backing_dev_name, ret);
+ return ret;
+ }
+
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
+ bch_write_bdev_super(dc, &cl);
+ closure_sync(&cl);
+ }
+
+ /*
+ * XXX: set inode size
+ */
+
+ dc->disk.id = inum;
+ dc->disk.inode = inode;
+ dc->disk.c2 = c;
+#if 0
+ bcache_device_attach(&dc->disk, c, inum);
+ list_move(&dc->list, &c->cached_devs);
+ calc_cached_dev_sectors(c);
+#endif
+ /*
+ * dc->c must be set before dc->count != 0 - paired with the mb in
+ * cached_dev_get()
+ */
+ smp_wmb();
+ refcount_set(&dc->count, 1);
+#if 0
+ /* Block writeback thread, but spawn it */
+ down_write(&dc->writeback_lock);
+ if (bch_cached_dev_writeback_start(dc)) {
+ up_write(&dc->writeback_lock);
+ pr_err("Couldn't start writeback facilities for %s",
+ dc->disk.disk->disk_name);
+ return -ENOMEM;
+ }
+
+ if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
+ atomic_set(&dc->has_dirty, 1);
+ bch_writeback_queue(dc);
+ }
+
+ bch_sectors_dirty_init(&dc->disk);
+
+ ret = bch_cached_dev_run(dc);
+ if (ret && (ret != -EBUSY)) {
+ up_write(&dc->writeback_lock);
+ /*
+ * bch_register_lock is held, bcache_device_stop() is not
+ * able to be directly called. The kthread and kworker
+ * created previously in bch_cached_dev_writeback_start()
+ * have to be stopped manually here.
+ */
+ kthread_stop(dc->writeback_thread);
+ cancel_writeback_rate_update_dwork(dc);
+ pr_err("Couldn't run cached device %s",
+ dc->backing_dev_name);
+ return ret;
+ }
+
+ /* Allow the writeback thread to proceed */
+ up_write(&dc->writeback_lock);
+#endif
+
+#if 0
+ bcache_device_link(&dc->disk, c, "bdev");
+ atomic_inc(&c->attached_dev_nr);
+#endif
+
+ pr_info("Caching %s as %s on set %pU",
+ dc->backing_dev_name,
+ dc->disk.disk->disk_name,
+ &dc->disk.c2->sb.uuid);
+ return 0;
+}
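
[Aside - illustrative, not part of the patch: the smp_wmb() in bch2_cached_dev_attach_one() publishes dc->disk.c2 before dc->count becomes nonzero, pairing with the read-side barrier in cached_dev_get(). The same ordering expressed with C11 release/acquire atomics, as a self-contained user-space model under assumed semantics:]

#include <stdatomic.h>

struct dev {
        void *c2;               /* payload: must be visible before the refcount */
        atomic_int count;       /* 0 = not attached */
};

static void attach(struct dev *d, void *fs)
{
        d->c2 = fs;                                     /* plain store */
        atomic_store_explicit(&d->count, 1,             /* publish: plays the role */
                              memory_order_release);    /* of smp_wmb() + set */
}

static void *try_get(struct dev *d)                     /* cached_dev_get() side */
{
        if (atomic_load_explicit(&d->count, memory_order_acquire) == 0)
                return NULL;    /* not attached yet */
        return d->c2;           /* acquire guarantees we see the c2 store */
}

int main(void)
{
        static struct dev d;

        attach(&d, (void *)"bch_fs");
        return try_get(&d) ? 0 : 1;
}
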
+
+int bch2_cached_dev_attach(struct cached_dev *dc, uint8_t *fs_uuid)
+{
+ struct bch_fs *c;
+ int ret;
+
+ mutex_lock(&bch2_fs_list_lock);
+ list_for_each_entry(c, &bch2_fs_list, list) {
+ if (fs_uuid
+ ? !memcmp(fs_uuid, &c->sb.user_uuid, 16)
+ : !memcmp(dc->sb.set_uuid, &c->sb.uuid, 16)) {
+ closure_get(&c->cl);
+ mutex_unlock(&bch2_fs_list_lock);
+ goto found;
+ }
+ }
+ mutex_unlock(&bch2_fs_list_lock);
+ return -ENOENT;
+found:
+ ret = bch2_cached_dev_attach_one(dc, c, fs_uuid);
+ closure_put(&c->cl);
+ return ret;
+}
+
+void bch2_request_exit(struct cached_dev *dc)
+{
+ mempool_exit(&dc->bch2_io_write);
+ bioset_exit(&dc->bch2_bio_read);
+}
+
+int bch2_request_init(struct cached_dev *dc)
+{
+ return bioset_init(&dc->bch2_bio_read, 1,
+ offsetof(struct bch_cached_dev_rbio, rbio.bio),
+ BIOSET_NEED_RESCUER) ?:
+ mempool_init_kmalloc_pool(&dc->bch2_io_write, 1, sizeof(struct bch_write));
+}
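
[Aside - not part of the patch: the "?:" chaining in bch2_request_init() (and in cached_dev_init() in super.c below) is GCC's conditional-with-omitted-operand extension: "a ?: b" yields a when a is nonzero and only evaluates b when a is zero, so with 0-on-success/-errno initializers it reads as "stop at the first failure". Illustrative user-space model (compile with gcc):]

#include <stdio.h>

static int init_a(void) { puts("init_a"); return 0; }
static int init_b(void) { puts("init_b"); return -12; }  /* fails: -ENOMEM */
static int init_c(void) { puts("init_c"); return 0; }    /* never evaluated */

int main(void)
{
        int ret = init_a() ?: init_b() ?: init_c();

        printf("ret = %d\n", ret);      /* -12: init_c was short-circuited */
        return 0;
}

[Partial failure is covered by cached_dev_free() calling bch2_request_exit() unconditionally; bioset_exit() and mempool_exit() tolerate zeroed, never-initialized structures, and cached_dev is zero-allocated.]
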
diff --git a/drivers/md/bcache/bch2.h b/drivers/md/bcache/bch2.h
new file mode 100644
index 000000000000..40d30db8a43a
--- /dev/null
+++ b/drivers/md/bcache/bch2.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHE_BCH2_H
+#define _BCACHE_BCH2_H
+
+void bch2_cached_dev_make_request(struct cached_dev *dc, struct bio *bio);
+int bch2_cached_dev_attach(struct cached_dev *dc, uint8_t *fs_uuid);
+
+void bch2_request_exit(struct cached_dev *dc);
+int bch2_request_init(struct cached_dev *dc);
+
+#endif /* _BCACHE_BCH2_H */
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 8ba675b8a2da..90758649eb11 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -59,25 +59,17 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
}
-bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio)
+bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio,
+ unsigned int block_size,
+ unsigned int in_use)
{
unsigned int mode = cache_mode(dc);
- unsigned int sectors, congested, dirty_percentage, block_size;
+ unsigned int sectors, congested;
struct task_struct *task = current;
struct io *i;
- if (dc->disk.c) {
- dirty_percentage = dc->disk.c->gc_stats.in_use;
- block_size = dc->disk.c->sb.block_size;
- } else {
- /* XXX bcache2: */
- dirty_percentage = 0;
- block_size = 0;
- //block_size = dc->disk.c2->sb.block_size;
- }
-
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- dirty_percentage > CUTOFF_CACHE_ADD ||
+ in_use > CUTOFF_CACHE_ADD ||
(bio_op(bio) == REQ_OP_DISCARD))
goto skip;
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index 59bc40c8b107..08bfbd521e96 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -7,7 +7,9 @@ struct cache;
struct cache_set;
unsigned int bch_get_congested(const struct cache_set *c);
-bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio);
+bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio,
+ unsigned int block_size,
+ unsigned int in_use);
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *ca, blk_status_t error,
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 755bc6a448cb..ac4d1c545d61 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -8,11 +8,11 @@
*/
#include "bcache.h"
+#include "bch2.h"
#include "btree.h"
#include "debug.h"
#include "io.h"
#include "request.h"
-#include "request2.h"
#include "writeback.h"
#include <linux/module.h>
@@ -851,7 +851,8 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
if (should_writeback(dc, s->orig_bio,
cache_mode(dc),
- s->iop.bypass)) {
+ s->iop.bypass,
+ dc->disk.c->gc_stats.in_use)) {
s->iop.bypass = false;
s->iop.writeback = true;
}
@@ -1013,34 +1014,57 @@ static void quit_max_writeback_rate(struct cache_set *c,
/* Cached devices - read & write stuff */
-blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
+static void bch1_cached_dev_make_request(struct cached_dev *dc, struct bio *bio)
{
+ struct bcache_device *d = &dc->disk;
+ struct cache_set *c = d->c;
struct search *s;
- struct bcache_device *d = bio->bi_disk->private_data;
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
- if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
- dc->io_disable)) {
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+ cached_dev_put(dc);
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- return BLK_QC_T_NONE;
+ return;
+ }
+
+ s = search_alloc(bio, d);
+ trace_bcache_request_start(s->d, bio);
+
+ if (atomic_read(&c->idle_counter))
+ atomic_set(&c->idle_counter, 0);
+ /*
+ * If at_max_writeback_rate of cache set is true and new I/O
+ * comes, quit max writeback rate of all cached devices
+ * attached to this cache set, and set at_max_writeback_rate
+ * to false.
+ */
+ if (unlikely(atomic_read(&c->at_max_writeback_rate) == 1)) {
+ atomic_set(&c->at_max_writeback_rate, 0);
+ quit_max_writeback_rate(c, dc);
}
- if (likely(d->c)) {
- if (atomic_read(&d->c->idle_counter))
- atomic_set(&d->c->idle_counter, 0);
+ if (!bio->bi_iter.bi_size) {
/*
- * If at_max_writeback_rate of cache set is true and new I/O
- * comes, quit max writeback rate of all cached devices
- * attached to this cache set, and set at_max_writeback_rate
- * to false.
+ * can't call bch_journal_meta from under
+ * generic_make_request
*/
- if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
- atomic_set(&d->c->at_max_writeback_rate, 0);
- quit_max_writeback_rate(d->c, dc);
- }
+ continue_at_nobarrier(&s->cl, cached_dev_nodata, bcache_wq);
+ } else {
+ s->iop.bypass = bch_check_should_bypass(dc, bio,
+ c->sb.block_size,
+ c->gc_stats.in_use);
+
+ if (bio_data_dir(bio) == WRITE)
+ cached_dev_write(dc, s);
+ else
+ cached_dev_read(dc, s);
}
+}
+
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct bcache_device *d = bio->bi_disk->private_data;
+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
generic_start_io_acct(q,
bio_op(bio),
@@ -1050,29 +1074,21 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
- if (cached_dev_get(dc)) {
- s = search_alloc(bio, d);
- trace_bcache_request_start(s->d, bio);
-
- if (!bio->bi_iter.bi_size) {
- /*
- * can't call bch_journal_meta from under
- * generic_make_request
- */
- continue_at_nobarrier(&s->cl,
- cached_dev_nodata,
- bcache_wq);
- } else {
- s->iop.bypass = check_should_bypass(dc, bio);
+ if (unlikely(dc->io_disable)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
- if (rw)
- cached_dev_write(dc, s);
- else
- cached_dev_read(dc, s);
- }
- } else
+ if (cached_dev_get(dc)) {
+ if (d->c)
+ bch1_cached_dev_make_request(dc, bio);
+ else
+ bch2_cached_dev_make_request(dc, bio);
+ } else {
/* I/O request sent to backing device */
detached_dev_do_request(d, bio);
+ }
return BLK_QC_T_NONE;
}
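
[Aside - not part of the patch: the request entry point now routes on attachment type; the assumed invariant is that an attached device has exactly one of d->c (bcache cache set) or d->c2 (bcachefs) set:]

  cached_dev_get() fails (count == 0)  -> detached_dev_do_request(), backing device only
  d->c  != NULL                        -> bch1_cached_dev_make_request()
  otherwise (d->c2)                    -> bch2_cached_dev_make_request()
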
@@ -1099,12 +1115,16 @@ static int cached_dev_congested(void *data, int bits)
return 1;
if (cached_dev_get(dc)) {
- unsigned int i;
- struct cache *ca;
+ if (d->c) {
+ unsigned int i;
+ struct cache *ca;
- for_each_cache(ca, d->c, i) {
- q = bdev_get_queue(ca->bdev);
- ret |= bdi_congested(q->backing_dev_info, bits);
+ for_each_cache(ca, d->c, i) {
+ q = bdev_get_queue(ca->bdev);
+ ret |= bdi_congested(q->backing_dev_info, bits);
+ }
+ } else {
+ /* bcache2: */
}
cached_dev_put(dc);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f3f42862acc4..c2a7b8ae64d7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -8,6 +8,7 @@
*/
#include "bcache.h"
+#include "bch2.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
@@ -726,7 +727,7 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
bd_link_disk_holder(ca->bdev, d->disk);
snprintf(d->name, BCACHEDEVNAME_SIZE,
- "%s%u", name, d->id);
+ "%s%llu", name, d->id);
ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
if (ret < 0)
@@ -948,7 +949,6 @@ static int cached_dev_status_update(void *arg)
return 0;
}
-
int bch_cached_dev_run(struct cached_dev *dc)
{
struct bcache_device *d = &dc->disk;
@@ -978,7 +978,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
return -EBUSY;
}
- if (!d->c &&
+ if (!bcache_dev_is_attached(d) &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
struct closure cl;
@@ -1118,7 +1118,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
(!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
return -ENOENT;
- if (dc->disk.c) {
+ if (bcache_dev_is_attached(&dc->disk)) {
pr_err("Can't attach %s: already attached",
dc->backing_dev_name);
return -EINVAL;
@@ -1292,6 +1292,7 @@ static void cached_dev_free(struct closure *cl)
wake_up(&unregister_wait);
kfree(dc->writeback_keys);
+ bch2_request_exit(dc);
kobject_put(&dc->disk.kobj);
}
@@ -1357,7 +1358,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
bch_cached_dev_request_init(dc);
- return bch_cached_dev_writeback_init(dc);
+ return bch_cached_dev_writeback_init(dc) ?:
+ bch2_request_init(dc);
}
/* Cached device - bcache superblock */
@@ -1393,6 +1395,8 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
list_for_each_entry(c, &bch_cache_sets, list)
bch_cached_dev_attach(dc, c, NULL);
+ bch2_cached_dev_attach(dc, NULL);
+
if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
err = "failed to run cached device";
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 34d09d94e95d..3b3939523708 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -7,6 +7,7 @@
*/
#include "bcache.h"
+#include "bch2.h"
#include "sysfs.h"
#include "btree.h"
#include "io.h"
@@ -407,6 +408,11 @@ STORE(__cached_dev)
if (!v)
return size;
}
+
+ v = bch2_cached_dev_attach(dc, set_uuid);
+ if (!v)
+ return size;
+
if (v == -ENOENT)
pr_err("Can't attach %s: cache set not found", buf);
return v;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index a83700acc4ce..2a83ed3d4ba0 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -42,32 +42,6 @@ struct bch_dirty_init_state {
struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
};
-static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
- unsigned int cache_mode, bool would_skip)
-{
- unsigned int in_use = dc->disk.c->gc_stats.in_use;
-
- if (cache_mode != CACHE_MODE_WRITEBACK ||
- test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- in_use > bch_cutoff_writeback_sync)
- return false;
-
- if (bio_op(bio) == REQ_OP_DISCARD)
- return false;
-
- if (dc->partial_stripes_expensive &&
- bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
- bio_sectors(bio)))
- return true;
-
- if (would_skip)
- return false;
-
- return (op_is_sync(bio->bi_opf) ||
- bio->bi_opf & (REQ_META|REQ_PRIO) ||
- in_use <= bch_cutoff_writeback);
-}
-
void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
uint64_t offset, int nr_sectors);
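
[Editorial note: the should_writeback() removed here lives on in backingdev.h at the top of this patch, with in_use hoisted to a parameter - so it no longer dereferences dc->disk.c->gc_stats, which would be a NULL dereference for a device attached to a bcachefs.]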