author     Kent Overstreet <kent.overstreet@gmail.com>  2020-06-22 17:35:56 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>  2020-06-28 18:12:46 -0400
commit     8aa6e6e10ce712bcb10be9a206ec13d00ad0e7e6
tree       347feb718da57de42ae6f2e5367df3e2e5ad6466
parent     7b42d65c0607af212b9ef3aa834ea449e3f846b1

Initial bcache/bcachefs integration (branch: bcache2)
-rw-r--r--  drivers/md/bcache/Makefile       |   6
-rw-r--r--  drivers/md/bcache/backingdev.h   |  39
-rw-r--r--  drivers/md/bcache/bch2.c         | 540
-rw-r--r--  drivers/md/bcache/bch2.h         |  11
-rw-r--r--  drivers/md/bcache/io.c           |  18
-rw-r--r--  drivers/md/bcache/io.h           |   4
-rw-r--r--  drivers/md/bcache/request.c      | 110
-rw-r--r--  drivers/md/bcache/super.c        |  14
-rw-r--r--  drivers/md/bcache/sysfs.c        |   6
-rw-r--r--  drivers/md/bcache/writeback.h    |  26
10 files changed, 680 insertions(+), 94 deletions(-)
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 0fb1b6009da3..c45be858d941 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -2,6 +2,6 @@
obj-$(CONFIG_BCACHE) += bcache.o
-bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\
- journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
- util.o writeback.o
+bcache-y := alloc.o bch2.o bset.o btree.o debug.o extents.o\
+ io.o journal.o movinggc.o request.o stats.o super.o sysfs.o\
+ trace.o util.o writeback.o
diff --git a/drivers/md/bcache/backingdev.h b/drivers/md/bcache/backingdev.h
index 58362eb7902a..038b532e91d8 100644
--- a/drivers/md/bcache/backingdev.h
+++ b/drivers/md/bcache/backingdev.h
@@ -22,7 +22,11 @@ struct bcache_device {
struct kobject kobj;
struct cache_set *c;
- unsigned int id;
+ struct bch_fs *c2;
+
+ u64 id;
+ struct inode *inode;
+
#define BCACHEDEVNAME_SIZE 12
char name[BCACHEDEVNAME_SIZE];
@@ -49,6 +53,11 @@ struct bcache_device {
unsigned int cmd, unsigned long arg);
};
+static inline bool bcache_dev_is_attached(struct bcache_device *d)
+{
+ return d->c != NULL || d->c2 != NULL;
+}
+
enum stop_on_failure {
BCH_CACHED_DEV_STOP_AUTO = 0,
BCH_CACHED_DEV_STOP_ALWAYS,
@@ -87,6 +96,9 @@ struct cached_dev {
*/
atomic_t running;
+ struct bio_set bch2_bio_read;
+ mempool_t bch2_io_write;
+
/*
* Writes take a shared lock from start to finish; scanning for dirty
* data to refill the rb tree requires an exclusive lock.
@@ -225,6 +237,31 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
}
}
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+ unsigned int cache_mode, bool would_skip,
+ unsigned int in_use)
+{
+ if (cache_mode != CACHE_MODE_WRITEBACK ||
+ test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
+ in_use > bch_cutoff_writeback_sync)
+ return false;
+
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ return false;
+
+ if (dc->partial_stripes_expensive &&
+ bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
+ bio_sectors(bio)))
+ return true;
+
+ if (would_skip)
+ return false;
+
+ return (op_is_sync(bio->bi_opf) ||
+ bio->bi_opf & (REQ_META|REQ_PRIO) ||
+ in_use <= bch_cutoff_writeback);
+}
+
static inline void bch_writeback_queue(struct cached_dev *dc)
{
if (!IS_ERR_OR_NULL(dc->writeback_thread))
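
[Aside - illustrative, not part of the patch: should_writeback() above is the old helper moved out of writeback.h, with the occupancy figure hoisted into the in_use parameter so the bcachefs path can pass fs_used_percent() where bcache passed gc_stats.in_use. A minimal user-space model of the decision cascade; the cutoff values mirror bcache's defaults (40/70) and all names here are placeholders:]

#include <stdbool.h>
#include <stdio.h>

#define CUTOFF_WRITEBACK        40      /* stands in for bch_cutoff_writeback */
#define CUTOFF_WRITEBACK_SYNC   70      /* stands in for bch_cutoff_writeback_sync */

static bool model_should_writeback(bool writeback_mode, bool detaching,
                                   bool is_discard, bool partial_stripe_dirty,
                                   bool would_skip, bool is_sync_or_prio,
                                   unsigned int in_use)
{
        /* hard gates: wrong cache mode, detaching, or too full even for sync writes */
        if (!writeback_mode || detaching || in_use > CUTOFF_WRITEBACK_SYNC)
                return false;

        /* discards never go through writeback */
        if (is_discard)
                return false;

        /* touching a dirty partial stripe forces writeback, even for would-be bypasses */
        if (partial_stripe_dirty)
                return true;

        /* a write already marked for bypass stays bypassed */
        if (would_skip)
                return false;

        /* otherwise: sync/priority writes, or a cache that still has room */
        return is_sync_or_prio || in_use <= CUTOFF_WRITEBACK;
}

int main(void)
{
        /* sync write, writeback mode, cache 50% full: goes to writeback */
        printf("%d\n", model_should_writeback(true, false, false, false,
                                              false, true, 50));        /* 1 */
        return 0;
}
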
diff --git a/drivers/md/bcache/bch2.c b/drivers/md/bcache/bch2.c
new file mode 100644
index 000000000000..b0ada5a61564
--- /dev/null
+++ b/drivers/md/bcache/bch2.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Main bcache entry point - handle a read or a write request and decide what to
+ * do with it; the make_request functions are called by the block layer.
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "backingdev.h"
+#include "bch2.h"
+
+#include "../../../fs/bcachefs/bcachefs.h"
+#include "../../../fs/bcachefs/alloc_foreground.h"
+#include "../../../fs/bcachefs/btree_update.h"
+#include "../../../fs/bcachefs/buckets.h"
+#include "../../../fs/bcachefs/io.h"
+#include "../../../fs/bcachefs/fs.h"
+#include "../../../fs/bcachefs/fs-common.h"
+#include "../../../fs/bcachefs/str_hash.h"
+
+#include "io.h"
+
+#include <linux/kthread.h>
+//#include <trace/events/bcache.h>
+
+static unsigned fs_used_percent(struct bch_fs *c)
+{
+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+
+ return div64_u64(usage.used * 100, usage.capacity);
+}
+
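+/* XXX: stub for the bcachefs path - always reports "no overlap" for now
+ * (the bcache path checks dc->writeback_keys here): */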
+static inline bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
+ struct bkey *end)
+{
+ return false;
+}
+
+/* Reads: */
+
+struct bch_cached_dev_rbio {
+ struct bio *orig;
+ struct cached_dev *dc;
+ struct bch_read_bio rbio;
+};
+
+static void cached_dev_read_endio(struct bio *bio)
+{
+ struct bch_cached_dev_rbio *c_rbio =
+ container_of(bio, struct bch_cached_dev_rbio, rbio.bio);
+ struct bio *orig = c_rbio->orig;
+ struct cached_dev *dc = c_rbio->dc;
+
+ bio_put(bio);
+ cached_dev_put(dc);
+ bio_endio(orig);
+}
+
+static void cached_dev_read(struct cached_dev *dc, struct bio *bio)
+{
+ struct bch_fs *c = dc->disk.c2;
+ struct bch_read_bio *rbio;
+ struct bch_cached_dev_rbio *c_rbio;
+ struct bch_io_opts opts = { 0 };
+ unsigned flags = BCH_READ_RETRY_IF_STALE|
+ BCH_READ_USER_MAPPED|
+ BCH_READ_PASSTHROUGH_BLOCK_DEV;
+
+ if (!bch_check_should_bypass(dc, bio, c->opts.block_size, 0)) {
+ /* XXX: implement promotes from block devices in bch2:
+ flags |= BCH_READ_MAY_PROMOTE;
+ */
+ }
+
+ /* XXX: plumb through write point for promotes:
+ unsigned write_point = writepoint_hashed((unsigned long) current);
+ */
+
+ rbio = rbio_init(bio_clone_fast(bio, GFP_NOIO, &dc->bch2_bio_read), opts);
+ rbio->bio.bi_end_io = cached_dev_read_endio;
+ c_rbio = container_of(rbio, struct bch_cached_dev_rbio, rbio);
+ c_rbio->orig = bio;
+ c_rbio->dc = dc;
+
+ bch2_read(c, rbio, dc->disk.id, flags);
+}
+
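
[Aside - illustrative, not part of the patch: the read path above leans on the bioset front-pad plus container_of() pattern. bch2_request_init() at the bottom of this file creates dc->bch2_bio_read with a front pad of offsetof(struct bch_cached_dev_rbio, rbio.bio), so every bio cloned from it already sits inside a bch_cached_dev_rbio, and cached_dev_read_endio() can walk back from the embedded bio to the wrapper. A self-contained user-space model of that recovery:]

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct inner {
        int done;               /* stands in for the embedded struct bio */
};

struct wrapper {
        const char *orig;       /* stands in for c_rbio->orig */
        struct inner member;    /* stands in for c_rbio->rbio.bio */
};

static void endio(struct inner *i)      /* completion only sees the member... */
{
        struct wrapper *w = container_of(i, struct wrapper, member);

        printf("completing: %s\n", w->orig);    /* ...but recovers the wrapper */
}

int main(void)
{
        struct wrapper w = { .orig = "original bio", .member = { 0 } };

        endio(&w.member);
        return 0;
}
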
+/* Writes: */
+
+struct bch_write {
+ struct closure cl;
+
+ struct bcache_device *d;
+ struct bio *orig_bio;
+ struct bio backingdev_bio;
+
+ blk_status_t status;
+ unsigned long start_time;
+
+ unsigned int bypass:1;
+ unsigned int writeback:1;
+ struct bch_write_op op;
+};
+
+static void cached_dev_bio_complete(struct closure *cl)
+{
+ struct bch_write *io = container_of(cl, struct bch_write, cl);
+ struct cached_dev *dc = container_of(io->d, struct cached_dev, disk);
+
+ generic_end_io_acct(io->d->disk->queue, bio_op(io->orig_bio),
+ &io->d->disk->part0, io->start_time);
+
+ //trace_bcache_request_end(s->d, s->orig_bio);
+ io->orig_bio->bi_status = io->status;
+ bio_endio(io->orig_bio);
+
+ closure_debug_destroy(cl);
+ mempool_free(io, &dc->bch2_io_write);
+
+ cached_dev_put(dc);
+}
+
+static void cached_dev_write_complete(struct closure *cl)
+{
+ struct bch_write *s = container_of(cl, struct bch_write, cl);
+ struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
+
+ up_read_non_owner(&dc->writeback_lock);
+ cached_dev_bio_complete(cl);
+}
+
+static void backingdev_endio(struct bio *bio)
+{
+ struct bch_write *io = container_of(bio, struct bch_write, backingdev_bio);
+ struct cached_dev *dc = container_of(io->d, struct cached_dev, disk);
+
+ if (bio->bi_status) {
+ io->status = bio->bi_status;
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ closure_put(&io->cl);
+}
+
+static void submit_backingdev_io(struct bch_write *io)
+{
+ struct cached_dev *dc = container_of(io->d, struct cached_dev, disk);
+
+ /*
+ * If it's a discard and the backing device doesn't support discards, no
+ * need to submit it:
+ */
+ if (bio_op(io->orig_bio) == REQ_OP_DISCARD &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ return;
+
+ bio_init(&io->backingdev_bio, NULL, 0);
+ __bio_clone_fast(&io->backingdev_bio, io->orig_bio);
+ io->backingdev_bio.bi_end_io = backingdev_endio;
+
+ closure_get(&io->cl);
+ generic_make_request(&io->backingdev_bio);
+}
+
+static void cached_dev_write(struct cached_dev *dc, struct bio *orig_bio)
+{
+ struct bch_fs *c = dc->disk.c2;
+ struct bch_write *io;
+ struct bkey start = KEY(dc->disk.id, orig_bio->bi_iter.bi_sector, 0);
+ struct bkey end = KEY(dc->disk.id, bio_end_sector(orig_bio), 0);
+ struct bch_io_opts opts = { 0 };
+ unsigned in_use = fs_used_percent(c);
+
+ io = mempool_alloc(&dc->bch2_io_write, GFP_NOIO);
+ closure_init(&io->cl, NULL);
+ io->d = &dc->disk;
+ io->orig_bio = orig_bio;
+ io->status = 0;
+ io->start_time = jiffies;
+ io->bypass = bch_check_should_bypass(dc, orig_bio,
+ c->opts.block_size, in_use);
+ io->writeback = false;
+
+ down_read_non_owner(&dc->writeback_lock);
+ if (bch_keybuf_check_overlapping(dc->writeback_keys, &start, &end)) {
+ /*
+ * We overlap with some dirty data undergoing background
+ * writeback, force this write to writeback
+ */
+ io->bypass = false;
+ io->writeback = true;
+ }
+
+ /*
+ * Discards aren't _required_ to do anything, so skipping if
+ * check_overlapping returned true is ok
+ *
+ * But check_overlapping drops dirty keys for which io hasn't started,
+ * so we still want to call it.
+ */
+ if (bio_op(orig_bio) == REQ_OP_DISCARD) {
+ io->bypass = true;
+ io->writeback = false;
+ }
+
+ if (should_writeback(dc, io->orig_bio, cache_mode(dc),
+ io->bypass, in_use)) {
+ io->bypass = false;
+ io->writeback = true;
+ }
+
+ /*
+ * Submit IO to the backing device, unless this is a writeback write
+ * (submit_backingdev_io() itself skips discards the backing device
+ * doesn't support):
+ */
+ if (!io->writeback)
+ submit_backingdev_io(io);
+
+ /* If we're bypassing, delete the range we're writing to from the cache: */
+ if (io->bypass) {
+ u64 journal_seq = 0;
+
+ bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+ POS(dc->disk.id, orig_bio->bi_iter.bi_sector),
+ POS(dc->disk.id, bio_end_sector(orig_bio)),
+ &journal_seq);
+
+ if ((orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) &&
+ !(c->opts.journal_flush_disabled))
+ bch2_journal_flush_seq_async(&c->journal, journal_seq, &io->cl);
+ } else {
+ bch2_write_op_init(&io->op, c, opts);
+ bio_init(&io->op.wbio.bio, NULL, 0);
+ __bio_clone_fast(&io->op.wbio.bio, orig_bio);
+ io->op.nr_replicas = 1;
+ io->op.write_point = writepoint_hashed((unsigned long) current);
+ io->op.new_i_size = U64_MAX;
+ io->op.pos = POS(dc->disk.id, orig_bio->bi_iter.bi_sector);
+
+ if (orig_bio->bi_opf & (REQ_FUA|REQ_PREFLUSH))
+ io->op.flags |= BCH_WRITE_FLUSH;
+
+ if (io->writeback) {
+ int ret = bch2_disk_reservation_get(c, &io->op.res, bio_sectors(orig_bio),
+ io->op.nr_replicas, 0);
+ if (ret) {
+ io->status = BLK_STS_RESOURCE;
+ goto err;
+ }
+
+ /* Mark superblock dirty, if necessary: */
+ bch_writeback_add(dc);
+ } else {
+ io->op.flags |= BCH_WRITE_CACHED;
+ }
+
+ closure_call(&io->op.cl, bch2_write, NULL, &io->cl);
+ }
+err:
+ continue_at(&io->cl, cached_dev_write_complete, NULL);
+}
+
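
[Aside - illustrative sketch, not the kernel's closure implementation: the write path above fans out up to two child IOs (the backing-device submission and the cache write or journal flush), each holding a reference on io->cl; cached_dev_write_complete() runs only when the last reference drops. A minimal user-space model of the refcount-plus-continuation idea:]

#include <stdatomic.h>
#include <stdio.h>

struct mini_closure {
        atomic_int remaining;
        void (*fn)(struct mini_closure *);      /* continuation, runs at ref 0 */
};

static void mini_get(struct mini_closure *cl)   /* closure_get() */
{
        atomic_fetch_add(&cl->remaining, 1);
}

static void mini_put(struct mini_closure *cl)   /* closure_put() */
{
        if (atomic_fetch_sub(&cl->remaining, 1) == 1)
                cl->fn(cl);                     /* last ref: run continuation */
}

static void write_complete(struct mini_closure *cl)
{
        (void)cl;
        puts("all child IOs done");
}

int main(void)
{
        struct mini_closure cl = { 1, write_complete }; /* closure_init() */

        mini_get(&cl);  /* child 1: backing device bio */
        mini_get(&cl);  /* child 2: cache write op */
        mini_put(&cl);  /* child 1 endio */
        mini_put(&cl);  /* child 2 completion */
        mini_put(&cl);  /* continue_at(): drop our own ref -> write_complete() */
        return 0;
}
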
+static void cached_dev_nodata(struct cached_dev *dc, struct bio *orig_bio)
+{
+ struct bch_fs *c = dc->disk.c2;
+ bool flush_backingdev = cache_mode(dc) != CACHE_MODE_WRITEBACK;
+ bool flush_cache = !c->opts.journal_flush_disabled;
+ struct bch_write *io;
+
+ if (!(orig_bio->bi_opf & REQ_PREFLUSH)) {
+ generic_make_request(orig_bio);
+ return;
+ }
+
+ if (!flush_backingdev && !flush_cache) {
+ bio_endio(orig_bio);
+ return;
+ }
+
+ if (!flush_cache) {
+ generic_make_request(orig_bio);
+ return;
+ }
+
+ io = mempool_alloc(&dc->bch2_io_write, GFP_NOIO);
+ closure_init(&io->cl, NULL);
+ io->d = &dc->disk;
+ io->orig_bio = orig_bio;
+ io->status = 0;
+ io->start_time = jiffies;
+ io->bypass = false;
+ io->writeback = false;
+
+ if (flush_backingdev)
+ submit_backingdev_io(io);
+
+ bch2_journal_flush_async(&c->journal, &io->cl);
+ continue_at(&io->cl, cached_dev_bio_complete, NULL);
+}
+
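
[Aside - not part of the patch: cached_dev_nodata() only has real work for empty REQ_PREFLUSH bios, and resolves them from two bits - flush_backingdev (cache mode is not writeback, so the backing device may hold data the flush must cover) and flush_cache (journal flushes enabled):]

  not REQ_PREFLUSH                 -> forward to the backing device unchanged
  !flush_backingdev, !flush_cache  -> complete immediately, nothing to flush
   flush_backingdev, !flush_cache  -> forward the flush to the backing device
   flush_cache (either way)        -> async journal flush, plus a backing-device
                                      flush when flush_backingdev; completes via
                                      cached_dev_bio_complete() when all refs drop
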
+void bch2_cached_dev_make_request(struct cached_dev *dc, struct bio *bio)
+{
+ //trace_bcache_request_start(d, bio);
+
+ if (!bio->bi_iter.bi_size)
+ cached_dev_nodata(dc, bio);
+ else if (bio_data_dir(bio) == WRITE)
+ cached_dev_write(dc, bio);
+ else
+ cached_dev_read(dc, bio);
+}
+
+static int bch2_dev_attach_trans(struct btree_trans *trans,
+ struct qstr *name,
+ u64 *inum,
+ bool must_exist)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked root_inode;
+ struct bch_inode_unpacked dev_inode;
+ struct bch_hash_info root_hash_info;
+ struct btree_iter *iter;
+ int ret;
+
+ ret = bch2_inode_find_by_inum_trans(trans, BCACHEFS_ROOT_INO, &root_inode);
+ if (ret)
+ return ret;
+
+ root_hash_info = bch2_hash_info_init(c, &root_inode);
+
+ iter = __bch2_dirent_lookup_trans(trans, BCACHEFS_ROOT_INO,
+ &root_hash_info, name, 0);
+ ret = PTR_ERR_OR_ZERO(iter);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ if (!ret) {
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+ *inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
+ return 0;
+ }
+
+ if (must_exist)
+ return ret;
+
+ /* Doesn't exist, create it: */
+ bch2_inode_init_early(c, &dev_inode);
+
+ ret = bch2_create_trans(trans, BCACHEFS_ROOT_INO,
+ &root_inode, &dev_inode,
+ name, 0, 0, S_IFREG, 0, NULL, NULL) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+ *inum = dev_inode.bi_inum;
+ return ret;
+}
+
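+/*
+ * Note on must_exist (reading of the call site below): it is driven by
+ * BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY. A backing device with dirty data
+ * must find its existing "backing-device-%pU" inode - creating a fresh one
+ * would orphan the dirty cached data - while a clean device may have its
+ * inode created on first attach.
+ */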
+static int bch2_cached_dev_attach_one(struct cached_dev *dc, struct bch_fs *c,
+ uint8_t *fs_uuid)
+{
+ char backingdev_filename[80];
+ struct qstr backingdev_qstr;
+ struct inode *inode = NULL;
+ u64 inum;
+ int ret = 0;
+
+ snprintf(backingdev_filename, sizeof(backingdev_filename),
+ "backing-device-%pU", dc->sb.uuid);
+ backingdev_qstr = (struct qstr) QSTR_INIT(backingdev_filename,
+ strlen(backingdev_filename));
+
+ if (bcache_dev_is_attached(&dc->disk)) {
+ pr_err("Can't attach %s: already attached",
+ dc->backing_dev_name);
+ return -EINVAL;
+ }
+#if 0
+ if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
+ pr_err("Can't attach %s: shutting down",
+ dc->backing_dev_name);
+ return -EINVAL;
+ }
+#endif
+ if (dc->sb.block_size < c->opts.block_size) {
+ /* Will die */
+ pr_err("Couldn't attach %s: block size less than set's block size",
+ dc->backing_dev_name);
+ return -EINVAL;
+ }
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_dev_attach_trans(&trans, &backingdev_qstr, &inum,
+ BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY));
+ if (ret) {
+ pr_err("Error attaching %s: %i\n",
+ dc->backing_dev_name, ret);
+ return ret;
+ }
+
+ inode = bch2_vfs_inode_get(c, inum);
+ if (IS_ERR(inode)) {
+ pr_err("Can't attach %s: error getting inode %li",
+ dc->backing_dev_name, PTR_ERR(inode));
+ return PTR_ERR(inode);
+ }
+
+ ret = get_write_access(inode);
+ if (ret) {
+ pr_err("Can't attach %s: error getting inode %i",
+ dc->backing_dev_name, ret);
+ iput(inode);
+ return ret;
+ }
+
+ /* XXX should we be calling __mnt_want_write() too? */
+
+ if (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+
+ ret = bch2_fpunch(c, inum, 0, U64_MAX, NULL, NULL);
+ if (ret) {
+ pr_err("Error attaching %s: error deleting existing data %i\n",
+ dc->backing_dev_name, ret);
+ return ret;
+ }
+
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
+ bch_write_bdev_super(dc, &cl);
+ closure_sync(&cl);
+ }
+
+ /*
+ * XXX: set inode size
+ */
+
+ dc->disk.id = inum;
+ dc->disk.inode = inode;
+ dc->disk.c2 = c;
+#if 0
+ bcache_device_attach(&dc->disk, c, inum);
+ list_move(&dc->list, &c->cached_devs);
+ calc_cached_dev_sectors(c);
+#endif
+ /*
+ * dc->c must be set before dc->count != 0 - paired with the mb in
+ * cached_dev_get()
+ */
+ smp_wmb();
+ refcount_set(&dc->count, 1);
+#if 0
+ /* Block writeback thread, but spawn it */
+ down_write(&dc->writeback_lock);
+ if (bch_cached_dev_writeback_start(dc)) {
+ up_write(&dc->writeback_lock);
+ pr_err("Couldn't start writeback facilities for %s",
+ dc->disk.disk->disk_name);
+ return -ENOMEM;
+ }
+
+ if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
+ atomic_set(&dc->has_dirty, 1);
+ bch_writeback_queue(dc);
+ }
+
+ bch_sectors_dirty_init(&dc->disk);
+
+ ret = bch_cached_dev_run(dc);
+ if (ret && (ret != -EBUSY)) {
+ up_write(&dc->writeback_lock);
+ /*
+ * bch_register_lock is held, bcache_device_stop() is not
+ * able to be directly called. The kthread and kworker
+ * created previously in bch_cached_dev_writeback_start()
+ * have to be stopped manually here.
+ */
+ kthread_stop(dc->writeback_thread);
+ cancel_writeback_rate_update_dwork(dc);
+ pr_err("Couldn't run cached device %s",
+ dc->backing_dev_name);
+ return ret;
+ }
+
+ /* Allow the writeback thread to proceed */
+ up_write(&dc->writeback_lock);
+#endif
+
+#if 0
+ bcache_device_link(&dc->disk, c, "bdev");
+ atomic_inc(&c->attached_dev_nr);
+#endif
+
+ pr_info("Caching %s as %s on set %pU",
+ dc->backing_dev_name,
+ dc->disk.disk->disk_name,
+ &dc->disk.c2->sb.uuid);
+ return 0;
+}
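
[Aside - illustrative, not part of the patch: the smp_wmb() in bch2_cached_dev_attach_one() publishes dc->disk.c2 before dc->count becomes nonzero, pairing with the read-side barrier in cached_dev_get(). The same ordering expressed with C11 release/acquire atomics, as a self-contained user-space model under assumed semantics:]

#include <stdatomic.h>

struct dev {
        void *c2;               /* payload: must be visible before the refcount */
        atomic_int count;       /* 0 = not attached */
};

static void attach(struct dev *d, void *fs)
{
        d->c2 = fs;                                     /* plain store */
        atomic_store_explicit(&d->count, 1,             /* publish: plays the role */
                              memory_order_release);    /* of smp_wmb() + set */
}

static void *try_get(struct dev *d)                     /* cached_dev_get() side */
{
        if (atomic_load_explicit(&d->count, memory_order_acquire) == 0)
                return NULL;    /* not attached yet */
        return d->c2;           /* acquire guarantees we see the c2 store */
}

int main(void)
{
        static struct dev d;

        attach(&d, (void *)"bch_fs");
        return try_get(&d) ? 0 : 1;
}
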
+
+int bch2_cached_dev_attach(struct cached_dev *dc, uint8_t *fs_uuid)
+{
+ struct bch_fs *c;
+ int ret;
+
+ mutex_lock(&bch2_fs_list_lock);
+ list_for_each_entry(c, &bch2_fs_list, list) {
+ if (fs_uuid
+ ? !memcmp(fs_uuid, &c->sb.user_uuid, 16)
+ : !memcmp(dc->sb.set_uuid, &c->sb.uuid, 16)) {
+ closure_get(&c->cl);
+ mutex_unlock(&bch2_fs_list_lock);
+ goto found;
+ }
+ }
+ mutex_unlock(&bch2_fs_list_lock);
+ return -ENOENT;
+found:
+ ret = bch2_cached_dev_attach_one(dc, c, fs_uuid);
+ closure_put(&c->cl);
+ return ret;
+}
+
+void bch2_request_exit(struct cached_dev *dc)
+{
+ mempool_exit(&dc->bch2_io_write);
+ bioset_exit(&dc->bch2_bio_read);
+}
+
+int bch2_request_init(struct cached_dev *dc)
+{
+ return bioset_init(&dc->bch2_bio_read, 1,
+ offsetof(struct bch_cached_dev_rbio, rbio.bio),
+ BIOSET_NEED_RESCUER) ?:
+ mempool_init_kmalloc_pool(&dc->bch2_io_write, 1, sizeof(struct bch_write));
+}
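
[Aside - not part of the patch: the "?:" chaining in bch2_request_init() (and in cached_dev_init() in super.c below) is GCC's conditional-with-omitted-operand extension: "a ?: b" yields a when a is nonzero and only evaluates b when a is zero, so with 0-on-success/-errno initializers it reads as "stop at the first failure". Illustrative user-space model (compile with gcc):]

#include <stdio.h>

static int init_a(void) { puts("init_a"); return 0; }
static int init_b(void) { puts("init_b"); return -12; }  /* fails: -ENOMEM */
static int init_c(void) { puts("init_c"); return 0; }    /* never evaluated */

int main(void)
{
        int ret = init_a() ?: init_b() ?: init_c();

        printf("ret = %d\n", ret);      /* -12: init_c was short-circuited */
        return 0;
}

[Partial failure is covered by cached_dev_free() calling bch2_request_exit() unconditionally; bioset_exit() and mempool_exit() tolerate zeroed, never-initialized structures, and cached_dev is zero-allocated.]
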
diff --git a/drivers/md/bcache/bch2.h b/drivers/md/bcache/bch2.h
new file mode 100644
index 000000000000..40d30db8a43a
--- /dev/null
+++ b/drivers/md/bcache/bch2.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHE_BCH2_H
+#define _BCACHE_BCH2_H
+
+void bch2_cached_dev_make_request(struct cached_dev *dc, struct bio *bio);
+int bch2_cached_dev_attach(struct cached_dev *dc, uint8_t *fs_uuid);
+
+void bch2_request_exit(struct cached_dev *dc);
+int bch2_request_init(struct cached_dev *dc);
+
+#endif /* _BCACHE_BCH2_H */
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 8ba675b8a2da..90758649eb11 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -59,25 +59,17 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
}
-bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio)
+bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio,
+ unsigned int block_size,
+ unsigned int in_use)
{
unsigned int mode = cache_mode(dc);
- unsigned int sectors, congested, dirty_percentage, block_size;
+ unsigned int sectors, congested;
struct task_struct *task = current;
struct io *i;
- if (dc->disk.c) {
- dirty_percentage = dc->disk.c->gc_stats.in_use;
- block_size = dc->disk.c->sb.block_size;
- } else {
- /* XXX bcache2: */
- dirty_percentage = 0;
- block_size = 0;
- //block_size = dc->disk.c2->sb.block_size;
- }
-
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- dirty_percentage > CUTOFF_CACHE_ADD ||
+ in_use > CUTOFF_CACHE_ADD ||
(bio_op(bio) == REQ_OP_DISCARD))
goto skip;
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index 59bc40c8b107..08bfbd521e96 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -7,7 +7,9 @@ struct cache;
struct cache_set;
unsigned int bch_get_congested(const struct cache_set *c);
-bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio);
+bool bch_check_should_bypass(struct cached_dev *dc, struct bio *bio,
+ unsigned int block_size,
+ unsigned int in_use);
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *ca, blk_status_t error,
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 755bc6a448cb..ac4d1c545d61 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -8,11 +8,11 @@
*/
#include "bcache.h"
+#include "bch2.h"
#include "btree.h"
#include "debug.h"
#include "io.h"
#include "request.h"
-#include "request2.h"
#include "writeback.h"
#include <linux/module.h>
@@ -851,7 +851,8 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
if (should_writeback(dc, s->orig_bio,
cache_mode(dc),
- s->iop.bypass)) {
+ s->iop.bypass,
+ dc->disk.c->gc_stats.in_use)) {
s->iop.bypass = false;
s->iop.writeback = true;
}
@@ -1013,34 +1014,57 @@ static void quit_max_writeback_rate(struct cache_set *c,
/* Cached devices - read & write stuff */
-blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
+static void bch1_cached_dev_make_request(struct cached_dev *dc, struct bio *bio)
{
+ struct bcache_device *d = &dc->disk;
+ struct cache_set *c = d->c;
struct search *s;
- struct bcache_device *d = bio->bi_disk->private_data;
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
- if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
- dc->io_disable)) {
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+ cached_dev_put(dc);
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- return BLK_QC_T_NONE;
+ return;
+ }
+
+ s = search_alloc(bio, d);
+ trace_bcache_request_start(s->d, bio);
+
+ if (atomic_read(&c->idle_counter))
+ atomic_set(&c->idle_counter, 0);
+ /*
+ * If at_max_writeback_rate of cache set is true and new I/O
+ * comes, quit max writeback rate of all cached devices
+ * attached to this cache set, and set at_max_writeback_rate
+ * to false.
+ */
+ if (unlikely(atomic_read(&c->at_max_writeback_rate) == 1)) {
+ atomic_set(&c->at_max_writeback_rate, 0);
+ quit_max_writeback_rate(c, dc);
}
- if (likely(d->c)) {
- if (atomic_read(&d->c->idle_counter))
- atomic_set(&d->c->idle_counter, 0);
+ if (!bio->bi_iter.bi_size) {
/*
- * If at_max_writeback_rate of cache set is true and new I/O
- * comes, quit max writeback rate of all cached devices
- * attached to this cache set, and set at_max_writeback_rate
- * to false.
+ * can't call bch_journal_meta from under
+ * generic_make_request
*/
- if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
- atomic_set(&d->c->at_max_writeback_rate, 0);
- quit_max_writeback_rate(d->c, dc);
- }
+ continue_at_nobarrier(&s->cl, cached_dev_nodata, bcache_wq);
+ } else {
+ s->iop.bypass = bch_check_should_bypass(dc, bio,
+ c->sb.block_size,
+ c->gc_stats.in_use);
+
+ if (bio_data_dir(bio) == WRITE)
+ cached_dev_write(dc, s);
+ else
+ cached_dev_read(dc, s);
}
+}
+
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct bcache_device *d = bio->bi_disk->private_data;
+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
generic_start_io_acct(q,
bio_op(bio),
@@ -1050,29 +1074,21 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
- if (cached_dev_get(dc)) {
- s = search_alloc(bio, d);
- trace_bcache_request_start(s->d, bio);
-
- if (!bio->bi_iter.bi_size) {
- /*
- * can't call bch_journal_meta from under
- * generic_make_request
- */
- continue_at_nobarrier(&s->cl,
- cached_dev_nodata,
- bcache_wq);
- } else {
- s->iop.bypass = check_should_bypass(dc, bio);
+ if (unlikely(dc->io_disable)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
- if (rw)
- cached_dev_write(dc, s);
- else
- cached_dev_read(dc, s);
- }
- } else
+ if (cached_dev_get(dc)) {
+ if (d->c)
+ bch1_cached_dev_make_request(dc, bio);
+ else
+ bch2_cached_dev_make_request(dc, bio);
+ } else {
/* I/O request sent to backing device */
detached_dev_do_request(d, bio);
+ }
return BLK_QC_T_NONE;
}
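
[Aside - not part of the patch: the request entry point now routes on attachment type; the assumed invariant is that an attached device has exactly one of d->c (bcache cache set) or d->c2 (bcachefs) set:]

  cached_dev_get() fails (count == 0)  -> detached_dev_do_request(), backing device only
  d->c  != NULL                        -> bch1_cached_dev_make_request()
  otherwise (d->c2)                    -> bch2_cached_dev_make_request()
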
@@ -1099,12 +1115,16 @@ static int cached_dev_congested(void *data, int bits)
return 1;
if (cached_dev_get(dc)) {
- unsigned int i;
- struct cache *ca;
+ if (d->c) {
+ unsigned int i;
+ struct cache *ca;
- for_each_cache(ca, d->c, i) {
- q = bdev_get_queue(ca->bdev);
- ret |= bdi_congested(q->backing_dev_info, bits);
+ for_each_cache(ca, d->c, i) {
+ q = bdev_get_queue(ca->bdev);
+ ret |= bdi_congested(q->backing_dev_info, bits);
+ }
+ } else {
+ /* bcache2: */
}
cached_dev_put(dc);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f3f42862acc4..c2a7b8ae64d7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -8,6 +8,7 @@
*/
#include "bcache.h"
+#include "bch2.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
@@ -726,7 +727,7 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
bd_link_disk_holder(ca->bdev, d->disk);
snprintf(d->name, BCACHEDEVNAME_SIZE,
- "%s%u", name, d->id);
+ "%s%llu", name, d->id);
ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
if (ret < 0)
@@ -948,7 +949,6 @@ static int cached_dev_status_update(void *arg)
return 0;
}
-
int bch_cached_dev_run(struct cached_dev *dc)
{
struct bcache_device *d = &dc->disk;
@@ -978,7 +978,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
return -EBUSY;
}
- if (!d->c &&
+ if (!bcache_dev_is_attached(d) &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
struct closure cl;
@@ -1118,7 +1118,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
(!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
return -ENOENT;
- if (dc->disk.c) {
+ if (bcache_dev_is_attached(&dc->disk)) {
pr_err("Can't attach %s: already attached",
dc->backing_dev_name);
return -EINVAL;
@@ -1292,6 +1292,7 @@ static void cached_dev_free(struct closure *cl)
wake_up(&unregister_wait);
kfree(dc->writeback_keys);
+ bch2_request_exit(dc);
kobject_put(&dc->disk.kobj);
}
@@ -1357,7 +1358,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
bch_cached_dev_request_init(dc);
- return bch_cached_dev_writeback_init(dc);
+ return bch_cached_dev_writeback_init(dc) ?:
+ bch2_request_init(dc);
}
/* Cached device - bcache superblock */
@@ -1393,6 +1395,8 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
list_for_each_entry(c, &bch_cache_sets, list)
bch_cached_dev_attach(dc, c, NULL);
+ bch2_cached_dev_attach(dc, NULL);
+
if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
err = "failed to run cached device";
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 34d09d94e95d..3b3939523708 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -7,6 +7,7 @@
*/
#include "bcache.h"
+#include "bch2.h"
#include "sysfs.h"
#include "btree.h"
#include "io.h"
@@ -407,6 +408,11 @@ STORE(__cached_dev)
if (!v)
return size;
}
+
+ v = bch2_cached_dev_attach(dc, set_uuid);
+ if (!v)
+ return size;
+
if (v == -ENOENT)
pr_err("Can't attach %s: cache set not found", buf);
return v;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index a83700acc4ce..2a83ed3d4ba0 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -42,32 +42,6 @@ struct bch_dirty_init_state {
struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
};
-static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
- unsigned int cache_mode, bool would_skip)
-{
- unsigned int in_use = dc->disk.c->gc_stats.in_use;
-
- if (cache_mode != CACHE_MODE_WRITEBACK ||
- test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- in_use > bch_cutoff_writeback_sync)
- return false;
-
- if (bio_op(bio) == REQ_OP_DISCARD)
- return false;
-
- if (dc->partial_stripes_expensive &&
- bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
- bio_sectors(bio)))
- return true;
-
- if (would_skip)
- return false;
-
- return (op_is_sync(bio->bi_opf) ||
- bio->bi_opf & (REQ_META|REQ_PRIO) ||
- in_use <= bch_cutoff_writeback);
-}
-
void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
uint64_t offset, int nr_sectors);
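
[Editorial note: the should_writeback() removed here lives on in backingdev.h at the top of this patch, with in_use hoisted to a parameter - so it no longer dereferences dc->disk.c->gc_stats, which would be a NULL dereference for a device attached to a bcachefs.]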