author     Kent Overstreet <kent.overstreet@gmail.com>  2017-03-17 17:10:15 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>  2017-03-19 17:23:29 -0800
commit     8b9eb1274b81c9144238c589f0a9e12ea5961898 (patch)
tree       2b506fff714509a6aab586e9a01c351da121a5ee
parent     6115d0bcf8fee40ab9a98ff8068ea4d3bce5dbdb (diff)
bcachefs: delete old style bcache block device interfaces
-rw-r--r--   fs/bcachefs/Makefile                  11
-rw-r--r--   fs/bcachefs/bcache.h                  37
-rw-r--r--   fs/bcachefs/bcachefs_format.h         97
-rw-r--r--   fs/bcachefs/blockdev.c               819
-rw-r--r--   fs/bcachefs/blockdev.h               134
-rw-r--r--   fs/bcachefs/blockdev_types.h         123
-rw-r--r--   fs/bcachefs/btree_gc.c                 2
-rw-r--r--   fs/bcachefs/btree_io.c                17
-rw-r--r--   fs/bcachefs/debug.c                   52
-rw-r--r--   fs/bcachefs/debug.h                    3
-rw-r--r--   fs/bcachefs/error.c                  111
-rw-r--r--   fs/bcachefs/error.h                    3
-rw-r--r--   fs/bcachefs/extents.c                117
-rw-r--r--   fs/bcachefs/extents.h                  4
-rw-r--r--   fs/bcachefs/io.c                      60
-rw-r--r--   fs/bcachefs/io.h                       7
-rw-r--r--   fs/bcachefs/journal.c                  8
-rw-r--r--   fs/bcachefs/keybuf.c                 195
-rw-r--r--   fs/bcachefs/keybuf.h                  16
-rw-r--r--   fs/bcachefs/keybuf_types.h            33
-rw-r--r--   fs/bcachefs/notify.c                 105
-rw-r--r--   fs/bcachefs/notify.h                  34
-rw-r--r--   fs/bcachefs/request.c                807
-rw-r--r--   fs/bcachefs/request.h                 16
-rw-r--r--   fs/bcachefs/stats.c                  219
-rw-r--r--   fs/bcachefs/stats.h                   68
-rw-r--r--   fs/bcachefs/stats_types.h             56
-rw-r--r--   fs/bcachefs/super-io.c                 5
-rw-r--r--   fs/bcachefs/super.c                  217
-rw-r--r--   fs/bcachefs/super.h                    5
-rw-r--r--   fs/bcachefs/sysfs.c                  401
-rw-r--r--   fs/bcachefs/trace.c                    1
-rw-r--r--   fs/bcachefs/util.h                     8
-rw-r--r--   fs/bcachefs/writeback.c              657
-rw-r--r--   fs/bcachefs/writeback.h              122
-rw-r--r--   include/linux/sched.h                  2
-rw-r--r--   include/trace/events/bcachefs.h      534
37 files changed, 124 insertions, 4982 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 0ad8ba903460..2f4c628b69c3 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -1,10 +1,9 @@
obj-$(CONFIG_BCACHE_FS) += bcachefs.o
-bcachefs-y := acl.o alloc.o bkey.o bkey_methods.o blockdev.o\
- bset.o btree_cache.o btree_gc.o btree_io.o btree_iter.o btree_update.o\
+bcachefs-y := acl.o alloc.o bkey.o bkey_methods.o bset.o\
+ btree_cache.o btree_gc.o btree_io.o btree_iter.o btree_update.o\
buckets.o chardev.o checksum.o clock.o compress.o debug.o dirent.o\
- error.o extents.o fs.o fs-gc.o fs-io.o inode.o io.o journal.o keybuf.o\
- keylist.o migrate.o move.o movinggc.o notify.o opts.o request.o\
- siphash.o six.o stats.o super.o super-io.o sysfs.o tier.o trace.o\
- util.o writeback.o xattr.o
+ error.o extents.o fs.o fs-gc.o fs-io.o inode.o io.o journal.o keylist.o\
+ migrate.o move.o movinggc.o opts.o siphash.o six.o super.o super-io.o\
+ sysfs.o tier.o trace.o util.o xattr.o
diff --git a/fs/bcachefs/bcache.h b/fs/bcachefs/bcache.h
index 09e5de351dfb..61209bfb15d4 100644
--- a/fs/bcachefs/bcache.h
+++ b/fs/bcachefs/bcache.h
@@ -282,15 +282,12 @@ do { \
BCH_TIME_STAT(journal_flush_seq, us, us)
#include "alloc_types.h"
-#include "blockdev_types.h"
#include "buckets_types.h"
#include "clock_types.h"
#include "io_types.h"
#include "journal_types.h"
#include "keylist_types.h"
-#include "keybuf_types.h"
#include "move_types.h"
-#include "stats_types.h"
#include "super_types.h"
/* 256k, in sectors */
@@ -435,10 +432,6 @@ struct bch_dev {
struct work_struct io_error_work;
/* The rest of this all shows up in sysfs */
-#define IO_ERROR_SHIFT 20
- atomic_t io_errors;
- atomic_t io_count;
-
atomic64_t meta_sectors_written;
atomic64_t btree_sectors_written;
u64 __percpu *sectors_written;
@@ -454,7 +447,6 @@ struct bch_dev {
*/
enum {
BCH_FS_INITIAL_GC_DONE,
- BCH_FS_DETACHING,
BCH_FS_EMERGENCY_RO,
BCH_FS_WRITE_DISABLE_COMPLETE,
BCH_FS_GC_STOPPING,
@@ -723,11 +715,6 @@ struct bch_fs {
atomic64_t key_version;
- /* For punting bio submissions to workqueue, io.c */
- struct bio_list bio_submit_list;
- struct work_struct bio_submit_work;
- spinlock_t bio_submit_lock;
-
struct bio_list read_retry_list;
struct work_struct read_retry_work;
spinlock_t read_retry_lock;
@@ -738,10 +725,6 @@ struct bch_fs {
unsigned writeback_pages_max;
atomic_long_t nr_inodes;
- /* NOTIFICATIONS */
- struct mutex uevent_lock;
- struct kobj_uevent_env uevent_env;
-
/* DEBUG JUNK */
struct dentry *debug;
struct btree_debug btree_debug[BTREE_ID_NR];
@@ -765,28 +748,8 @@ struct bch_fs {
unsigned bucket_journal_seq;
- /* CACHING OTHER BLOCK DEVICES */
- mempool_t search;
- struct radix_tree_root devices;
- struct list_head cached_devs;
- u64 cached_dev_sectors;
- struct closure caching;
-
-#define CONGESTED_MAX 1024
- unsigned congested_last_us;
- atomic_t congested;
-
/* The rest of this all shows up in sysfs */
- unsigned congested_read_threshold_us;
- unsigned congested_write_threshold_us;
-
- struct cache_accounting accounting;
atomic_long_t cache_read_races;
- atomic_long_t writeback_keys_done;
- atomic_long_t writeback_keys_failed;
-
- unsigned error_limit;
- unsigned error_decay;
unsigned foreground_write_ratelimit_enabled:1;
unsigned copy_gc_enabled:1;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index c221747b4a4b..5bed6ed488ab 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1344,103 +1344,6 @@ struct btree_node_entry {
};
} __attribute__((packed));
-/* OBSOLETE */
-
-#define BITMASK(name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
-static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \
- \
-static inline __u64 name(const type *k) \
-{ return (k->field >> offset) & ~(~0ULL << (end - offset)); } \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- k->field &= ~(~(~0ULL << (end - offset)) << offset); \
- k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
-}
-
-struct bkey_v0 {
- __u64 high;
- __u64 low;
- __u64 ptr[];
-};
-
-#define KEY0_FIELD(name, field, offset, size) \
- BITMASK(name, struct bkey_v0, field, offset, size)
-
-KEY0_FIELD(KEY0_PTRS, high, 60, 63)
-KEY0_FIELD(KEY0_CSUM, high, 56, 58)
-KEY0_FIELD(KEY0_DIRTY, high, 36, 37)
-
-KEY0_FIELD(KEY0_SIZE, high, 20, 36)
-KEY0_FIELD(KEY0_INODE, high, 0, 20)
-
-static inline unsigned long bkey_v0_u64s(const struct bkey_v0 *k)
-{
- return (sizeof(struct bkey_v0) / sizeof(__u64)) + KEY0_PTRS(k);
-}
-
-static inline struct bkey_v0 *bkey_v0_next(const struct bkey_v0 *k)
-{
- __u64 *d = (__u64 *) k;
-
- return (struct bkey_v0 *) (d + bkey_v0_u64s(k));
-}
-
-struct jset_v0 {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 keys;
-
- __u64 last_seq;
-
- __BKEY_PADDED(uuid_bucket, 4);
- __BKEY_PADDED(btree_root, 4);
- __u16 btree_level;
- __u16 pad[3];
-
- __u64 prio_bucket[64];
-
- union {
- struct bkey start[0];
- __u64 d[0];
- };
-};
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry_v0 {
- uuid_le uuid;
- __u8 label[32];
- __u32 first_reg;
- __u32 last_reg;
- __u32 invalidated;
- __u32 pad;
-};
-
-struct uuid_entry {
- union {
- struct {
- uuid_le uuid;
- __u8 label[32];
- __u32 first_reg;
- __u32 last_reg;
- __u32 invalidated;
-
- __u32 flags;
- /* Size of flash only volumes */
- __u64 sectors;
- };
-
- __u8 pad[128];
- };
-};
-
-BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
-
#ifdef __cplusplus
}
#endif
diff --git a/fs/bcachefs/blockdev.c b/fs/bcachefs/blockdev.c
deleted file mode 100644
index a4522ad2836f..000000000000
--- a/fs/bcachefs/blockdev.c
+++ /dev/null
@@ -1,819 +0,0 @@
-
-#include "bcache.h"
-#include "blockdev.h"
-#include "btree_iter.h"
-#include "btree_update.h"
-#include "checksum.h"
-#include "error.h"
-#include "inode.h"
-#include "request.h"
-#include "super-io.h"
-#include "writeback.h"
-
-#include <linux/kthread.h>
-#include <linux/module.h>
-#include <linux/random.h>
-
-static int bch_blockdev_major;
-static DEFINE_IDA(bch_blockdev_minor);
-static LIST_HEAD(uncached_devices);
-static DEFINE_MUTEX(bch_blockdev_lock);
-
-static struct kmem_cache *bch_search_cache;
-
-static void write_bdev_super_endio(struct bio *bio)
-{
- struct cached_dev *dc = bio->bi_private;
- /* XXX: error checking */
-
- closure_put(&dc->sb_write);
-}
-
-static void bch_write_bdev_super_unlock(struct closure *cl)
-{
- struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
-
- up(&dc->sb_write_mutex);
-}
-
-void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
-{
- struct backingdev_sb *sb = dc->disk_sb.sb;
- struct closure *cl = &dc->sb_write;
- struct bio *bio = dc->disk_sb.bio;
-
- down(&dc->sb_write_mutex);
- closure_init(cl, parent);
-
- sb->csum = csum_vstruct(NULL, BCH_CSUM_CRC64,
- (struct nonce) { 0 }, sb).lo;
-
- bio_reset(bio);
- bio->bi_bdev = dc->disk_sb.bdev;
- bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
- bio->bi_iter.bi_size =
- roundup(vstruct_bytes(sb),
- bdev_logical_block_size(dc->disk_sb.bdev));
- bio->bi_end_io = write_bdev_super_endio;
- bio->bi_private = dc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FUA|REQ_META);
- bch_bio_map(bio, sb);
-
- closure_get(cl);
-
- closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
-}
-
-static int open_dev(struct block_device *b, fmode_t mode)
-{
- struct bcache_device *d = b->bd_disk->private_data;
-
- if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
- return -ENXIO;
-
- closure_get(&d->cl);
- return 0;
-}
-
-static void release_dev(struct gendisk *b, fmode_t mode)
-{
- struct bcache_device *d = b->private_data;
-
- closure_put(&d->cl);
-}
-
-static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct bcache_device *d = b->bd_disk->private_data;
-
- return d->ioctl(d, mode, cmd, arg);
-}
-
-static const struct block_device_operations bcache_ops = {
- .open = open_dev,
- .release = release_dev,
- .ioctl = ioctl_dev,
- .owner = THIS_MODULE,
-};
-
-void bch_blockdev_stop(struct bcache_device *d)
-{
- if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags))
- closure_queue(&d->cl);
-}
-
-static void bcache_device_unlink(struct bcache_device *d)
-{
- if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
- sysfs_remove_link(&d->c->kobj, d->name);
- sysfs_remove_link(&d->kobj, "cache");
- }
-}
-
-static void bcache_device_link(struct bcache_device *d, struct bch_fs *c,
- const char *name)
-{
- snprintf(d->name, BCACHEDEVNAME_SIZE,
- "%s%llu", name, bcache_dev_inum(d));
-
- WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
- sysfs_create_link(&c->kobj, &d->kobj, d->name),
- "Couldn't create device <-> cache set symlinks");
-
- clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
-}
-
-static void bcache_device_detach(struct bcache_device *d)
-{
- if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
- mutex_lock(&d->inode_lock);
- bch_inode_rm(d->c, bcache_dev_inum(d));
- mutex_unlock(&d->inode_lock);
- }
-
- bcache_device_unlink(d);
-
- radix_tree_delete(&d->c->devices, bcache_dev_inum(d));
-
- closure_put(&d->c->caching);
- d->c = NULL;
-}
-
-static int bcache_device_attach(struct bcache_device *d, struct bch_fs *c)
-{
- int ret;
-
- ret = radix_tree_insert(&c->devices, bcache_dev_inum(d), d);
- if (ret) {
- pr_err("radix_tree_insert() error for inum %llu",
- bcache_dev_inum(d));
- return ret;
- }
-
- d->c = c;
- closure_get(&c->caching);
-
- return ret;
-}
-
-static void bcache_device_free(struct bcache_device *d)
-{
- pr_info("%s stopped", d->disk->disk_name);
-
- if (d->c)
- bcache_device_detach(d);
- if (d->disk && d->disk->flags & GENHD_FL_UP)
- del_gendisk(d->disk);
- if (d->disk && d->disk->queue)
- blk_cleanup_queue(d->disk->queue);
- if (d->disk) {
- ida_simple_remove(&bch_blockdev_minor, d->disk->first_minor);
- put_disk(d->disk);
- }
-
- bioset_exit(&d->bio_split);
-
- closure_debug_destroy(&d->cl);
-}
-
-static int bcache_device_init(struct bcache_device *d, unsigned block_size,
- sector_t sectors)
-{
- struct request_queue *q;
- int minor;
-
- mutex_init(&d->inode_lock);
-
- minor = ida_simple_get(&bch_blockdev_minor, 0, MINORMASK + 1, GFP_KERNEL);
- if (minor < 0) {
- pr_err("cannot allocate minor");
- return minor;
- }
-
- if (!(d->disk = alloc_disk(1)) ||
- bioset_init(&d->bio_split, 4, offsetof(struct bch_read_bio, bio))) {
- pr_err("cannot allocate disk");
- ida_simple_remove(&bch_blockdev_minor, minor);
- return -ENOMEM;
- }
-
- set_capacity(d->disk, sectors);
- snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", minor);
-
- d->disk->major = bch_blockdev_major;
- d->disk->first_minor = minor;
- d->disk->fops = &bcache_ops;
- d->disk->private_data = d;
-
- q = blk_alloc_queue(GFP_KERNEL);
- if (!q) {
- pr_err("cannot allocate queue");
- return -ENOMEM;
- }
-
- blk_queue_make_request(q, NULL);
- d->disk->queue = q;
- q->queuedata = d;
- q->backing_dev_info.congested_data = d;
- q->limits.max_hw_sectors = UINT_MAX;
- q->limits.max_sectors = UINT_MAX;
- q->limits.max_segment_size = UINT_MAX;
- q->limits.max_segments = BIO_MAX_PAGES;
- blk_queue_max_discard_sectors(q, UINT_MAX);
- q->limits.discard_granularity = 512;
- q->limits.io_min = block_size;
- q->limits.logical_block_size = block_size;
- q->limits.physical_block_size = block_size;
- set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
- clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags);
- set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
-
- blk_queue_write_cache(q, true, true);
-
- return 0;
-}
-
-/* Cached device */
-
-static void calc_cached_dev_sectors(struct bch_fs *c)
-{
- u64 sectors = 0;
- struct cached_dev *dc;
-
- list_for_each_entry(dc, &c->cached_devs, list)
- sectors += bdev_sectors(dc->disk_sb.bdev);
-
- c->cached_dev_sectors = sectors;
-}
-
-void bch_cached_dev_run(struct cached_dev *dc)
-{
- struct bcache_device *d = &dc->disk;
- char buf[BCH_SB_LABEL_SIZE + 1];
- char *env[] = {
- "DRIVER=bcache",
- kasprintf(GFP_KERNEL, "CACHED_UUID=%pU",
- dc->disk_sb.sb->disk_uuid.b),
- NULL,
- NULL,
- };
-
- memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
- buf[BCH_SB_LABEL_SIZE] = '\0';
- env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
-
- if (atomic_xchg(&dc->running, 1)) {
- kfree(env[1]);
- kfree(env[2]);
- return;
- }
-
- if (!d->c &&
- BDEV_STATE(dc->disk_sb.sb) != BDEV_STATE_NONE) {
- struct closure cl;
-
- closure_init_stack(&cl);
-
- SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_STALE);
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
- }
-
- add_disk(d->disk);
- bd_link_disk_holder(dc->disk_sb.bdev, dc->disk.disk);
- /* won't show up in the uevent file, use udevadm monitor -e instead
- * only class / kset properties are persistent */
- kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
- kfree(env[1]);
- kfree(env[2]);
-
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
- pr_debug("error creating sysfs link");
-}
-
-static void cached_dev_detach_finish(struct work_struct *w)
-{
- struct cached_dev *dc = container_of(w, struct cached_dev, detach);
- char buf[BDEVNAME_SIZE];
- struct closure cl;
-
- closure_init_stack(&cl);
-
- BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
- BUG_ON(atomic_read(&dc->count));
-
- mutex_lock(&bch_blockdev_lock);
-
- memset(&dc->disk_sb.sb->set_uuid, 0, 16);
- SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_NONE);
-
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
-
- bcache_device_detach(&dc->disk);
- list_move(&dc->list, &uncached_devices);
-
- clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
- clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
-
- mutex_unlock(&bch_blockdev_lock);
-
- pr_info("Caching disabled for %s", bdevname(dc->disk_sb.bdev, buf));
-
- /* Drop ref we took in cached_dev_detach() */
- closure_put(&dc->disk.cl);
-}
-
-void bch_cached_dev_detach(struct cached_dev *dc)
-{
- if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
- return;
-
- if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- return;
-
- /*
- * Block the device from being closed and freed until we're finished
- * detaching
- */
- closure_get(&dc->disk.cl);
-
- dc->writeback_pd.rate.rate = UINT_MAX;
- bch_writeback_queue(dc);
- cached_dev_put(dc);
-}
-
-int bch_cached_dev_attach(struct cached_dev *dc, struct bch_fs *c)
-{
- __le64 rtime = cpu_to_le64(ktime_get_seconds());
- char buf[BDEVNAME_SIZE];
- bool found;
- int ret;
-
- lockdep_assert_held(&c->state_lock);
-
- bdevname(dc->disk_sb.bdev, buf);
-
- if (memcmp(&dc->disk_sb.sb->set_uuid,
- &c->sb.uuid,
- sizeof(c->sb.uuid)))
- return -ENOENT;
-
- if (dc->disk.c) {
- pr_err("Can't attach %s: already attached", buf);
- return -EINVAL;
- }
-
- if (!bch_fs_running(c)) {
- pr_err("Can't attach %s: not running", buf);
- return -EINVAL;
- }
-
- if (le16_to_cpu(dc->disk_sb.sb->block_size) < c->sb.block_size) {
- /* Will die */
- pr_err("Couldn't attach %s: block size less than set's block size",
- buf);
- return -EINVAL;
- }
-
- found = !bch_cached_dev_inode_find_by_uuid(c,
- &dc->disk_sb.sb->disk_uuid,
- &dc->disk.inode);
-
- if (!found && BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_DIRTY) {
- pr_err("Couldn't find uuid for %s in set", buf);
- return -ENOENT;
- }
-
- if (found &&
- (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE ||
- BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE)) {
- found = false;
- bch_inode_rm(c, bcache_dev_inum(&dc->disk));
- }
-
- /* Deadlocks since we're called via sysfs...
- sysfs_remove_file(&dc->kobj, &sysfs_attach);
- */
-
- if (!found) {
- struct closure cl;
-
- closure_init_stack(&cl);
-
- bkey_inode_blockdev_init(&dc->disk.inode.k_i);
- dc->disk.inode.k.type = BCH_INODE_BLOCKDEV;
- SET_CACHED_DEV(&dc->disk.inode.v, true);
- dc->disk.inode.v.i_uuid = dc->disk_sb.sb->disk_uuid;
- memcpy(dc->disk.inode.v.i_label,
- dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
- dc->disk.inode.v.i_ctime = rtime;
- dc->disk.inode.v.i_mtime = rtime;
-
- ret = bch_inode_create(c, &dc->disk.inode.k_i,
- 0, BLOCKDEV_INODE_MAX,
- &c->unused_inode_hint);
- if (ret) {
- pr_err("Error %d, not caching %s", ret, buf);
- return ret;
- }
-
- pr_info("attached inode %llu", bcache_dev_inum(&dc->disk));
-
- dc->disk_sb.sb->set_uuid = c->sb.uuid;
- SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_CLEAN);
-
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
- } else {
- dc->disk.inode.v.i_mtime = rtime;
- bch_btree_update(c, BTREE_ID_INODES,
- &dc->disk.inode.k_i, NULL);
- }
-
- /* Count dirty sectors before attaching */
- if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_DIRTY)
- bch_sectors_dirty_init(dc, c);
-
- ret = bcache_device_attach(&dc->disk, c);
- if (ret)
- return ret;
-
- list_move(&dc->list, &c->cached_devs);
- calc_cached_dev_sectors(c);
-
- /*
- * dc->c must be set before dc->count != 0 - paired with the mb in
- * cached_dev_get()
- */
- smp_wmb();
- atomic_set(&dc->count, 1);
-
- if (bch_cached_dev_writeback_start(dc))
- return -ENOMEM;
-
- if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_DIRTY) {
- atomic_set(&dc->has_dirty, 1);
- atomic_inc(&dc->count);
- }
-
- bch_cached_dev_run(dc);
- bcache_device_link(&dc->disk, c, "bdev");
-
- pr_info("Caching %s as %s on set %pU",
- bdevname(dc->disk_sb.bdev, buf), dc->disk.disk->disk_name,
- dc->disk.c->sb.uuid.b);
- return 0;
-}
-
-void bch_attach_backing_devs(struct bch_fs *c)
-{
- struct cached_dev *dc, *t;
-
- lockdep_assert_held(&c->state_lock);
-
- mutex_lock(&bch_blockdev_lock);
-
- list_for_each_entry_safe(dc, t, &uncached_devices, list)
- bch_cached_dev_attach(dc, c);
-
- mutex_unlock(&bch_blockdev_lock);
-}
-
-void bch_cached_dev_release(struct kobject *kobj)
-{
- struct cached_dev *dc = container_of(kobj, struct cached_dev,
- disk.kobj);
- kfree(dc);
- module_put(THIS_MODULE);
-}
-
-static void cached_dev_free(struct closure *cl)
-{
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
- bch_cached_dev_writeback_stop(dc);
- bch_cached_dev_writeback_free(dc);
-
- mutex_lock(&bch_blockdev_lock);
-
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->disk_sb.bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
- list_del(&dc->list);
-
- mutex_unlock(&bch_blockdev_lock);
-
- bch_free_super((void *) &dc->disk_sb);
-
- kobject_put(&dc->disk.kobj);
-}
-
-static void cached_dev_flush(struct closure *cl)
-{
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
- struct bcache_device *d = &dc->disk;
-
- bch_cache_accounting_destroy(&dc->accounting);
- bcache_device_unlink(d);
- kobject_del(&d->kobj);
-
- continue_at(cl, cached_dev_free, system_wq);
-}
-
-static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
-{
- int ret;
- struct io *io;
- struct request_queue *q = bdev_get_queue(dc->disk_sb.bdev);
-
- dc->sequential_cutoff = 4 << 20;
-
- for (io = dc->io; io < dc->io + RECENT_IO; io++) {
- list_add(&io->lru, &dc->io_lru);
- hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
- }
-
- dc->disk.stripe_size = q->limits.io_opt >> 9;
-
- if (dc->disk.stripe_size)
- dc->partial_stripes_expensive =
- q->limits.raid_partial_stripes_expensive;
-
- ret = bcache_device_init(&dc->disk, block_size,
- dc->disk_sb.bdev->bd_part->nr_sects -
- le64_to_cpu(dc->disk_sb.sb->data_offset));
- if (ret)
- return ret;
-
- dc->disk.disk->queue->backing_dev_info.ra_pages =
- max(dc->disk.disk->queue->backing_dev_info.ra_pages,
- q->backing_dev_info.ra_pages);
-
- bch_cached_dev_request_init(dc);
- ret = bch_cached_dev_writeback_init(dc);
- if (ret)
- return ret;
-
- return 0;
-}
-
-/* Cached device - bcache superblock */
-
-static const char *bdev_validate_super(struct backingdev_sb *sb)
-{
- switch (le64_to_cpu(sb->version)) {
- case BCACHE_SB_VERSION_BDEV:
- sb->data_offset = cpu_to_le64(BDEV_DATA_START_DEFAULT);
- break;
- case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
- if (le64_to_cpu(sb->data_offset) < BDEV_DATA_START_DEFAULT)
- return "Bad data offset";
-
- break;
- default:
- return"Unsupported superblock version";
- }
-
- sb->last_mount = cpu_to_le32(get_seconds());
-
- return NULL;
-}
-
-const char *bch_backing_dev_register(struct bcache_superblock *sb)
-{
- char name[BDEVNAME_SIZE];
- const char *err;
- struct bch_fs *c;
- struct cached_dev *dc;
-
- dc = kzalloc(sizeof(*dc), GFP_KERNEL);
- if (!dc)
- return "cannot allocate memory";
-
- __module_get(THIS_MODULE);
- INIT_LIST_HEAD(&dc->list);
- closure_init(&dc->disk.cl, NULL);
- set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
- kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
- INIT_WORK(&dc->detach, cached_dev_detach_finish);
- sema_init(&dc->sb_write_mutex, 1);
- INIT_LIST_HEAD(&dc->io_lru);
- spin_lock_init(&dc->io_lock);
- bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
-
- memcpy(&dc->disk_sb, sb, sizeof(*sb));
- dc->disk_sb.bdev->bd_holder = dc;
- memset(sb, 0, sizeof(*sb));
-
- err = bdev_validate_super(dc->disk_sb.sb);
- if (err)
- goto err;
-
- if (cached_dev_init(dc, le16_to_cpu(dc->disk_sb.sb->block_size) << 9))
- goto err;
-
- err = "error creating kobject";
- if (kobject_add(&dc->disk.kobj,
- &part_to_dev(dc->disk_sb.bdev->bd_part)->kobj,
- "bcache"))
- goto err;
-
- err = "error accounting kobject";
- if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
- goto err;
-
- pr_info("registered backing device %s",
- bdevname(dc->disk_sb.bdev, name));
-
- list_add(&dc->list, &uncached_devices);
- c = bch_uuid_to_fs(dc->disk_sb.sb->set_uuid);
- if (c) {
- bch_cached_dev_attach(dc, c);
- closure_put(&c->cl);
- }
-
- if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE ||
- BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE)
- bch_cached_dev_run(dc);
-
- return NULL;
-err:
- bch_blockdev_stop(&dc->disk);
- return err;
-}
-
-/* Flash only volumes */
-
-void bch_blockdev_volume_release(struct kobject *kobj)
-{
- struct bcache_device *d = container_of(kobj, struct bcache_device,
- kobj);
- kfree(d);
-}
-
-static void blockdev_volume_free(struct closure *cl)
-{
- struct bcache_device *d = container_of(cl, struct bcache_device, cl);
-
- bcache_device_free(d);
- kobject_put(&d->kobj);
-}
-
-static void blockdev_volume_flush(struct closure *cl)
-{
- struct bcache_device *d = container_of(cl, struct bcache_device, cl);
-
- bcache_device_unlink(d);
- kobject_del(&d->kobj);
- continue_at(cl, blockdev_volume_free, system_wq);
-}
-
-static int blockdev_volume_run(struct bch_fs *c,
- struct bkey_s_c_inode_blockdev inode)
-{
- struct bcache_device *d = kzalloc(sizeof(struct bcache_device),
- GFP_KERNEL);
- int ret = -ENOMEM;
-
- if (!d)
- return ret;
-
- bkey_reassemble(&d->inode.k_i, inode.s_c);
-
- closure_init(&d->cl, NULL);
- set_closure_fn(&d->cl, blockdev_volume_flush, system_wq);
-
- kobject_init(&d->kobj, &bch_blockdev_volume_ktype);
-
- ret = bcache_device_init(d, block_bytes(c),
- le64_to_cpu(inode.v->i_size) >> 9);
- if (ret)
- goto err;
-
- ret = bcache_device_attach(d, c);
- if (ret)
- goto err;
-
- bch_blockdev_volume_request_init(d);
- add_disk(d->disk);
-
- if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"))
- goto err;
-
- bcache_device_link(d, c, "volume");
-
- return 0;
-err:
- kobject_put(&d->kobj);
- return ret;
-}
-
-int bch_blockdev_volumes_start(struct bch_fs *c)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_s_c_inode_blockdev inode;
- int ret = 0;
-
- if (!bch_fs_running(c))
- return -EINVAL;
-
- for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
- if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
- break;
-
- if (k.k->type != BCH_INODE_BLOCKDEV)
- continue;
-
- inode = bkey_s_c_to_inode_blockdev(k);
-
- ret = blockdev_volume_run(c, inode);
- if (ret)
- break;
- }
- bch_btree_iter_unlock(&iter);
-
- return ret;
-}
-
-int bch_blockdev_volume_create(struct bch_fs *c, u64 size)
-{
- __le64 rtime = cpu_to_le64(ktime_get_seconds());
- struct bkey_i_inode_blockdev inode;
- int ret;
-
- bkey_inode_blockdev_init(&inode.k_i);
- get_random_bytes(&inode.v.i_uuid, sizeof(inode.v.i_uuid));
- inode.v.i_ctime = rtime;
- inode.v.i_mtime = rtime;
- inode.v.i_size = cpu_to_le64(size);
-
- ret = bch_inode_create(c, &inode.k_i, 0, BLOCKDEV_INODE_MAX,
- &c->unused_inode_hint);
- if (ret) {
- pr_err("Can't create volume: %d", ret);
- return ret;
- }
-
- return blockdev_volume_run(c, inode_blockdev_i_to_s_c(&inode));
-}
-
-void bch_blockdevs_stop(struct bch_fs *c)
-{
- struct cached_dev *dc;
- struct bcache_device *d;
- struct radix_tree_iter iter;
- void **slot;
-
- mutex_lock(&bch_blockdev_lock);
- rcu_read_lock();
-
- radix_tree_for_each_slot(slot, &c->devices, &iter, 0) {
- d = radix_tree_deref_slot(slot);
-
- if (CACHED_DEV(&d->inode.v) &&
- test_bit(BCH_FS_DETACHING, &c->flags)) {
- dc = container_of(d, struct cached_dev, disk);
- bch_cached_dev_detach(dc);
- } else {
- bch_blockdev_stop(d);
- }
- }
-
- rcu_read_unlock();
- mutex_unlock(&bch_blockdev_lock);
-}
-
-void bch_fs_blockdev_exit(struct bch_fs *c)
-{
- mempool_exit(&c->search);
-}
-
-int bch_fs_blockdev_init(struct bch_fs *c)
-{
- return mempool_init_slab_pool(&c->search, 1, bch_search_cache);
-}
-
-void bch_blockdev_exit(void)
-{
- kmem_cache_destroy(bch_search_cache);
-
- if (bch_blockdev_major >= 0)
- unregister_blkdev(bch_blockdev_major, "bcache");
-}
-
-int __init bch_blockdev_init(void)
-{
- bch_blockdev_major = register_blkdev(0, "bcache");
- if (bch_blockdev_major < 0)
- return bch_blockdev_major;
-
- bch_search_cache = KMEM_CACHE(search, 0);
- if (!bch_search_cache)
- return -ENOMEM;
-
- return 0;
-}
diff --git a/fs/bcachefs/blockdev.h b/fs/bcachefs/blockdev.h
deleted file mode 100644
index 5423d77644f8..000000000000
--- a/fs/bcachefs/blockdev.h
+++ /dev/null
@@ -1,134 +0,0 @@
-#ifndef _BCACHE_BLOCKDEV_H
-#define _BCACHE_BLOCKDEV_H
-
-#include "blockdev_types.h"
-#include "io_types.h"
-
-struct search {
- /* Stack frame for bio_complete */
- struct closure cl;
-
- union {
- struct bch_read_bio rbio;
- struct bch_write_bio wbio;
- };
- /* Not modified */
- struct bio *orig_bio;
- struct bcache_device *d;
-
- unsigned inode;
- unsigned write:1;
-
- /* Flags only used for reads */
- unsigned recoverable:1;
- unsigned read_dirty_data:1;
- unsigned cache_miss:1;
-
- /*
- * For reads: bypass read from cache and insertion into cache
- * For writes: discard key range from cache, sending the write to
- * the backing device (if there is a backing device)
- */
- unsigned bypass:1;
-
- unsigned long start_time;
-
- /*
- * Mostly only used for writes. For reads, we still make use of
- * some trivial fields:
- * - c
- * - error
- */
- struct bch_write_op iop;
-};
-
-#ifndef NO_BCACHE_BLOCKDEV
-
-extern struct kobj_type bch_cached_dev_ktype;
-extern struct kobj_type bch_blockdev_volume_ktype;
-
-void bch_write_bdev_super(struct cached_dev *, struct closure *);
-
-void bch_cached_dev_release(struct kobject *);
-void bch_blockdev_volume_release(struct kobject *);
-
-int bch_cached_dev_attach(struct cached_dev *, struct bch_fs *);
-void bch_attach_backing_devs(struct bch_fs *);
-
-void bch_cached_dev_detach(struct cached_dev *);
-void bch_cached_dev_run(struct cached_dev *);
-void bch_blockdev_stop(struct bcache_device *);
-
-const char *bch_backing_dev_register(struct bcache_superblock *);
-
-int bch_blockdev_volume_create(struct bch_fs *, u64);
-int bch_blockdev_volumes_start(struct bch_fs *);
-
-void bch_blockdevs_stop(struct bch_fs *);
-
-void bch_fs_blockdev_exit(struct bch_fs *);
-int bch_fs_blockdev_init(struct bch_fs *);
-void bch_blockdev_exit(void);
-int bch_blockdev_init(void);
-
-#else
-
-static inline void bch_write_bdev_super(struct cached_dev *dc,
- struct closure *cl) {}
-
-static inline void bch_cached_dev_release(struct kobject *kobj) {}
-static inline void bch_blockdev_volume_release(struct kobject *kobj) {}
-
-static inline int bch_cached_dev_attach(struct cached_dev *dc, struct bch_fs *c)
-{
- return 0;
-}
-static inline void bch_attach_backing_devs(struct bch_fs *c) {}
-
-static inline void bch_cached_dev_detach(struct cached_dev *dc) {}
-static inline void bch_cached_dev_run(struct cached_dev *dc) {}
-static inline void bch_blockdev_stop(struct bcache_device *d) {}
-
-static inline const char *bch_backing_dev_register(struct bcache_superblock *sb)
-{
- return "not implemented";
-}
-
-static inline int bch_blockdev_volume_create(struct bch_fs *c, u64 s) { return 0; }
-static inline int bch_blockdev_volumes_start(struct bch_fs *c) { return 0; }
-
-static inline void bch_blockdevs_stop(struct bch_fs *c) {}
-static inline void bch_fs_blockdev_exit(struct bch_fs *c) {}
-static inline int bch_fs_blockdev_init(struct bch_fs *c) { return 0; }
-static inline void bch_blockdev_exit(void) {}
-static inline int bch_blockdev_init(void) { return 0; }
-
-#endif
-
-static inline void cached_dev_put(struct cached_dev *dc)
-{
- if (atomic_dec_and_test(&dc->count))
- schedule_work(&dc->detach);
-}
-
-static inline bool cached_dev_get(struct cached_dev *dc)
-{
- if (!atomic_inc_not_zero(&dc->count))
- return false;
-
- /* Paired with the mb in cached_dev_attach */
- smp_mb__after_atomic();
- return true;
-}
-
-static inline u64 bcache_dev_inum(struct bcache_device *d)
-{
- return d->inode.k.p.inode;
-}
-
-static inline struct bcache_device *bch_dev_find(struct bch_fs *c, u64 inode)
-{
- return radix_tree_lookup(&c->devices, inode);
-}
-
-#endif /* _BCACHE_BLOCKDEV_H */
diff --git a/fs/bcachefs/blockdev_types.h b/fs/bcachefs/blockdev_types.h
deleted file mode 100644
index e51720041f6f..000000000000
--- a/fs/bcachefs/blockdev_types.h
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef _BCACHE_BLOCKDEV_TYPES_H
-#define _BCACHE_BLOCKDEV_TYPES_H
-
-#include "keybuf_types.h"
-#include "stats_types.h"
-#include "super_types.h"
-#include "util.h"
-
-struct bcache_device {
- struct closure cl;
-
- struct kobject kobj;
-
- struct bch_fs *c;
-
- struct rb_node node;
- struct bkey_i_inode_blockdev inode;
- struct mutex inode_lock;
-
-#define BCACHEDEVNAME_SIZE 12
- char name[BCACHEDEVNAME_SIZE];
-
- struct gendisk *disk;
-
- unsigned long flags;
-#define BCACHE_DEV_CLOSING 0
-#define BCACHE_DEV_DETACHING 1
-#define BCACHE_DEV_UNLINK_DONE 2
-
- unsigned nr_stripes;
- unsigned stripe_size;
- atomic_t *stripe_sectors_dirty;
- unsigned long *full_dirty_stripes;
-
- struct bio_set bio_split;
-
- unsigned data_csum:1;
-
- int (*ioctl)(struct bcache_device *, fmode_t, unsigned, unsigned long);
-};
-
-struct io {
- /* Used to track sequential IO so it can be skipped */
- struct hlist_node hash;
- struct list_head lru;
-
- unsigned long last_io;
- unsigned sequential;
- sector_t last;
-};
-
-struct cached_dev {
- struct list_head list;
- struct bcache_device disk;
-
- //struct backingdev_sb sb;
-
- struct {
- struct backingdev_sb *sb;
- struct block_device *bdev;
- struct bio *bio;
- unsigned page_order;
- } disk_sb;
- struct closure sb_write;
- struct semaphore sb_write_mutex;
-
- /* Refcount on the cache set. Always nonzero when we're caching. */
- atomic_t count;
- struct work_struct detach;
-
- /*
- * Device might not be running if it's dirty and the cache set hasn't
- * showed up yet.
- */
- atomic_t running;
-
- /*
- * Writes take a shared lock from start to finish; scanning for dirty
- * data to refill the rb tree requires an exclusive lock.
- */
- struct rw_semaphore writeback_lock;
-
- /*
- * Nonzero, and writeback has a refcount (d->count), iff there is dirty
- * data in the cache. Protected by writeback_lock; must have an
- * shared lock to set and exclusive lock to clear.
- */
- atomic_t has_dirty;
-
- /* for dynamic rate control of writeback */
- struct bch_pd_controller writeback_pd;
- struct delayed_work writeback_pd_update;
- unsigned writeback_pd_update_seconds;
-
- struct task_struct *writeback_thread;
- struct keybuf writeback_keys;
- mempool_t writeback_io_pool;
- mempool_t writeback_page_pool;
-
- /* For tracking sequential IO */
-#define RECENT_IO_BITS 7
-#define RECENT_IO (1 << RECENT_IO_BITS)
- struct io io[RECENT_IO];
- struct hlist_head io_hash[RECENT_IO + 1];
- struct list_head io_lru;
- spinlock_t io_lock;
-
- struct cache_accounting accounting;
-
- /* The rest of this all shows up in sysfs */
- unsigned sequential_cutoff;
- unsigned readahead;
-
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
- unsigned partial_stripes_expensive:1;
- unsigned writeback_metadata:1;
- unsigned writeback_running:1;
- unsigned char writeback_percent;
-};
-
-#endif /* _BCACHE_BLOCKDEV_TYPES_H */
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 7e8a3f6a17df..c86e7ac7abfd 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -19,7 +19,6 @@
#include "keylist.h"
#include "move.h"
#include "super-io.h"
-#include "writeback.h"
#include <linux/slab.h>
#include <linux/bitops.h>
@@ -463,7 +462,6 @@ void bch_gc(struct bch_fs *c)
bch_mark_metadata(c);
bch_mark_pending_btree_node_frees(c);
- bch_writeback_recalc_oldest_gens(c);
for_each_member_device(ca, c, i)
atomic_long_set(&ca->saturated_count, 0);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 71478fb1cc89..13e280cc4e18 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1177,22 +1177,14 @@ err:
goto out;
}
-static void btree_node_read_endio(struct bio *bio)
-{
- closure_put(bio->bi_private);
-}
-
void bch_btree_node_read(struct bch_fs *c, struct btree *b)
{
uint64_t start_time = local_clock();
- struct closure cl;
struct bio *bio;
struct extent_pick_ptr pick;
trace_bcache_btree_read(c, b);
- closure_init_stack(&cl);
-
pick = bch_btree_pick_ptr(c, b);
if (bch_fs_fatal_err_on(!pick.ca, c,
"no cache device for btree node")) {
@@ -1204,15 +1196,10 @@ void bch_btree_node_read(struct bch_fs *c, struct btree *b)
bio->bi_bdev = pick.ca->disk_sb.bdev;
bio->bi_iter.bi_sector = pick.ptr.offset;
bio->bi_iter.bi_size = btree_bytes(c);
- bio->bi_end_io = btree_node_read_endio;
- bio->bi_private = &cl;
bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
-
bch_bio_map(bio, b->data);
- closure_get(&cl);
- bch_generic_make_request(bio, c);
- closure_sync(&cl);
+ submit_bio_wait(bio);
if (bch_dev_fatal_io_err_on(bio->bi_error,
pick.ca, "IO error reading bucket %zu",
@@ -1562,7 +1549,7 @@ void __bch_btree_node_write(struct bch_fs *c, struct btree *b,
b->written += sectors_to_write;
- bch_submit_wbio_replicas(wbio, c, &k.key, true);
+ bch_submit_wbio_replicas(wbio, c, &k.key);
}
/*
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index b91f53d261d2..f19ccadfbeb8 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -30,13 +30,6 @@ static struct dentry *bch_debug;
#ifdef CONFIG_BCACHEFS_DEBUG
-static void btree_verify_endio(struct bio *bio)
-{
- struct closure *cl = bio->bi_private;
-
- closure_put(cl);
-}
-
void __bch_btree_verify(struct bch_fs *c, struct btree *b)
{
struct btree *v = c->verify_data;
@@ -44,13 +37,10 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b)
struct bset *sorted, *inmemory;
struct extent_pick_ptr pick;
struct bio *bio;
- struct closure cl;
if (c->opts.nochanges)
return;
- closure_init_stack(&cl);
-
btree_node_io_lock(b);
mutex_lock(&c->verify_lock);
@@ -73,13 +63,9 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b)
bio->bi_iter.bi_sector = pick.ptr.offset;
bio->bi_iter.bi_size = btree_bytes(c);
bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
- bio->bi_private = &cl;
- bio->bi_end_io = btree_verify_endio;
bch_bio_map(bio, n_sorted);
- closure_get(&cl);
- bch_generic_make_request(bio, c);
- closure_sync(&cl);
+ submit_bio_wait(bio);
bio_put(bio);
@@ -146,42 +132,6 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b)
btree_node_io_unlock(b);
}
-void bch_data_verify(struct cached_dev *dc, struct bio *bio)
-{
- char name[BDEVNAME_SIZE];
- struct bio *check;
- struct bio_vec bv;
- struct bvec_iter iter;
-
- check = bio_clone(bio, GFP_NOIO);
- if (!check)
- return;
- bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC);
-
- if (bio_alloc_pages(check, GFP_NOIO))
- goto out_put;
-
- submit_bio_wait(check);
-
- bio_for_each_segment(bv, bio, iter) {
- void *p1 = kmap_atomic(bv.bv_page);
- void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page);
-
- if (memcmp(p1 + bv.bv_offset,
- p2 + bv.bv_offset,
- bv.bv_len))
- panic("verify failed at dev %s sector %llu\n",
- bdevname(dc->disk_sb.bdev, name),
- (uint64_t) bio->bi_iter.bi_sector);
-
- kunmap_atomic(p1);
- }
-
- bio_free_pages(check);
-out_put:
- bio_put(check);
-}
-
#endif
#ifdef CONFIG_DEBUG_FS
diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h
index 035b247886dd..7cb4f4787a88 100644
--- a/fs/bcachefs/debug.h
+++ b/fs/bcachefs/debug.h
@@ -5,7 +5,6 @@
struct bio;
struct btree;
-struct cached_dev;
struct bch_fs;
#define BCH_DEBUG_PARAM(name, description) extern bool bch_##name;
@@ -27,7 +26,6 @@ BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
void __bch_btree_verify(struct bch_fs *, struct btree *);
-void bch_data_verify(struct cached_dev *, struct bio *);
#define bypass_torture_test(d) ((d)->bypass_torture_test)
@@ -39,7 +37,6 @@ BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
static inline void __bch_btree_verify(struct bch_fs *c, struct btree *b) {}
-static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
#define bypass_torture_test(d) 0
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index ba46d2d12f59..0c8ac1d79c97 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -1,7 +1,6 @@
#include "bcache.h"
#include "error.h"
#include "io.h"
-#include "notify.h"
#include "super.h"
void bch_inconsistent_error(struct bch_fs *c)
@@ -12,12 +11,6 @@ void bch_inconsistent_error(struct bch_fs *c)
case BCH_ON_ERROR_CONTINUE:
break;
case BCH_ON_ERROR_RO:
- if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
- /* XXX do something better here? */
- bch_fs_stop_async(c);
- return;
- }
-
if (bch_fs_emergency_read_only(c))
bch_err(c, "emergency read only");
break;
@@ -33,108 +26,26 @@ void bch_fatal_error(struct bch_fs *c)
bch_err(c, "emergency read only");
}
-/* Nonfatal IO errors, IO error/latency accounting: */
-
-/* Just does IO error accounting: */
-void bch_account_io_completion(struct bch_dev *ca)
-{
- /*
- * The halflife of an error is:
- * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
- */
-
- if (ca->fs->error_decay) {
- unsigned count = atomic_inc_return(&ca->io_count);
-
- while (count > ca->fs->error_decay) {
- unsigned errors;
- unsigned old = count;
- unsigned new = count - ca->fs->error_decay;
-
- /*
- * First we subtract refresh from count; each time we
- * succesfully do so, we rescale the errors once:
- */
-
- count = atomic_cmpxchg(&ca->io_count, old, new);
-
- if (count == old) {
- count = new;
-
- errors = atomic_read(&ca->io_errors);
- do {
- old = errors;
- new = ((uint64_t) errors * 127) / 128;
- errors = atomic_cmpxchg(&ca->io_errors,
- old, new);
- } while (old != errors);
- }
- }
- }
-}
-
-/* IO error accounting and latency accounting: */
-void bch_account_io_completion_time(struct bch_dev *ca,
- unsigned submit_time_us, int op)
-{
- struct bch_fs *c;
- unsigned threshold;
-
- if (!ca)
- return;
-
- c = ca->fs;
- threshold = op_is_write(op)
- ? c->congested_write_threshold_us
- : c->congested_read_threshold_us;
-
- if (threshold && submit_time_us) {
- unsigned t = local_clock_us();
-
- int us = t - submit_time_us;
- int congested = atomic_read(&c->congested);
-
- if (us > (int) threshold) {
- int ms = us / 1024;
- c->congested_last_us = t;
-
- ms = min(ms, CONGESTED_MAX + congested);
- atomic_sub(ms, &c->congested);
- } else if (congested < 0)
- atomic_inc(&c->congested);
- }
-
- bch_account_io_completion(ca);
-}
-
void bch_nonfatal_io_error_work(struct work_struct *work)
{
struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
struct bch_fs *c = ca->fs;
- unsigned errors = atomic_read(&ca->io_errors);
bool dev;
- if (errors < c->error_limit) {
- bch_notify_dev_error(ca, false);
- } else {
- bch_notify_dev_error(ca, true);
-
- mutex_lock(&c->state_lock);
- dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
- BCH_FORCE_IF_DEGRADED);
- if (dev
- ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
- BCH_FORCE_IF_DEGRADED)
- : bch_fs_emergency_read_only(c))
- bch_err(ca,
- "too many IO errors, setting %s RO",
- dev ? "device" : "filesystem");
- mutex_unlock(&c->state_lock);
- }
+ mutex_lock(&c->state_lock);
+ dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
+ BCH_FORCE_IF_DEGRADED);
+ if (dev
+ ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
+ BCH_FORCE_IF_DEGRADED)
+ : bch_fs_emergency_read_only(c))
+ bch_err(ca,
+ "too many IO errors, setting %s RO",
+ dev ? "device" : "filesystem");
+ mutex_unlock(&c->state_lock);
}
void bch_nonfatal_io_error(struct bch_dev *ca)
{
- atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
queue_work(system_long_wq, &ca->io_error_work);
}
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 726b20d4434b..2d7f15803ae9 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -191,9 +191,6 @@ do { \
* don't (necessarily) want to shut down the fs:
*/
-void bch_account_io_completion(struct bch_dev *);
-void bch_account_io_completion_time(struct bch_dev *, unsigned, int);
-
void bch_nonfatal_io_error_work(struct work_struct *);
/* Does the error handling without logging a message */
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index ce69c411c34c..28f5766049fc 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -17,7 +17,6 @@
#include "inode.h"
#include "journal.h"
#include "super-io.h"
-#include "writeback.h"
#include "xattr.h"
#include <trace/events/bcachefs.h>
@@ -950,10 +949,6 @@ static void bch_add_sectors(struct extent_insert_state *s,
bch_mark_key(c, k, sectors, false, gc_pos_btree_node(b),
&s->stats, s->trans->journal_res.seq);
-
- if (bkey_extent_is_data(k.k) &&
- !bkey_extent_is_cached(k.k))
- bcache_dev_sectors_dirty_add(c, k.k->p.inode, offset, sectors);
}
static void bch_subtract_sectors(struct extent_insert_state *s,
@@ -988,118 +983,6 @@ static void bch_drop_subtract(struct extent_insert_state *s, struct bkey_s k)
__set_bkey_deleted(k.k);
}
-/*
- * Note: If this returns true because only some pointers matched,
- * we can lose some caching that had happened in the interim.
- * Because cache promotion only promotes the part of the extent
- * actually read, and not the whole extent, and due to the key
- * splitting done in bch_extent_insert_fixup, preserving such
- * caching is difficult.
- */
-static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
-{
- struct bkey_s_c_extent le, re;
- const struct bch_extent_ptr *lp, *rp;
- s64 offset;
-
- BUG_ON(!l.k->size || !r.k->size);
-
- if (l.k->type != r.k->type ||
- bversion_cmp(l.k->version, r.k->version))
- return false;
-
- switch (l.k->type) {
- case KEY_TYPE_COOKIE:
- return !memcmp(bkey_s_c_to_cookie(l).v,
- bkey_s_c_to_cookie(r).v,
- sizeof(struct bch_cookie));
-
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- le = bkey_s_c_to_extent(l);
- re = bkey_s_c_to_extent(r);
-
- /*
- * bkey_cmpxchg() handles partial matches - when either l or r
- * has been trimmed - so we need just to handle l or r not
- * starting at the same place when checking for a match here.
- *
- * If the starts of the keys are different, we just apply that
- * offset to the device pointer offsets when checking those -
- * matching how bch_cut_front() adjusts device pointer offsets
- * when adjusting the start of a key:
- */
- offset = bkey_start_offset(l.k) - bkey_start_offset(r.k);
-
- /*
- * XXX: perhaps we only raced with copygc or tiering replacing
- * one of the pointers: it should suffice to find _any_ matching
- * pointer
- */
-
- if (bkey_val_u64s(le.k) != bkey_val_u64s(re.k))
- return false;
-
- extent_for_each_ptr(le, lp) {
- const union bch_extent_entry *entry =
- vstruct_idx(re.v, (u64 *) lp - le.v->_data);
-
- if (!extent_entry_is_ptr(entry))
- return false;
-
- rp = &entry->ptr;
-
- if (lp->offset != rp->offset + offset ||
- lp->dev != rp->dev ||
- lp->gen != rp->gen)
- return false;
- }
-
- return true;
- default:
- return false;
- }
-
-}
-
-/*
- * Returns true on success, false on failure (and false means @new no longer
- * overlaps with @k)
- *
- * If returned true, we may have inserted up to one key in @b.
- * If returned false, we may have inserted up to two keys in @b.
- *
- * On return, there is room in @res for at least one more key of the same size
- * as @new.
- */
-enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook,
- struct bpos committed_pos,
- struct bpos next_pos,
- struct bkey_s_c k,
- const struct bkey_i *new)
-{
- struct bch_replace_info *replace = container_of(hook,
- struct bch_replace_info, hook);
- struct bkey_i *old = &replace->key;
-
- EBUG_ON(bkey_cmp(committed_pos, bkey_start_pos(&new->k)) < 0);
-
- /* must have something to compare against */
- EBUG_ON(!bkey_val_u64s(&old->k));
-
- /* new must be a subset of old */
- EBUG_ON(bkey_cmp(new->k.p, old->k.p) > 0 ||
- bkey_cmp(bkey_start_pos(&new->k), bkey_start_pos(&old->k)) < 0);
-
- if (k.k && bch_extent_cmpxchg_cmp(k, bkey_i_to_s_c(old))) {
- replace->successes++;
- return BTREE_HOOK_DO_INSERT;
- } else {
- replace->failures++;
- return BTREE_HOOK_NO_INSERT;
- }
-}
-
static bool bch_extent_merge_inline(struct bch_fs *,
struct btree_iter *,
struct bkey_packed *,
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index db7bd4f14988..070b64048168 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -42,10 +42,6 @@ bch_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
bch_extent_pick_ptr_avoiding(c, k, NULL, ret);
}
-enum extent_insert_hook_ret
-bch_extent_cmpxchg(struct extent_insert_hook *, struct bpos, struct bpos,
- struct bkey_s_c, const struct bkey_i *);
-
enum btree_insert_ret
bch_insert_fixup_extent(struct btree_insert *,
struct btree_insert_entry *);
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index f4ad2d194413..bdf93c4f3632 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -20,8 +20,6 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
-#include "notify.h"
-#include "stats.h"
#include "super-io.h"
#include <linux/blkdev.h>
@@ -36,34 +34,6 @@ static inline void __bio_inc_remaining(struct bio *bio)
atomic_inc(&bio->__bi_remaining);
}
-void bch_generic_make_request(struct bio *bio, struct bch_fs *c)
-{
- if (current->bio_list) {
- spin_lock(&c->bio_submit_lock);
- bio_list_add(&c->bio_submit_list, bio);
- spin_unlock(&c->bio_submit_lock);
- queue_work(bcache_io_wq, &c->bio_submit_work);
- } else {
- generic_make_request(bio);
- }
-}
-
-void bch_bio_submit_work(struct work_struct *work)
-{
- struct bch_fs *c = container_of(work, struct bch_fs,
- bio_submit_work);
- struct bio_list bl;
- struct bio *bio;
-
- spin_lock(&c->bio_submit_lock);
- bl = c->bio_submit_list;
- bio_list_init(&c->bio_submit_list);
- spin_unlock(&c->bio_submit_lock);
-
- while ((bio = bio_list_pop(&bl)))
- generic_make_request(bio);
-}
-
/* Allocate, free from mempool: */
void bch_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
@@ -116,8 +86,7 @@ void bch_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
/* Bios with headers */
static void bch_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio,
- struct bch_dev *ca, const struct bch_extent_ptr *ptr,
- bool punt)
+ struct bch_dev *ca, const struct bch_extent_ptr *ptr)
{
wbio->ca = ca;
wbio->submit_time_us = local_clock_us();
@@ -126,14 +95,12 @@ static void bch_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio,
if (!ca)
bcache_io_error(c, &wbio->bio, "device has been removed");
- else if (punt)
- bch_generic_make_request(&wbio->bio, c);
else
generic_make_request(&wbio->bio);
}
void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
- const struct bkey_i *k, bool punt)
+ const struct bkey_i *k)
{
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
@@ -148,7 +115,7 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
extent_for_each_ptr(e, ptr) {
ca = c->devs[ptr->dev];
if (!percpu_ref_tryget(&ca->io_ref)) {
- bch_submit_wbio(c, wbio, NULL, ptr, punt);
+ bch_submit_wbio(c, wbio, NULL, ptr);
break;
}
@@ -172,7 +139,7 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
if (!journal_flushes_device(ca))
n->bio.bi_opf |= REQ_FUA;
- bch_submit_wbio(c, n, ca, ptr, punt);
+ bch_submit_wbio(c, n, ca, ptr);
}
}
@@ -358,8 +325,6 @@ static void bch_write_endio(struct bio *bio)
set_closure_fn(cl, bch_write_io_error, index_update_wq(op));
}
- bch_account_io_completion_time(ca, wbio->submit_time_us,
- REQ_OP_WRITE);
if (ca)
percpu_ref_put(&ca->io_ref);
@@ -574,7 +539,7 @@ static int bch_write_extent(struct bch_write_op *op,
bch_check_mark_super(c, key_to_write, false);
- bch_submit_wbio_replicas(to_wbio(bio), c, key_to_write, false);
+ bch_submit_wbio_replicas(to_wbio(bio), c, key_to_write);
return ret;
}
@@ -756,10 +721,6 @@ void bch_write(struct closure *cl)
struct bch_fs *c = op->c;
u64 inode = op->pos.inode;
- trace_bcache_write(c, inode, bio,
- !(op->flags & BCH_WRITE_CACHED),
- op->flags & BCH_WRITE_DISCARD);
-
if (c->opts.nochanges ||
!percpu_ref_tryget(&c->writes)) {
__bcache_io_error(c, "read only");
@@ -776,11 +737,6 @@ void bch_write(struct closure *cl)
if (!(op->flags & BCH_WRITE_DISCARD))
bch_increment_clock(c, bio_sectors(bio), WRITE);
- if (!(op->flags & BCH_WRITE_DISCARD))
- bch_mark_foreground_write(c, bio_sectors(bio));
- else
- bch_mark_discard(c, bio_sectors(bio));
-
/* Don't call bch_next_delay() if rate is >= 1 GB/sec */
if (c->foreground_write_ratelimit_enabled &&
@@ -1065,6 +1021,8 @@ static void __bch_read_endio(struct work_struct *work)
BUG_ON(!rbio->split || !rbio->bounce);
+ trace_bcache_promote(&rbio->bio);
+
/* we now own pages: */
swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
rbio->promote = NULL;
@@ -1085,10 +1043,6 @@ static void bch_read_endio(struct bio *bio)
container_of(bio, struct bch_read_bio, bio);
struct bch_fs *c = rbio->c;
- if (rbio->flags & BCH_READ_ACCOUNT_TIMES)
- bch_account_io_completion_time(rbio->ca, rbio->submit_time_us,
- REQ_OP_READ);
-
if (bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read")) {
/* XXX: retry IO errors when we have another replica */
bch_rbio_error(rbio, bio->bi_error);
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index 9239ca4aac16..bc7c9cf81494 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -69,16 +69,13 @@ enum bch_read_flags {
BCH_READ_PROMOTE = 1 << 2,
BCH_READ_IS_LAST = 1 << 3,
BCH_READ_MAY_REUSE_BIO = 1 << 4,
- BCH_READ_ACCOUNT_TIMES = 1 << 5,
- BCH_READ_USER_MAPPED = 1 << 6,
+ BCH_READ_USER_MAPPED = 1 << 5,
};
void bch_read(struct bch_fs *, struct bch_read_bio *, u64);
-void bch_generic_make_request(struct bio *, struct bch_fs *);
-void bch_bio_submit_work(struct work_struct *);
void bch_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
- const struct bkey_i *, bool);
+ const struct bkey_i *);
int bch_discard(struct bch_fs *, struct bpos, struct bpos,
struct bversion, struct disk_reservation *,
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 109c27c88be5..136b8c87ea3a 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1482,8 +1482,6 @@ int bch_journal_replay(struct bch_fs *c, struct list_head *list)
*/
BUG_ON(bch_disk_reservation_get(c, &disk_res, 0, 0));
- trace_bcache_journal_replay_key(&k->k);
-
ret = bch_btree_insert(c, entry->btree_id, k,
&disk_res, NULL, NULL,
BTREE_INSERT_NOFAIL|
@@ -2085,8 +2083,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
.dev = ca->dev_idx,
});
replicas++;
-
- trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx);
}
spin_unlock(&j->devs.lock);
@@ -2280,7 +2276,7 @@ static void journal_write(struct closure *cl)
bch_bio_map(bio, jset);
trace_bcache_journal_write(bio);
- closure_bio_submit_punt(bio, cl, c);
+ closure_bio_submit(bio, cl);
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
}
@@ -2296,7 +2292,7 @@ static void journal_write(struct closure *cl)
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
- closure_bio_submit_punt(bio, cl, c);
+ closure_bio_submit(bio, cl);
}
no_io:
diff --git a/fs/bcachefs/keybuf.c b/fs/bcachefs/keybuf.c
deleted file mode 100644
index 15a1f85d353d..000000000000
--- a/fs/bcachefs/keybuf.c
+++ /dev/null
@@ -1,195 +0,0 @@
-
-#include "bcache.h"
-#include "btree_gc.h"
-#include "btree_iter.h"
-#include "keybuf.h"
-
-#include <trace/events/bcachefs.h>
-
-/*
- * For buffered iteration over the btree, with predicates and ratelimiting and
- * whatnot
- */
-
-static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r)
-{
- /* Overlapping keys compare equal */
- if (bkey_cmp(l->key.k.p, bkey_start_pos(&r->key.k)) <= 0)
- return -1;
- if (bkey_cmp(bkey_start_pos(&l->key.k), r->key.k.p) >= 0)
- return 1;
- return 0;
-}
-
-static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
- struct keybuf_key *r)
-{
- return clamp_t(s64, bkey_cmp(l->key.k.p, r->key.k.p), -1, 1);
-}
-
-void bch_refill_keybuf(struct bch_fs *c, struct keybuf *buf,
- struct bpos end, keybuf_pred_fn *pred)
-{
- struct bpos start = buf->last_scanned;
- struct btree_iter iter;
- struct bkey_s_c k;
- unsigned nr_found = 0;
-
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, buf->last_scanned, k) {
- if (bkey_cmp(k.k->p, end) >= 0) {
- buf->last_scanned = k.k->p;
- goto done;
- }
-
- if (pred(buf, k)) {
- struct keybuf_key *w;
-
- spin_lock(&buf->lock);
-
- w = array_alloc(&buf->freelist);
- if (!w) {
- spin_unlock(&buf->lock);
- goto done;
- }
-
- bkey_reassemble(&w->key, k);
- atomic_set(&w->ref, -1); /* -1 means hasn't started */
-
- if (RB_INSERT(&buf->keys, w, node, keybuf_cmp))
- array_free(&buf->freelist, w);
- else
- nr_found++;
-
- spin_unlock(&buf->lock);
- }
-
- buf->last_scanned = k.k->p;
- bch_btree_iter_cond_resched(&iter);
- }
-
- /* If we end up here, it means:
- * - the map_fn didn't fill up the keybuf
- * - the map_fn didn't see the end key
- * - there were no more keys to map over
- * Therefore, we are at the end of the key space */
- buf->last_scanned = POS_MAX;
-done:
- bch_btree_iter_unlock(&iter);
-
- trace_bcache_keyscan(nr_found,
- start.inode, start.offset,
- buf->last_scanned.inode,
- buf->last_scanned.offset);
-
- spin_lock(&buf->lock);
-
- if (!RB_EMPTY_ROOT(&buf->keys)) {
- struct keybuf_key *w;
-
- w = RB_FIRST(&buf->keys, struct keybuf_key, node);
- buf->start = bkey_start_pos(&w->key.k);
-
- w = RB_LAST(&buf->keys, struct keybuf_key, node);
- buf->end = w->key.k.p;
- } else {
- buf->start = POS_MAX;
- buf->end = POS_MAX;
- }
-
- spin_unlock(&buf->lock);
-}
-
-static void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w)
-{
- rb_erase(&w->node, &buf->keys);
- array_free(&buf->freelist, w);
-}
-
-void bch_keybuf_put(struct keybuf *buf, struct keybuf_key *w)
-{
- BUG_ON(atomic_read(&w->ref) <= 0);
-
- if (atomic_dec_and_test(&w->ref)) {
- up(&buf->in_flight);
-
- spin_lock(&buf->lock);
- bch_keybuf_del(buf, w);
- spin_unlock(&buf->lock);
- }
-}
-
-void bch_keybuf_recalc_oldest_gens(struct bch_fs *c, struct keybuf *buf)
-{
- struct keybuf_key *w, *n;
-
- spin_lock(&buf->lock);
- rbtree_postorder_for_each_entry_safe(w, n,
- &buf->keys, node)
- bch_btree_key_recalc_oldest_gen(c, bkey_i_to_s_c(&w->key));
- spin_unlock(&buf->lock);
-}
-
-bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bpos start,
- struct bpos end)
-{
- bool ret = false;
- struct keybuf_key *w, *next, s = { .key.k.p = start };
-
- if (bkey_cmp(end, buf->start) <= 0 ||
- bkey_cmp(start, buf->end) >= 0)
- return false;
-
- spin_lock(&buf->lock);
-
- for (w = RB_GREATER(&buf->keys, s, node, keybuf_nonoverlapping_cmp);
- w && bkey_cmp(bkey_start_pos(&w->key.k), end) < 0;
- w = next) {
- next = RB_NEXT(w, node);
-
- if (atomic_read(&w->ref) == -1)
- bch_keybuf_del(buf, w);
- else
- ret = true;
- }
-
- spin_unlock(&buf->lock);
- return ret;
-}
-
-struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
-{
- struct keybuf_key *w;
-
- spin_lock(&buf->lock);
-
- w = RB_FIRST(&buf->keys, struct keybuf_key, node);
-
- while (w && atomic_read(&w->ref) != -1)
- w = RB_NEXT(w, node);
-
- if (!w) {
- spin_unlock(&buf->lock);
- return NULL;
- }
-
- atomic_set(&w->ref, 1);
- spin_unlock(&buf->lock);
-
- down(&buf->in_flight);
-
- return w;
-}
-
-void bch_keybuf_init(struct keybuf *buf)
-{
- sema_init(&buf->in_flight, KEYBUF_REFILL_BATCH / 2);
-
- buf->last_scanned = POS_MAX;
- buf->start = POS_MIN;
- buf->end = POS_MIN;
-
- buf->keys = RB_ROOT;
-
- spin_lock_init(&buf->lock);
- array_allocator_init(&buf->freelist);
-}
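The keybuf removed above was a buffered, predicate-filtered btree scan: bch_refill_keybuf() batches up to KEYBUF_REFILL_BATCH matching keys into an rb-tree, and consumers pull them with bch_keybuf_next()/bch_keybuf_put(), throttled by the in_flight semaphore. A minimal sketch of that consumer pattern (roughly how the old writeback path drove dc->writeback_keys); dirty_pred(), start_io() and wait_io_done() are hypothetical placeholders, not real symbols:

/* Sketch only: driving the removed keybuf API; dirty_pred(), start_io()
 * and wait_io_done() are illustrative placeholders. */
static bool dirty_pred(struct keybuf *buf, struct bkey_s_c k)
{
	return !bkey_extent_is_cached(k.k);	/* e.g. only dirty extents */
}

static void scan_and_flush(struct bch_fs *c, struct keybuf *buf)
{
	struct keybuf_key *w;

	bch_keybuf_init(buf);
	buf->last_scanned = POS_MIN;

	/* Buffer up to KEYBUF_REFILL_BATCH keys matching the predicate: */
	bch_refill_keybuf(c, buf, POS_MAX, dirty_pred);

	/*
	 * Consume them: next() takes the in_flight semaphore and sets ref
	 * to 1; put() drops the ref and frees the node once IO completes.
	 */
	while ((w = bch_keybuf_next(buf))) {
		start_io(&w->key);
		wait_io_done(&w->key);
		bch_keybuf_put(buf, w);
	}
}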
diff --git a/fs/bcachefs/keybuf.h b/fs/bcachefs/keybuf.h
deleted file mode 100644
index dd1402d3d68c..000000000000
--- a/fs/bcachefs/keybuf.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _BCACHE_KEYBUF_H
-#define _BCACHE_KEYBUF_H
-
-#include "keybuf_types.h"
-
-typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey_s_c);
-
-void bch_keybuf_init(struct keybuf *);
-void bch_refill_keybuf(struct bch_fs *, struct keybuf *,
- struct bpos, keybuf_pred_fn *);
-void bch_keybuf_recalc_oldest_gens(struct bch_fs *, struct keybuf *);
-bool bch_keybuf_check_overlapping(struct keybuf *, struct bpos, struct bpos);
-void bch_keybuf_put(struct keybuf *, struct keybuf_key *);
-struct keybuf_key *bch_keybuf_next(struct keybuf *);
-
-#endif /* _BCACHE_KEYBUF_H */
diff --git a/fs/bcachefs/keybuf_types.h b/fs/bcachefs/keybuf_types.h
deleted file mode 100644
index 3facc4a07526..000000000000
--- a/fs/bcachefs/keybuf_types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _BCACHE_KEYBUF_TYPES_H
-#define _BCACHE_KEYBUF_TYPES_H
-
-struct keybuf_key {
- struct rb_node node;
- BKEY_PADDED(key);
- atomic_t ref;
-};
-
-#define KEYBUF_REFILL_BATCH 500
-
-struct keybuf {
- struct bpos last_scanned;
- spinlock_t lock;
-
- /*
- * Beginning and end of range in rb tree - so that we can skip taking
- * lock and checking the rb tree when we need to check for overlapping
- * keys.
- */
- struct bpos start;
- struct bpos end;
-
- struct rb_root keys;
-
- unsigned max_in_flight;
- struct semaphore in_flight;
-
- DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist,
- KEYBUF_REFILL_BATCH);
-};
-
-#endif /* _BCACHE_KEYBUF_TYPES_H */
diff --git a/fs/bcachefs/notify.c b/fs/bcachefs/notify.c
deleted file mode 100644
index b06a87499b2d..000000000000
--- a/fs/bcachefs/notify.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Code for sending uevent notifications to user-space.
- *
- * Copyright 2015 Datera, Inc.
- */
-
-#include "bcache.h"
-#include "notify.h"
-
-#include <linux/kobject.h>
-
-#define notify_var(c, format, ...) \
-({ \
- int ret; \
- lockdep_assert_held(&(c)->uevent_lock); \
- ret = add_uevent_var(&(c)->uevent_env, format, ##__VA_ARGS__); \
- WARN_ON_ONCE(ret); \
-})
-
-static void notify_get(struct bch_fs *c)
-{
- struct kobj_uevent_env *env = &c->uevent_env;
-
- mutex_lock(&c->uevent_lock);
- env->envp_idx = 0;
- env->buflen = 0;
-
- notify_var(c, "SET_UUID=%pU", c->sb.user_uuid.b);
-}
-
-static void notify_get_cache(struct bch_dev *ca)
-{
- struct bch_fs *c = ca->fs;
-
- notify_get(c);
- notify_var(c, "UUID=%pU", ca->uuid.b);
- notify_var(c, "BLOCKDEV=%s", ca->name);
-}
-
-static void notify_put(struct bch_fs *c)
-{
- struct kobj_uevent_env *env = &c->uevent_env;
-
- env->envp[env->envp_idx] = NULL;
- kobject_uevent_env(&c->kobj, KOBJ_CHANGE, env->envp);
- mutex_unlock(&c->uevent_lock);
-}
-
-void bch_notify_fs_read_write(struct bch_fs *c)
-{
- notify_get(c);
- notify_var(c, "STATE=active");
- notify_put(c);
-}
-
-void bch_notify_fs_read_only(struct bch_fs *c)
-{
- notify_get(c);
- notify_var(c, "STATE=readonly");
- notify_put(c);
-}
-
-void bch_notify_fs_stopped(struct bch_fs *c)
-{
- notify_get(c);
- notify_var(c, "STATE=stopped");
- notify_put(c);
-}
-
-void bch_notify_dev_read_write(struct bch_dev *ca)
-{
- struct bch_fs *c = ca->fs;
-
- notify_get_cache(ca);
- notify_var(c, "STATE=active");
- notify_put(c);
-}
-
-void bch_notify_dev_read_only(struct bch_dev *ca)
-{
- struct bch_fs *c = ca->fs;
-
- notify_get_cache(ca);
- notify_var(c, "STATE=readonly");
- notify_put(c);
-}
-
-void bch_notify_dev_added(struct bch_dev *ca)
-{
- struct bch_fs *c = ca->fs;
-
- notify_get_cache(ca);
- notify_var(c, "STATE=removing");
- notify_put(c);
-}
-
-void bch_notify_dev_error(struct bch_dev *ca, bool fatal)
-{
- struct bch_fs *c = ca->fs;
-
- notify_get_cache(ca);
- notify_var(c, "STATE=error");
- notify_var(c, "FATAL=%d", fatal);
- notify_put(c);
-}
diff --git a/fs/bcachefs/notify.h b/fs/bcachefs/notify.h
deleted file mode 100644
index 2c1e367913f0..000000000000
--- a/fs/bcachefs/notify.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Code for sending uevent notifications to user-space.
- *
- * Copyright 2015 Datera, Inc.
- */
-
-#ifndef _NOTIFY_H
-#define _NOTIFY_H
-
-#ifndef NO_BCACHE_NOTIFY
-
-void bch_notify_fs_read_write(struct bch_fs *);
-void bch_notify_fs_read_only(struct bch_fs *);
-void bch_notify_fs_stopped(struct bch_fs *);
-
-void bch_notify_dev_read_write(struct bch_dev *);
-void bch_notify_dev_read_only(struct bch_dev *);
-void bch_notify_dev_added(struct bch_dev *);
-void bch_notify_dev_error(struct bch_dev *, bool);
-
-#else
-
-static inline void bch_notify_fs_read_write(struct bch_fs *c) {}
-static inline void bch_notify_fs_read_only(struct bch_fs *c) {}
-static inline void bch_notify_fs_stopped(struct bch_fs *c) {}
-
-static inline void bch_notify_dev_read_write(struct bch_dev *ca) {}
-static inline void bch_notify_dev_read_only(struct bch_dev *ca) {}
-static inline void bch_notify_dev_added(struct bch_dev *ca) {}
-static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {}
-
-#endif
-
-#endif /* _NOTIFY_H */
diff --git a/fs/bcachefs/request.c b/fs/bcachefs/request.c
deleted file mode 100644
index 8d7627499988..000000000000
--- a/fs/bcachefs/request.c
+++ /dev/null
@@ -1,807 +0,0 @@
-/*
- * Handle a read or a write request and decide what to do with it.
- *
- * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
- * Copyright 2012 Google, Inc.
- *
- * Main pieces here:
- *
- * 1) Data insert path, via bch_data_insert() -- writes data to cache and
- * updates extents btree
- * 2) Read path, via bch_read() -- for now only used by bcachefs and ioctl
- * interface
- * 3) Read path, via cache_lookup() and struct search -- used by block device
- * make_request functions
- * 4) Cache promotion -- used by bch_read() and cache_lookup() to copy data to
- * the cache, either from a backing device or a cache device in a higher tier
- *
- * One tricky thing that comes up is a race condition where a bucket may be
- * re-used while reads from it are still in flight. To guard against this, we
- * save the ptr that is being read and check if it is stale once the read
- * completes. If the ptr is stale, the read is retried.
- *
- * #2 and #3 will be unified further in the future.
- */
-
-#include "bcache.h"
-#include "blockdev.h"
-#include "btree_update.h"
-#include "btree_iter.h"
-#include "clock.h"
-#include "debug.h"
-#include "error.h"
-#include "extents.h"
-#include "io.h"
-#include "journal.h"
-#include "keybuf.h"
-#include "request.h"
-#include "writeback.h"
-#include "stats.h"
-
-#include <linux/module.h>
-#include <linux/hash.h>
-#include <linux/random.h>
-#include <linux/backing-dev.h>
-
-#include <trace/events/bcachefs.h>
-
-#define CUTOFF_CACHE_ADD 10
-#define CUTOFF_CACHE_READA 15
-
-/* Congested? */
-
-unsigned bch_get_congested(struct bch_fs *c)
-{
- int i;
- long rand;
-
- if (!c->congested_read_threshold_us &&
- !c->congested_write_threshold_us)
- return 0;
-
- i = (local_clock_us() - c->congested_last_us) / 1024;
- if (i < 0)
- return 0;
-
- i += atomic_read(&c->congested);
- if (i >= 0)
- return 0;
-
- i += CONGESTED_MAX;
-
- if (i > 0)
- i = fract_exp_two(i, 6);
-
- rand = get_random_int();
- i -= bitmap_weight(&rand, BITS_PER_LONG);
-
- return i > 0 ? i : 1;
-}
-
-static void add_sequential(struct task_struct *t)
-{
- t->sequential_io_avg = ewma_add(t->sequential_io_avg,
- t->sequential_io, 3);
- t->sequential_io = 0;
-}
-
-static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
-{
- return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
-}
-
-static bool check_should_bypass(struct cached_dev *dc, struct bio *bio, int rw)
-{
- struct bch_fs *c = dc->disk.c;
- unsigned mode = BDEV_CACHE_MODE(dc->disk_sb.sb);
- unsigned sectors, congested = bch_get_congested(c);
- struct task_struct *task = current;
- struct io *i;
-
- if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- sectors_available(c) * 100 < c->capacity * CUTOFF_CACHE_ADD ||
- (bio_op(bio) == REQ_OP_DISCARD))
- goto skip;
-
- if (mode == CACHE_MODE_NONE ||
- (mode == CACHE_MODE_WRITEAROUND &&
- op_is_write(bio_op(bio))))
- goto skip;
-
- if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
- bio_sectors(bio) & (c->sb.block_size - 1)) {
- pr_debug("skipping unaligned io");
- goto skip;
- }
-
- if (bypass_torture_test(dc)) {
- if ((get_random_int() & 3) == 3)
- goto skip;
- else
- goto rescale;
- }
-
- if (!congested && !dc->sequential_cutoff)
- goto rescale;
-
- if (!congested &&
- mode == CACHE_MODE_WRITEBACK &&
- op_is_write(bio_op(bio)) &&
- (bio->bi_opf & REQ_SYNC))
- goto rescale;
-
- spin_lock(&dc->io_lock);
-
- hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
- if (i->last == bio->bi_iter.bi_sector &&
- time_before(jiffies, i->last_io))
- goto found;
-
- i = list_first_entry(&dc->io_lru, struct io, lru);
-
- add_sequential(task);
- i->sequential = 0;
-found:
- if (i->sequential + bio->bi_iter.bi_size > i->sequential)
- i->sequential += bio->bi_iter.bi_size;
-
- i->last = bio_end_sector(bio);
- i->last_io = jiffies + msecs_to_jiffies(5000);
- task->sequential_io = i->sequential;
-
- hlist_del(&i->hash);
- hlist_add_head(&i->hash, iohash(dc, i->last));
- list_move_tail(&i->lru, &dc->io_lru);
-
- spin_unlock(&dc->io_lock);
-
- sectors = max(task->sequential_io,
- task->sequential_io_avg) >> 9;
-
- if (dc->sequential_cutoff &&
- sectors >= dc->sequential_cutoff >> 9) {
- trace_bcache_bypass_sequential(bio);
- goto skip;
- }
-
- if (congested && sectors >= congested) {
- trace_bcache_bypass_congested(bio);
- goto skip;
- }
-
-rescale:
- return false;
-skip:
- bch_mark_sectors_bypassed(c, dc, bio_sectors(bio));
- return true;
-}
-
-/* Common code for the make_request functions */
-
-/**
- * request_endio - endio function for backing device bios
- */
-static void request_endio(struct bio *bio)
-{
- struct closure *cl = bio->bi_private;
-
- if (bio->bi_error) {
- struct search *s = container_of(cl, struct search, cl);
- s->iop.error = bio->bi_error;
- /* Only cache read errors are recoverable */
- s->recoverable = false;
- }
-
- bio_put(bio);
- closure_put(cl);
-}
-
-static void bio_complete(struct search *s)
-{
- if (s->orig_bio) {
- generic_end_io_acct(bio_data_dir(s->orig_bio),
- &s->d->disk->part0, s->start_time);
-
- trace_bcache_request_end(s->d, s->orig_bio);
- s->orig_bio->bi_error = s->iop.error;
- bio_endio(s->orig_bio);
- s->orig_bio = NULL;
- }
-}
-
-static void do_bio_hook(struct search *s, struct bio *orig_bio)
-{
- int rw = bio_data_dir(orig_bio);
- struct bio *bio = rw ? &s->wbio.bio : &s->rbio.bio;
-
- bio_init(bio);
- __bio_clone_fast(bio, orig_bio);
- bio->bi_end_io = request_endio;
- bio->bi_private = &s->cl;
-
- bio_cnt_set(bio, 3);
-}
-
-static void search_free(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
-
- bio_complete(s);
-
- if (s->iop.bio)
- bio_put(&s->iop.bio->bio);
-
- closure_debug_destroy(cl);
- mempool_free(s, &s->d->c->search);
-}
-
-static inline struct search *search_alloc(struct bio *bio,
- struct bcache_device *d)
-{
- struct search *s;
-
- s = mempool_alloc(&d->c->search, GFP_NOIO);
-
- closure_init(&s->cl, NULL);
- do_bio_hook(s, bio);
-
- s->orig_bio = bio;
- s->d = d;
- s->recoverable = 1;
- s->bypass = 0;
- s->write = op_is_write(bio_op(bio));
- s->read_dirty_data = 0;
- s->cache_miss = 0;
- s->start_time = jiffies;
- s->inode = bcache_dev_inum(d);
-
- s->iop.c = d->c;
- s->iop.bio = NULL;
- s->iop.error = 0;
-
- return s;
-}
-
-/* Cached devices */
-
-static void cached_dev_bio_complete(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
- struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
-
- search_free(cl);
- cached_dev_put(dc);
-}
-
-/* Process reads */
-
-static void cached_dev_read_error(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
- struct bio *bio = &s->rbio.bio;
-
- if (s->recoverable) {
- /* Read bucket invalidate races are handled here, also plain
- * old IO errors from the cache that can be retried from the
- * backing device (reads of clean data) */
- trace_bcache_read_retry(s->orig_bio);
-
- s->iop.error = 0;
- do_bio_hook(s, s->orig_bio);
-
- /* XXX: invalidate cache, don't count twice */
-
- closure_bio_submit(bio, cl);
- }
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-}
-
-static void cached_dev_read_done(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
- struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
-
- if (dc->verify && s->recoverable && !s->read_dirty_data)
- bch_data_verify(dc, s->orig_bio);
-
- continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
-}
-
-static void cached_dev_read_done_bh(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
- struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
-
- bch_mark_cache_accounting(s->iop.c, dc, !s->cache_miss, s->bypass);
- trace_bcache_read(s->orig_bio, !s->cache_miss, s->bypass);
-
- if (s->iop.error)
- continue_at_nobarrier(cl, cached_dev_read_error, s->iop.c->wq);
- else if (dc->verify)
- continue_at_nobarrier(cl, cached_dev_read_done, s->iop.c->wq);
- else
- continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
-}
-
-/**
- * __cache_promote -- insert result of read bio into cache
- *
- * Used for backing devices and flash-only volumes.
- *
- * @orig_bio must actually be a bbio with a valid key.
- */
-void __cache_promote(struct bch_fs *c, struct bch_read_bio *orig_bio,
- struct bkey_s_c old,
- struct bkey_s_c new,
- unsigned write_flags)
-{
-#if 0
- struct cache_promote_op *op;
- struct bio *bio;
- unsigned pages = DIV_ROUND_UP(orig_bio->bio.bi_iter.bi_size, PAGE_SIZE);
-
- /* XXX: readahead? */
-
- op = kmalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
- if (!op)
- goto out_submit;
-
- /* clone the bbio */
- memcpy(&op->bio, orig_bio, offsetof(struct bbio, bio));
-
- bio = &op->bio.bio.bio;
- bio_init(bio);
- bio_get(bio);
- bio->bi_bdev = orig_bio->bio.bi_bdev;
- bio->bi_iter.bi_sector = orig_bio->bio.bi_iter.bi_sector;
- bio->bi_iter.bi_size = orig_bio->bio.bi_iter.bi_size;
- bio->bi_end_io = cache_promote_endio;
- bio->bi_private = &op->cl;
- bio->bi_io_vec = bio->bi_inline_vecs;
- bch_bio_map(bio, NULL);
-
- if (bio_alloc_pages(bio, __GFP_NOWARN|GFP_NOIO))
- goto out_free;
-
- orig_bio->ca = NULL;
-
- closure_init(&op->cl, &c->cl);
- op->orig_bio = &orig_bio->bio;
- op->stale = 0;
-
- bch_write_op_init(&op->iop, c, &op->bio, &c->promote_write_point,
- new, old,
- BCH_WRITE_ALLOC_NOWAIT|write_flags);
- op->iop.nr_replicas = 1;
-
- //bch_cut_front(bkey_start_pos(&orig_bio->key.k), &op->iop.insert_key);
- //bch_cut_back(orig_bio->key.k.p, &op->iop.insert_key.k);
-
- trace_bcache_promote(&orig_bio->bio);
-
- op->bio.bio.submit_time_us = local_clock_us();
- closure_bio_submit(bio, &op->cl);
-
- continue_at(&op->cl, cache_promote_write, c->wq);
-out_free:
- kfree(op);
-out_submit:
- generic_make_request(&orig_bio->bio);
-#endif
-}
-
-/**
- * cached_dev_cache_miss - populate cache with data from backing device
- *
- * We don't write to the cache if s->bypass is set.
- */
-static int cached_dev_cache_miss(struct btree_iter *iter, struct search *s,
- struct bio *bio, unsigned sectors)
-{
- int ret;
- unsigned reada = 0;
- struct bio *miss;
- BKEY_PADDED(key) replace;
-
- s->cache_miss = 1;
-
- if (s->bypass)
- goto nopromote;
-#if 0
- struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
-
- /* XXX: broken */
- if (!(bio->bi_opf & REQ_RAHEAD) &&
- !(bio->bi_opf & REQ_META) &&
- ((u64) sectors_available(dc->disk.c) * 100 <
- (u64) iter->c->capacity * CUTOFF_CACHE_READA))
- reada = min_t(sector_t, dc->readahead >> 9,
- bdev_sectors(bio->bi_bdev) - bio_end_sector(bio));
-#endif
- sectors = min(sectors, bio_sectors(bio) + reada);
-
- replace.key.k = KEY(s->inode,
- bio->bi_iter.bi_sector + sectors,
- sectors);
-
- ret = bch_btree_insert_check_key(iter, &replace.key);
- if (ret == -EINTR)
- return ret;
-
- miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
-
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-
- //to_bbio(miss)->key.k = KEY(s->inode,
- // bio_end_sector(miss),
- // bio_sectors(miss));
- to_rbio(miss)->ca = NULL;
-
- closure_get(&s->cl);
- __cache_promote(s->iop.c, to_rbio(miss),
- bkey_i_to_s_c(&replace.key),
- bkey_to_s_c(&KEY(replace.key.k.p.inode,
- replace.key.k.p.offset,
- replace.key.k.size)),
- BCH_WRITE_CACHED);
-
- return 0;
-nopromote:
- miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
-
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
- closure_bio_submit(miss, &s->cl);
-
- return 0;
-}
-
-static void cached_dev_read(struct cached_dev *dc, struct search *s)
-{
- struct bch_fs *c = s->iop.c;
- struct closure *cl = &s->cl;
- struct bio *bio = &s->rbio.bio;
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
- POS(s->inode, bio->bi_iter.bi_sector), k) {
- BKEY_PADDED(k) tmp;
- struct extent_pick_ptr pick;
- unsigned sectors, bytes;
- bool is_last;
-retry:
- bkey_reassemble(&tmp.k, k);
- bch_btree_iter_unlock(&iter);
- k = bkey_i_to_s_c(&tmp.k);
-
- bch_extent_pick_ptr(c, k, &pick);
- if (IS_ERR(pick.ca)) {
- bcache_io_error(c, bio, "no device to read from");
- goto out;
- }
-
- sectors = min_t(u64, k.k->p.offset, bio_end_sector(bio)) -
- bio->bi_iter.bi_sector;
- bytes = sectors << 9;
- is_last = bytes == bio->bi_iter.bi_size;
- swap(bio->bi_iter.bi_size, bytes);
-
- if (pick.ca) {
- PTR_BUCKET(pick.ca, &pick.ptr)->read_prio =
- c->prio_clock[READ].hand;
-
- if (!bkey_extent_is_cached(k.k))
- s->read_dirty_data = true;
-
- bch_read_extent(c, &s->rbio, k, &pick,
- BCH_READ_ACCOUNT_TIMES|
- BCH_READ_RETRY_IF_STALE|
- (!s->bypass ? BCH_READ_PROMOTE : 0)|
- (is_last ? BCH_READ_IS_LAST : 0));
- } else {
- /* not present (hole), or stale cached data */
- if (cached_dev_cache_miss(&iter, s, bio, sectors)) {
- k = bch_btree_iter_peek_with_holes(&iter);
- if (btree_iter_err(k))
- break;
- goto retry;
- }
- }
-
- swap(bio->bi_iter.bi_size, bytes);
- bio_advance(bio, bytes);
-
- if (is_last) {
- bch_btree_iter_unlock(&iter);
- goto out;
- }
- }
-
- /*
- * If we get here, it better have been because there was an error
- * reading a btree node
- */
- ret = bch_btree_iter_unlock(&iter);
- BUG_ON(!ret);
- bcache_io_error(c, bio, "btree IO error %i", ret);
-out:
- continue_at(cl, cached_dev_read_done_bh, NULL);
-}
-
-/* Process writes */
-
-static void cached_dev_write_complete(struct closure *cl)
-{
- struct search *s = container_of(cl, struct search, cl);
- struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
-
- up_read_non_owner(&dc->writeback_lock);
- cached_dev_bio_complete(cl);
-}
-
-static void cached_dev_write(struct cached_dev *dc, struct search *s)
-{
- struct closure *cl = &s->cl;
- struct bio *bio = &s->wbio.bio;
- bool writeback = false;
- bool bypass = s->bypass;
- struct bkey insert_key = KEY(s->inode,
- bio_end_sector(bio),
- bio_sectors(bio));
- unsigned flags = BCH_WRITE_DISCARD_ON_ERROR;
-
- down_read_non_owner(&dc->writeback_lock);
- if (bch_keybuf_check_overlapping(&dc->writeback_keys,
- bkey_start_pos(&insert_key),
- insert_key.p)) {
- /*
- * We overlap with some dirty data undergoing background
- * writeback, force this write to writeback
- */
- bypass = false;
- writeback = true;
- }
-
- /*
- * Discards aren't _required_ to do anything, so skipping if
- * check_overlapping returned true is ok
- *
- * But check_overlapping drops dirty keys for which io hasn't started,
- * so we still want to call it.
- */
- if (bio_op(bio) == REQ_OP_DISCARD)
- bypass = true;
-
- if (should_writeback(dc, bio, BDEV_CACHE_MODE(dc->disk_sb.sb),
- bypass)) {
- bypass = false;
- writeback = true;
- }
-
- if (bypass) {
- /*
- * If this is a bypass-write (as opposed to a discard), send
- * it down to the backing device. If this is a discard, only
- * send it to the backing device if the backing device
- * supports discards. Otherwise, we simply discard the key
- * range from the cache and don't touch the backing device.
- */
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->disk_sb.bdev)))
- closure_bio_submit(s->orig_bio, cl);
- } else if (writeback) {
- bch_writeback_add(dc);
-
- if (bio->bi_opf & REQ_PREFLUSH) {
- /* Also need to send a flush to the backing device */
- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
- &dc->disk.bio_split);
-
- flush->bi_bdev = bio->bi_bdev;
- flush->bi_end_io = request_endio;
- flush->bi_private = cl;
- bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH);
-
- closure_bio_submit(flush, cl);
- }
- } else {
- struct bio *writethrough =
- bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
-
- closure_bio_submit(writethrough, cl);
-
- flags |= BCH_WRITE_CACHED;
- flags |= BCH_WRITE_ALLOC_NOWAIT;
- }
-
- if (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- flags |= BCH_WRITE_FLUSH;
- if (bypass)
- flags |= BCH_WRITE_DISCARD;
-
- bch_write_op_init(&s->iop, dc->disk.c, &s->wbio,
- (struct disk_reservation) { 0 },
- foreground_write_point(dc->disk.c,
- (unsigned long) current),
- bkey_start_pos(&insert_key),
- NULL, flags);
-
- closure_call(&s->iop.cl, bch_write, NULL, cl);
- continue_at(cl, cached_dev_write_complete, NULL);
-}
-
-/* Cached devices - read & write stuff */
-
-static void __cached_dev_make_request(struct request_queue *q, struct bio *bio)
-{
- struct search *s;
- struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
- generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);
-
- bio->bi_bdev = dc->disk_sb.bdev;
- bio->bi_iter.bi_sector += le64_to_cpu(dc->disk_sb.sb->data_offset);
-
- if (cached_dev_get(dc)) {
- struct bio *clone;
-
- s = search_alloc(bio, d);
- trace_bcache_request_start(s->d, bio);
-
- clone = rw ? &s->wbio.bio : &s->rbio.bio;
-
- if (!bio->bi_iter.bi_size) {
- if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- bch_journal_flush_async(&s->iop.c->journal,
- &s->cl);
-
- /*
- * If it's a flush, we send the flush to the backing
- * device too
- */
- closure_bio_submit(clone, &s->cl);
-
- continue_at(&s->cl, cached_dev_bio_complete, NULL);
- } else {
- s->bypass = check_should_bypass(dc, bio, rw);
-
- if (rw)
- cached_dev_write(dc, s);
- else
- cached_dev_read(dc, s);
- }
- } else {
- if ((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(dc->disk_sb.bdev)))
- bio_endio(bio);
- else
- generic_make_request(bio);
- }
-}
-
-static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct bio *bio)
-{
- __cached_dev_make_request(q, bio);
- return BLK_QC_T_NONE;
-}
-
-static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- return __blkdev_driver_ioctl(dc->disk_sb.bdev, mode, cmd, arg);
-}
-
-static int cached_dev_congested(void *data, int bits)
-{
- struct bcache_device *d = data;
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- struct request_queue *q = bdev_get_queue(dc->disk_sb.bdev);
- int ret = 0;
-
- if (bdi_congested(&q->backing_dev_info, bits))
- return 1;
-
- if (cached_dev_get(dc)) {
- ret |= bch_congested(d->c, bits);
- cached_dev_put(dc);
- }
-
- return ret;
-}
-
-void bch_cached_dev_request_init(struct cached_dev *dc)
-{
- struct gendisk *g = dc->disk.disk;
-
- g->queue->make_request_fn = cached_dev_make_request;
- g->queue->backing_dev_info.congested_fn = cached_dev_congested;
- dc->disk.ioctl = cached_dev_ioctl;
-}
-
-/* Blockdev volumes */
-
-static void __blockdev_volume_make_request(struct request_queue *q,
- struct bio *bio)
-{
- struct search *s;
- struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
- int rw = bio_data_dir(bio);
-
- generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);
-
- trace_bcache_request_start(d, bio);
-
- s = search_alloc(bio, d);
-
- if (!bio->bi_iter.bi_size) {
- if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- bch_journal_flush_async(&s->iop.c->journal,
- &s->cl);
-
- continue_at(&s->cl, search_free, NULL);
- } else if (rw) {
- struct disk_reservation res = { 0 };
- unsigned flags = 0;
-
- if (bio_op(bio) != REQ_OP_DISCARD &&
- bch_disk_reservation_get(d->c, &res, bio_sectors(bio), 0)) {
- s->iop.error = -ENOSPC;
- continue_at(&s->cl, search_free, NULL);
- return;
- }
-
- if (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA))
- flags |= BCH_WRITE_FLUSH;
- if (bio_op(bio) == REQ_OP_DISCARD)
- flags |= BCH_WRITE_DISCARD;
-
- bch_write_op_init(&s->iop, d->c, &s->wbio, res,
- foreground_write_point(d->c,
- (unsigned long) current),
- POS(s->inode, bio->bi_iter.bi_sector),
- NULL, flags);
-
- closure_call(&s->iop.cl, bch_write, NULL, &s->cl);
- } else {
- closure_get(&s->cl);
- bch_read(d->c, &s->rbio, bcache_dev_inum(d));
- }
- continue_at(&s->cl, search_free, NULL);
-}
-
-static blk_qc_t blockdev_volume_make_request(struct request_queue *q,
- struct bio *bio)
-{
- __blockdev_volume_make_request(q, bio);
- return BLK_QC_T_NONE;
-}
-
-static int blockdev_volume_ioctl(struct bcache_device *d, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- return -ENOTTY;
-}
-
-static int blockdev_volume_congested(void *data, int bits)
-{
- struct bcache_device *d = data;
-
- return bch_congested(d->c, bits);
-}
-
-void bch_blockdev_volume_request_init(struct bcache_device *d)
-{
- struct gendisk *g = d->disk;
-
- g->queue->make_request_fn = blockdev_volume_make_request;
- g->queue->backing_dev_info.congested_fn = blockdev_volume_congested;
- d->ioctl = blockdev_volume_ioctl;
-}
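The header comment of the deleted request.c describes the bucket-reuse race: the pointer being read is saved when the read is issued and checked for staleness on completion, and the read is retried if the bucket was reallocated in the meantime. A self-contained conceptual sketch of that generation check, using simplified stand-in types rather than the real bcachefs structures:

/* Conceptual sketch of the stale-pointer guard; types and names are
 * simplified stand-ins, not the real bcachefs structures. */
#include <stdbool.h>
#include <stdint.h>

struct bucket     { uint8_t gen; };	/* bumped whenever the bucket is reused */
struct extent_ptr { uint8_t gen; };	/* generation recorded in the key's pointer */

/* Saved alongside the in-flight read when it is submitted: */
struct saved_read {
	struct extent_ptr	ptr;
	const struct bucket	*bucket;
};

/*
 * On completion: if the bucket was invalidated and reallocated while the
 * read was in flight, its generation no longer matches the pointer's, so
 * the data just read may belong to something else and must be retried.
 */
static bool read_is_stale(const struct saved_read *r)
{
	return r->ptr.gen != r->bucket->gen;
}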
diff --git a/fs/bcachefs/request.h b/fs/bcachefs/request.h
deleted file mode 100644
index 1ee3d16fac46..000000000000
--- a/fs/bcachefs/request.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _BCACHE_REQUEST_H_
-#define _BCACHE_REQUEST_H_
-
-#include "stats.h"
-
-struct bch_fs;
-struct cached_dev;
-struct bcache_device;
-struct kmem_cache;
-
-unsigned bch_get_congested(struct bch_fs *);
-
-void bch_cached_dev_request_init(struct cached_dev *dc);
-void bch_blockdev_volume_request_init(struct bcache_device *d);
-
-#endif /* _BCACHE_REQUEST_H_ */
diff --git a/fs/bcachefs/stats.c b/fs/bcachefs/stats.c
deleted file mode 100644
index a8a4eb3692c4..000000000000
--- a/fs/bcachefs/stats.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * bcache stats code
- *
- * Copyright 2012 Google, Inc.
- */
-
-#include "bcache.h"
-#include "stats.h"
-#include "sysfs.h"
-
-/*
- * We keep absolute totals of various statistics, and additionally a set of three
- * rolling averages.
- *
- * Every so often, a timer goes off and rescales the rolling averages.
- * accounting_rescale[] is how many times the timer has to go off before we
- * rescale each set of numbers; that gets us half lives of 5 minutes, one hour,
- * and one day.
- *
- * accounting_delay is how often the timer goes off - 22 times in 5 minutes,
- * and accounting_weight is what we use to rescale:
- *
- * pow(31 / 32, 22) ~= 1/2
- *
- * So that we don't have to increment each set of numbers every time we (say)
- * get a cache hit, we increment a single atomic_t in acc->collector, and when
- * the rescale function runs it resets the atomic counter to 0 and adds its
- * old value to each of the exported numbers.
- *
- * To reduce rounding error, the numbers in struct cache_stats are all
- * stored left shifted by 16, and scaled back in the sysfs show() function.
- */
-
-static const unsigned DAY_RESCALE = 288;
-static const unsigned HOUR_RESCALE = 12;
-static const unsigned FIVE_MINUTE_RESCALE = 1;
-static const unsigned accounting_delay = (HZ * 300) / 22;
-static const unsigned accounting_weight = 5;
-
-/* sysfs reading/writing */
-
-read_attribute(cache_hits);
-read_attribute(cache_misses);
-read_attribute(cache_bypass_hits);
-read_attribute(cache_bypass_misses);
-read_attribute(cache_hit_ratio);
-read_attribute(cache_readaheads);
-read_attribute(cache_miss_collisions);
-read_attribute(bypassed);
-read_attribute(foreground_write_ratio);
-read_attribute(foreground_writes);
-read_attribute(gc_writes);
-read_attribute(discards);
-
-SHOW(bch_stats)
-{
- struct cache_stats *s =
- container_of(kobj, struct cache_stats, kobj);
-#define var(stat) (s->stat >> 16)
- var_print(cache_hits);
- var_print(cache_misses);
- var_print(cache_bypass_hits);
- var_print(cache_bypass_misses);
-
- sysfs_print(cache_hit_ratio,
- DIV_SAFE(var(cache_hits) * 100,
- var(cache_hits) + var(cache_misses)));
-
- var_print(cache_readaheads);
- var_print(cache_miss_collisions);
-
- sysfs_hprint(bypassed, var(sectors_bypassed) << 9);
- sysfs_hprint(foreground_writes, var(foreground_write_sectors) << 9);
- sysfs_hprint(gc_writes, var(gc_write_sectors) << 9);
- sysfs_hprint(discards, var(discard_sectors) << 9);
-
- sysfs_print(foreground_write_ratio,
- DIV_SAFE(var(foreground_write_sectors) * 100,
- var(foreground_write_sectors) +
- var(gc_write_sectors)));
-#undef var
- return 0;
-}
-
-STORE(bch_stats)
-{
- return size;
-}
-
-static void bch_stats_release(struct kobject *k)
-{
-}
-
-static struct attribute *bch_stats_files[] = {
- &sysfs_cache_hits,
- &sysfs_cache_misses,
- &sysfs_cache_bypass_hits,
- &sysfs_cache_bypass_misses,
- &sysfs_cache_hit_ratio,
- &sysfs_cache_readaheads,
- &sysfs_cache_miss_collisions,
- &sysfs_bypassed,
- &sysfs_foreground_write_ratio,
- &sysfs_foreground_writes,
- &sysfs_gc_writes,
- &sysfs_discards,
- NULL
-};
-static KTYPE(bch_stats);
-
-int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
- struct kobject *parent)
-{
- int ret = kobject_add(&acc->total.kobj, parent,
- "stats_total");
- ret = ret ?: kobject_add(&acc->five_minute.kobj, parent,
- "stats_five_minute");
- ret = ret ?: kobject_add(&acc->hour.kobj, parent,
- "stats_hour");
- ret = ret ?: kobject_add(&acc->day.kobj, parent,
- "stats_day");
- return ret;
-}
-
-void bch_cache_accounting_clear(struct cache_accounting *acc)
-{
- memset(&acc->total.cache_hits,
- 0,
- sizeof(unsigned long) * 9);
-}
-
-void bch_cache_accounting_destroy(struct cache_accounting *acc)
-{
- kobject_put(&acc->total.kobj);
- kobject_put(&acc->five_minute.kobj);
- kobject_put(&acc->hour.kobj);
- kobject_put(&acc->day.kobj);
-
- atomic_set(&acc->closing, 1);
- if (del_timer_sync(&acc->timer))
- closure_return(&acc->cl);
-}
-
-/* EWMA scaling */
-
-static void scale_stat(unsigned long *stat)
-{
- *stat = ewma_add(*stat, 0, accounting_weight);
-}
-
-static void scale_stats(struct cache_stats *stats, unsigned long rescale_at)
-{
- if (++stats->rescale == rescale_at) {
- stats->rescale = 0;
- scale_stat(&stats->cache_hits);
- scale_stat(&stats->cache_misses);
- scale_stat(&stats->cache_bypass_hits);
- scale_stat(&stats->cache_bypass_misses);
- scale_stat(&stats->cache_readaheads);
- scale_stat(&stats->cache_miss_collisions);
- scale_stat(&stats->sectors_bypassed);
- scale_stat(&stats->foreground_write_sectors);
- scale_stat(&stats->gc_write_sectors);
- scale_stat(&stats->discard_sectors);
- }
-}
-
-static void scale_accounting(unsigned long data)
-{
- struct cache_accounting *acc = (struct cache_accounting *) data;
-
-#define move_stat(name) do { \
- unsigned t = atomic_xchg(&acc->collector.name, 0); \
- t <<= 16; \
- acc->five_minute.name += t; \
- acc->hour.name += t; \
- acc->day.name += t; \
- acc->total.name += t; \
-} while (0)
-
- move_stat(cache_hits);
- move_stat(cache_misses);
- move_stat(cache_bypass_hits);
- move_stat(cache_bypass_misses);
- move_stat(cache_readaheads);
- move_stat(cache_miss_collisions);
- move_stat(sectors_bypassed);
- move_stat(foreground_write_sectors);
- move_stat(gc_write_sectors);
- move_stat(discard_sectors);
-
- scale_stats(&acc->total, 0);
- scale_stats(&acc->day, DAY_RESCALE);
- scale_stats(&acc->hour, HOUR_RESCALE);
- scale_stats(&acc->five_minute, FIVE_MINUTE_RESCALE);
-
- acc->timer.expires += accounting_delay;
-
- if (!atomic_read(&acc->closing))
- add_timer(&acc->timer);
- else
- closure_return(&acc->cl);
-}
-
-void bch_cache_accounting_init(struct cache_accounting *acc,
- struct closure *parent)
-{
- kobject_init(&acc->total.kobj, &bch_stats_ktype);
- kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
- kobject_init(&acc->hour.kobj, &bch_stats_ktype);
- kobject_init(&acc->day.kobj, &bch_stats_ktype);
-
- closure_init(&acc->cl, parent);
- init_timer(&acc->timer);
- acc->timer.expires = jiffies + accounting_delay;
- acc->timer.data = (unsigned long) acc;
- acc->timer.function = scale_accounting;
- add_timer(&acc->timer);
-}
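The comment at the top of the deleted stats.c asserts pow(31/32, 22) ~= 1/2, which is what makes accounting_delay = (HZ * 300) / 22 and accounting_weight = 5 work out to the stated half-lives. A standalone check of that arithmetic, assuming ewma_add(x, 0, 5) decays x by 31/32 as a weight-5 EWMA implies:

/* Standalone check of the rescaling arithmetic in the deleted stats.c.
 * Assumes ewma_add(x, 0, weight) decays x by (2^weight - 1) / 2^weight,
 * i.e. 31/32 for weight 5. Build with: cc ewma_check.c -lm */
#include <math.h>
#include <stdio.h>

int main(void)
{
	double decay_per_scale = 31.0 / 32.0;
	double ticks_per_5min  = 22.0;		/* accounting_delay = HZ*300/22 */
	double tick_secs       = 300.0 / ticks_per_5min;

	/* One scale_stat() per rescale halves a stat after ~22 rescales: */
	printf("pow(31/32, 22) = %.3f\n", pow(decay_per_scale, ticks_per_5min));

	/* five_minute/hour/day are rescaled every 1, 12 and 288 timer ticks,
	 * so their half-lives are 22, 22*12 and 22*288 ticks: */
	printf("half-lives: %.0f s, %.0f s, %.0f s\n",
	       22 *   1 * tick_secs,
	       22 *  12 * tick_secs,
	       22 * 288 * tick_secs);
	return 0;
}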
diff --git a/fs/bcachefs/stats.h b/fs/bcachefs/stats.h
deleted file mode 100644
index a3c7bd2629bf..000000000000
--- a/fs/bcachefs/stats.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _BCACHE_STATS_H_
-#define _BCACHE_STATS_H_
-
-#include "stats_types.h"
-
-struct bch_fs;
-struct cached_dev;
-struct bcache_device;
-
-#ifndef NO_BCACHE_ACCOUNTING
-
-void bch_cache_accounting_init(struct cache_accounting *, struct closure *);
-int bch_cache_accounting_add_kobjs(struct cache_accounting *, struct kobject *);
-void bch_cache_accounting_clear(struct cache_accounting *);
-void bch_cache_accounting_destroy(struct cache_accounting *);
-
-#else
-
-static inline void bch_cache_accounting_init(struct cache_accounting *acc,
- struct closure *cl) {}
-static inline int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
- struct kobject *cl)
-{
- return 0;
-}
-static inline void bch_cache_accounting_clear(struct cache_accounting *acc) {}
-static inline void bch_cache_accounting_destroy(struct cache_accounting *acc) {}
-
-#endif
-
-static inline void mark_cache_stats(struct cache_stat_collector *stats,
- bool hit, bool bypass)
-{
- atomic_inc(&stats->cache_hit_array[!bypass][!hit]);
-}
-
-static inline void bch_mark_cache_accounting(struct bch_fs *c,
- struct cached_dev *dc,
- bool hit, bool bypass)
-{
- mark_cache_stats(&dc->accounting.collector, hit, bypass);
- mark_cache_stats(&c->accounting.collector, hit, bypass);
-}
-
-static inline void bch_mark_sectors_bypassed(struct bch_fs *c,
- struct cached_dev *dc,
- unsigned sectors)
-{
- atomic_add(sectors, &dc->accounting.collector.sectors_bypassed);
- atomic_add(sectors, &c->accounting.collector.sectors_bypassed);
-}
-
-static inline void bch_mark_gc_write(struct bch_fs *c, int sectors)
-{
- atomic_add(sectors, &c->accounting.collector.gc_write_sectors);
-}
-
-static inline void bch_mark_foreground_write(struct bch_fs *c, int sectors)
-{
- atomic_add(sectors, &c->accounting.collector.foreground_write_sectors);
-}
-
-static inline void bch_mark_discard(struct bch_fs *c, int sectors)
-{
- atomic_add(sectors, &c->accounting.collector.discard_sectors);
-}
-
-#endif /* _BCACHE_STATS_H_ */
diff --git a/fs/bcachefs/stats_types.h b/fs/bcachefs/stats_types.h
deleted file mode 100644
index 28e4c69e8e6d..000000000000
--- a/fs/bcachefs/stats_types.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef _BCACHE_STATS_TYPES_H_
-#define _BCACHE_STATS_TYPES_H_
-
-struct cache_stat_collector {
- union {
- struct {
- atomic_t cache_hits;
- atomic_t cache_misses;
- atomic_t cache_bypass_hits;
- atomic_t cache_bypass_misses;
- };
-
- /* cache_hit_array[!bypass][!hit]: */
- atomic_t cache_hit_array[2][2];
- };
-
-
- atomic_t cache_readaheads;
- atomic_t cache_miss_collisions;
- atomic_t sectors_bypassed;
- atomic_t foreground_write_sectors;
- atomic_t gc_write_sectors;
- atomic_t discard_sectors;
-};
-
-struct cache_stats {
- struct kobject kobj;
-
- unsigned long cache_hits;
- unsigned long cache_misses;
- unsigned long cache_bypass_hits;
- unsigned long cache_bypass_misses;
- unsigned long cache_readaheads;
- unsigned long cache_miss_collisions;
- unsigned long sectors_bypassed;
- unsigned long foreground_write_sectors;
- unsigned long gc_write_sectors;
- unsigned long discard_sectors;
-
- unsigned rescale;
-};
-
-struct cache_accounting {
- struct closure cl;
- struct timer_list timer;
- atomic_t closing;
-
- struct cache_stat_collector collector;
-
- struct cache_stats total;
- struct cache_stats five_minute;
- struct cache_stats hour;
- struct cache_stats day;
-};
-
-#endif /* _BCACHE_STATS_TYPES_H_ */
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 67c03e1932b1..41d54d3cdb10 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1,6 +1,5 @@
#include "bcache.h"
-#include "blockdev.h"
#include "checksum.h"
#include "error.h"
#include "io.h"
@@ -710,8 +709,6 @@ static void write_super_endio(struct bio *bio)
bch_dev_fatal_io_err_on(bio->bi_error, ca, "superblock write");
- bch_account_io_completion(ca);
-
closure_put(&ca->fs->sb_write);
percpu_ref_put(&ca->io_ref);
}
@@ -744,7 +741,7 @@ static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
bch_bio_map(bio, sb);
- closure_bio_submit_punt(bio, &c->sb_write, c);
+ closure_bio_submit(bio, &c->sb_write);
return true;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 8f2c6b3fb419..fdd731d25917 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -7,7 +7,6 @@
*/
#include "bcache.h"
-#include "blockdev.h"
#include "alloc.h"
#include "btree_cache.h"
#include "btree_gc.h"
@@ -28,12 +27,9 @@
#include "move.h"
#include "migrate.h"
#include "movinggc.h"
-#include "notify.h"
-#include "stats.h"
#include "super.h"
#include "super-io.h"
#include "tier.h"
-#include "writeback.h"
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
@@ -45,7 +41,6 @@
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/random.h>
-#include <linux/reboot.h>
#include <linux/sysfs.h>
#include <crypto/hash.h>
@@ -66,7 +61,6 @@ static LIST_HEAD(bch_fs_list);
static DEFINE_MUTEX(bch_fs_list_lock);
static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
-struct workqueue_struct *bcache_io_wq;
static void bch_dev_free(struct bch_dev *);
static int bch_dev_alloc(struct bch_fs *, unsigned);
@@ -221,8 +215,6 @@ void bch_fs_read_only(struct bch_fs *c)
if (test_bit(BCH_FS_ERROR, &c->flags))
goto out;
- trace_fs_read_only(c);
-
/*
* Block new foreground-end write operations from starting - any new
* writes will return -EROFS:
@@ -270,8 +262,6 @@ void bch_fs_read_only(struct bch_fs *c)
}
c->state = BCH_FS_RO;
- bch_notify_fs_read_only(c);
- trace_fs_read_only_done(c);
out:
mutex_unlock(&c->state_lock);
}
@@ -358,7 +348,6 @@ static void bch_fs_free(struct bch_fs *c)
bch_io_clock_exit(&c->io_clock[WRITE]);
bch_io_clock_exit(&c->io_clock[READ]);
bch_fs_compress_exit(c);
- bch_fs_blockdev_exit(c);
bdi_destroy(&c->bdi);
lg_lock_free(&c->usage_lock);
free_percpu(c->usage_percpu);
@@ -390,7 +379,6 @@ static void bch_fs_exit(struct bch_fs *c)
del_timer_sync(&c->foreground_write_wakeup);
cancel_delayed_work_sync(&c->pd_controllers_update);
cancel_work_sync(&c->read_only_work);
- cancel_work_sync(&c->bio_submit_work);
cancel_work_sync(&c->read_retry_work);
for (i = 0; i < c->sb.nr_devices; i++)
@@ -422,8 +410,6 @@ static void bch_fs_offline(struct bch_fs *c)
bch_fs_debug_exit(c);
bch_fs_chardev_exit(c);
- bch_cache_accounting_destroy(&c->accounting);
-
kobject_put(&c->time_stats);
kobject_put(&c->opts_dir);
kobject_put(&c->internal);
@@ -431,65 +417,13 @@ static void bch_fs_offline(struct bch_fs *c)
__bch_fs_read_only(c);
}
-/*
- * should be __bch_fs_stop4 - block devices are closed, now we can finally
- * free it
- */
void bch_fs_release(struct kobject *kobj)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
- bch_notify_fs_stopped(c);
bch_fs_free(c);
}
-/*
- * All activity on the filesystem should have stopped now - close devices:
- */
-static void __bch_fs_stop3(struct closure *cl)
-{
- struct bch_fs *c = container_of(cl, struct bch_fs, cl);
-
- bch_fs_exit(c);
-}
-
-/*
- * Openers (i.e. block devices) should have exited, shutdown all userspace
- * interfaces and wait for &c->cl to hit 0
- */
-static void __bch_fs_stop2(struct closure *cl)
-{
- struct bch_fs *c = container_of(cl, struct bch_fs, caching);
-
- bch_fs_offline(c);
-
- closure_return(cl);
-}
-
-/*
- * First phase of the shutdown process that's kicked off by bch_fs_stop_async();
- * we haven't waited for anything to stop yet, we're just punting to process
- * context to shut down block devices:
- */
-static void __bch_fs_stop1(struct closure *cl)
-{
- struct bch_fs *c = container_of(cl, struct bch_fs, caching);
-
- bch_blockdevs_stop(c);
-
- continue_at(cl, __bch_fs_stop2, system_wq);
-}
-
-void bch_fs_stop_async(struct bch_fs *c)
-{
- mutex_lock(&c->state_lock);
- if (c->state != BCH_FS_STOPPING) {
- c->state = BCH_FS_STOPPING;
- closure_queue(&c->caching);
- }
- mutex_unlock(&c->state_lock);
-}
-
void bch_fs_stop(struct bch_fs *c)
{
mutex_lock(&c->state_lock);
@@ -497,26 +431,13 @@ void bch_fs_stop(struct bch_fs *c)
c->state = BCH_FS_STOPPING;
mutex_unlock(&c->state_lock);
- bch_blockdevs_stop(c);
-
- closure_sync(&c->caching);
- closure_debug_destroy(&c->caching);
-
bch_fs_offline(c);
- closure_put(&c->cl);
closure_sync(&c->cl);
bch_fs_exit(c);
}
-/* Stop, detaching from backing devices: */
-void bch_fs_detach(struct bch_fs *c)
-{
- if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags))
- bch_fs_stop_async(c);
-}
-
#define alloc_bucket_pages(gfp, ca) \
((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
@@ -536,7 +457,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->state_lock);
mutex_init(&c->sb_lock);
- INIT_RADIX_TREE(&c->devices, GFP_KERNEL);
mutex_init(&c->btree_cache_lock);
mutex_init(&c->bucket_lock);
mutex_init(&c->btree_root_lock);
@@ -553,7 +473,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch_fs_tiering_init(c);
INIT_LIST_HEAD(&c->list);
- INIT_LIST_HEAD(&c->cached_devs);
INIT_LIST_HEAD(&c->btree_cache);
INIT_LIST_HEAD(&c->btree_cache_freeable);
INIT_LIST_HEAD(&c->btree_cache_freed);
@@ -563,8 +482,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->btree_interior_update_lock);
mutex_init(&c->bio_bounce_pages_lock);
- INIT_WORK(&c->bio_submit_work, bch_bio_submit_work);
- spin_lock_init(&c->bio_submit_lock);
bio_list_init(&c->read_retry_list);
spin_lock_init(&c->read_retry_lock);
INIT_WORK(&c->read_retry_work, bch_read_retry_work);
@@ -577,11 +494,7 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->prio_clock[WRITE].hand = 1;
c->prio_clock[WRITE].min_prio = 0;
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = 16 << IO_ERROR_SHIFT;
init_waitqueue_head(&c->writeback_wait);
-
c->writeback_pages_max = (256 << 10) / PAGE_SIZE;
c->copy_gc_enabled = 1;
@@ -595,8 +508,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.blocked_time = &c->journal_blocked_time;
c->journal.flush_seq_time = &c->journal_flush_seq_time;
- mutex_init(&c->uevent_lock);
-
mutex_lock(&c->sb_lock);
if (bch_sb_to_fs(c, sb)) {
@@ -648,7 +559,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mempool_init_page_pool(&c->btree_bounce_pool, 1,
ilog2(btree_pages(c))) ||
bdi_setup_and_register(&c->bdi, "bcache") ||
- bch_fs_blockdev_init(c) ||
bch_io_clock_init(&c->io_clock[READ]) ||
bch_io_clock_init(&c->io_clock[WRITE]) ||
bch_fs_journal_init(&c->journal, journal_entry_bytes) ||
@@ -679,14 +589,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
kobject_init(&c->internal, &bch_fs_internal_ktype);
kobject_init(&c->opts_dir, &bch_fs_opts_dir_ktype);
kobject_init(&c->time_stats, &bch_fs_time_stats_ktype);
-
- bch_cache_accounting_init(&c->accounting, &c->cl);
-
- closure_init(&c->caching, &c->cl);
- set_closure_fn(&c->caching, __bch_fs_stop1, system_wq);
-
- closure_get(&c->cl);
- continue_at_noreturn(&c->cl, __bch_fs_stop3, system_wq);
return c;
err:
bch_fs_free(c);
@@ -717,8 +619,7 @@ static const char *__bch_fs_online(struct bch_fs *c)
if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ||
kobject_add(&c->internal, &c->kobj, "internal") ||
kobject_add(&c->opts_dir, &c->kobj, "options") ||
- kobject_add(&c->time_stats, &c->kobj, "time_stats") ||
- bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj))
+ kobject_add(&c->time_stats, &c->kobj, "time_stats"))
return "error creating sysfs objects";
mutex_lock(&c->state_lock);
@@ -728,12 +629,6 @@ static const char *__bch_fs_online(struct bch_fs *c)
if (bch_dev_sysfs_online(ca))
goto err;
- err = "can't bring up blockdev volumes";
- if (bch_blockdev_volumes_start(c))
- goto err;
-
- bch_attach_backing_devs(c);
-
list_add(&c->list, &bch_fs_list);
err = NULL;
err:
@@ -1394,8 +1289,6 @@ static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW);
- trace_bcache_cache_read_write(ca);
-
if (bch_dev_allocator_start(ca))
return "error starting allocator thread";
@@ -1405,9 +1298,6 @@ static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
if (bch_tiering_start(c))
return "error starting tiering thread";
- bch_notify_dev_read_write(ca);
- trace_bcache_cache_read_write_done(ca);
-
return NULL;
}
@@ -1621,7 +1511,6 @@ have_slot:
goto err;
}
- bch_notify_dev_added(ca);
mutex_unlock(&c->state_lock);
return 0;
err_unlock:
@@ -1895,13 +1784,10 @@ const char *bch_fs_open_incremental(const char *path)
if (err)
return err;
- if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) {
- mutex_lock(&bch_fs_list_lock);
- err = bch_backing_dev_register(&sb);
- mutex_unlock(&bch_fs_list_lock);
- } else {
+ if (!__SB_IS_BDEV(le64_to_cpu(sb.sb->version)))
err = __bch_fs_open_incremental(&sb, opts);
- }
+ else
+ err = "not a bcachefs superblock";
bch_free_super(&sb);
@@ -1910,114 +1796,21 @@ const char *bch_fs_open_incremental(const char *path)
/* Global interfaces/init */
-#define kobj_attribute_write(n, fn) \
- static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
-
-#define kobj_attribute_rw(n, show, store) \
- static struct kobj_attribute ksysfs_##n = \
- __ATTR(n, S_IWUSR|S_IRUSR, show, store)
-
-static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
- const char *, size_t);
-
-kobj_attribute_write(register, register_bcache);
-kobj_attribute_write(register_quiet, register_bcache);
-
-static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- const char *buffer, size_t size)
-{
- ssize_t ret = -EINVAL;
- const char *err = "cannot allocate memory";
- char *path = NULL;
-
- if (!try_module_get(THIS_MODULE))
- return -EBUSY;
-
- if (!(path = kstrndup(skip_spaces(buffer), size, GFP_KERNEL)))
- goto err;
-
- err = bch_fs_open_incremental(strim(path));
- if (err)
- goto err;
-
- ret = size;
-out:
- kfree(path);
- module_put(THIS_MODULE);
- return ret;
-err:
- pr_err("error opening %s: %s", path, err);
- goto out;
-}
-
-static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
-{
- if (code == SYS_DOWN ||
- code == SYS_HALT ||
- code == SYS_POWER_OFF) {
- struct bch_fs *c;
-
- mutex_lock(&bch_fs_list_lock);
-
- if (!list_empty(&bch_fs_list))
- pr_info("Setting all devices read only:");
-
- list_for_each_entry(c, &bch_fs_list, list)
- bch_fs_read_only_async(c);
-
- list_for_each_entry(c, &bch_fs_list, list)
- bch_fs_read_only(c);
-
- mutex_unlock(&bch_fs_list_lock);
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block reboot = {
- .notifier_call = bcache_reboot,
- .priority = INT_MAX, /* before any real devices */
-};
-
-static ssize_t reboot_test(struct kobject *k, struct kobj_attribute *attr,
- const char *buffer, size_t size)
-{
- bcache_reboot(NULL, SYS_DOWN, NULL);
- return size;
-}
-
-kobj_attribute_write(reboot, reboot_test);
-
static void bcache_exit(void)
{
bch_debug_exit();
bch_vfs_exit();
- bch_blockdev_exit();
bch_chardev_exit();
if (bcache_kset)
kset_unregister(bcache_kset);
- if (bcache_io_wq)
- destroy_workqueue(bcache_io_wq);
- unregister_reboot_notifier(&reboot);
}
static int __init bcache_init(void)
{
- static const struct attribute *files[] = {
- &ksysfs_register.attr,
- &ksysfs_register_quiet.attr,
- &ksysfs_reboot.attr,
- NULL
- };
-
- register_reboot_notifier(&reboot);
bkey_pack_test();
- if (!(bcache_io_wq = create_freezable_workqueue("bcache_io")) ||
- !(bcache_kset = kset_create_and_add("bcache", NULL, fs_kobj)) ||
- sysfs_create_files(&bcache_kset->kobj, files) ||
+ if (!(bcache_kset = kset_create_and_add("bcache", NULL, fs_kobj)) ||
bch_chardev_init() ||
- bch_blockdev_init() ||
bch_vfs_init() ||
bch_debug_init())
goto err;
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index f676d2cb2e43..26b6da49fd6e 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -109,14 +109,11 @@ int bch_dev_online(struct bch_fs *, const char *);
int bch_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch_dev_evacuate(struct bch_fs *, struct bch_dev *);
-void bch_fs_detach(struct bch_fs *);
-
bool bch_fs_emergency_read_only(struct bch_fs *);
void bch_fs_read_only(struct bch_fs *);
const char *bch_fs_read_write(struct bch_fs *);
void bch_fs_release(struct kobject *);
-void bch_fs_stop_async(struct bch_fs *);
void bch_fs_stop(struct bch_fs *);
const char *bch_fs_start(struct bch_fs *);
@@ -124,8 +121,6 @@ const char *bch_fs_open(char * const *, unsigned, struct bch_opts,
struct bch_fs **);
const char *bch_fs_open_incremental(const char *path);
-extern struct workqueue_struct *bcache_io_wq;
-
extern struct kobj_type bch_fs_ktype;
extern struct kobj_type bch_fs_internal_ktype;
extern struct kobj_type bch_fs_time_stats_ktype;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 6f87f71072a2..70b0c546171f 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -7,7 +7,6 @@
#include "bcache.h"
#include "alloc.h"
-#include "blockdev.h"
#include "compress.h"
#include "sysfs.h"
#include "btree_cache.h"
@@ -20,23 +19,15 @@
#include "keylist.h"
#include "move.h"
#include "opts.h"
-#include "request.h"
#include "super-io.h"
#include "tier.h"
-#include "writeback.h"
#include <linux/blkdev.h>
#include <linux/sort.h>
-write_attribute(attach);
-write_attribute(detach);
-write_attribute(unregister);
-write_attribute(stop);
-write_attribute(clear_stats);
write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);
-write_attribute(blockdev_volume_create);
read_attribute(uuid);
read_attribute(minor);
@@ -85,37 +76,13 @@ read_attribute(has_metadata);
read_attribute(bset_tree_stats);
read_attribute(alloc_debug);
-read_attribute(state);
read_attribute(cache_read_races);
-read_attribute(writeback_keys_done);
-read_attribute(writeback_keys_failed);
-read_attribute(io_errors);
-rw_attribute(io_error_limit);
-rw_attribute(io_error_halflife);
-read_attribute(congested);
-rw_attribute(congested_read_threshold_us);
-rw_attribute(congested_write_threshold_us);
-
-rw_attribute(sequential_cutoff);
-rw_attribute(cache_mode);
-rw_attribute(writeback_metadata);
-rw_attribute(writeback_running);
-rw_attribute(writeback_percent);
-sysfs_pd_controller_attribute(writeback);
-
-read_attribute(stripe_size);
-read_attribute(partial_stripes_expensive);
rw_attribute(journal_write_delay_ms);
rw_attribute(journal_reclaim_delay_ms);
read_attribute(journal_entry_size_max);
rw_attribute(discard);
-rw_attribute(running);
-rw_attribute(label);
-rw_attribute(readahead);
-rw_attribute(verify);
-rw_attribute(bypass_torture_test);
rw_attribute(cache_replacement_policy);
rw_attribute(foreground_write_ratelimit_enabled);
@@ -133,7 +100,6 @@ rw_attribute(pd_controllers_update_seconds);
rw_attribute(foreground_target_percent);
-rw_attribute(size);
read_attribute(meta_replicas_have);
read_attribute(data_replicas_have);
@@ -161,286 +127,6 @@ static struct attribute sysfs_state_rw = {
.mode = S_IRUGO
};
-SHOW(bch_cached_dev)
-{
- struct cached_dev *dc = container_of(kobj, struct cached_dev,
- disk.kobj);
- const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };
-
-#define var(stat) (dc->stat)
-
- if (attr == &sysfs_cache_mode)
- return bch_snprint_string_list(buf, PAGE_SIZE,
- bch_cache_modes + 1,
- BDEV_CACHE_MODE(dc->disk_sb.sb));
-
- var_printf(verify, "%i");
- var_printf(bypass_torture_test, "%i");
- var_printf(writeback_metadata, "%i");
- var_printf(writeback_running, "%i");
- var_print(writeback_percent);
- sysfs_pd_controller_show(writeback, &dc->writeback_pd);
-
- sysfs_hprint(dirty_data,
- bcache_dev_sectors_dirty(&dc->disk) << 9);
- sysfs_print(dirty_bytes,
- bcache_dev_sectors_dirty(&dc->disk) << 9);
-
- sysfs_hprint(stripe_size, dc->disk.stripe_size << 9);
- var_printf(partial_stripes_expensive, "%u");
-
- var_hprint(sequential_cutoff);
- var_hprint(readahead);
-
- sysfs_print(running, atomic_read(&dc->running));
- sysfs_print(state, states[BDEV_STATE(dc->disk_sb.sb)]);
-
- if (attr == &sysfs_label) {
- memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
- buf[BCH_SB_LABEL_SIZE + 1] = '\0';
- strcat(buf, "\n");
- return strlen(buf);
- }
-
-#undef var
- return 0;
-}
-
-STORE(bch_cached_dev)
-{
- struct cached_dev *dc = container_of(kobj, struct cached_dev,
- disk.kobj);
- struct kobj_uevent_env *env;
-
-#define d_strtoul(var) sysfs_strtoul(var, dc->var)
-#define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
-#define d_strtoi_h(var) sysfs_hatoi(var, dc->var)
-
- d_strtoul(verify);
- d_strtoul(bypass_torture_test);
- d_strtoul(writeback_metadata);
- d_strtoul(writeback_running);
- sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
- sysfs_pd_controller_store(writeback, &dc->writeback_pd);
-
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
- if (attr == &sysfs_writeback_running)
- bch_writeback_queue(dc);
-
- if (attr == &sysfs_writeback_percent)
- schedule_delayed_work(&dc->writeback_pd_update,
- dc->writeback_pd_update_seconds * HZ);
-
- if (attr == &sysfs_clear_stats)
- bch_cache_accounting_clear(&dc->accounting);
-
- if (attr == &sysfs_running &&
- strtoul_or_return(buf))
- bch_cached_dev_run(dc);
-
- if (attr == &sysfs_cache_mode) {
- ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1);
-
- if (v < 0)
- return v;
-
- if ((unsigned) v != BDEV_CACHE_MODE(dc->disk_sb.sb)) {
- SET_BDEV_CACHE_MODE(dc->disk_sb.sb, v);
- bch_write_bdev_super(dc, NULL);
- }
- }
-
- if (attr == &sysfs_label) {
- u64 journal_seq = 0;
- int ret = 0;
-
- if (size > BCH_SB_LABEL_SIZE)
- return -EINVAL;
-
- mutex_lock(&dc->disk.inode_lock);
-
- memcpy(dc->disk_sb.sb->label, buf, size);
- if (size < BCH_SB_LABEL_SIZE)
- dc->disk_sb.sb->label[size] = '\0';
- if (size && dc->disk_sb.sb->label[size - 1] == '\n')
- dc->disk_sb.sb->label[size - 1] = '\0';
-
- memcpy(dc->disk.inode.v.i_label,
- dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
-
- bch_write_bdev_super(dc, NULL);
-
- if (dc->disk.c)
- ret = bch_btree_update(dc->disk.c, BTREE_ID_INODES,
- &dc->disk.inode.k_i,
- &journal_seq);
-
- mutex_unlock(&dc->disk.inode_lock);
-
- if (ret)
- return ret;
-
- if (dc->disk.c)
- ret = bch_journal_flush_seq(&dc->disk.c->journal,
- journal_seq);
- if (ret)
- return ret;
-
- env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
- if (!env)
- return -ENOMEM;
- add_uevent_var(env, "DRIVER=bcache");
- add_uevent_var(env, "CACHED_UUID=%pU", dc->disk_sb.sb->disk_uuid.b),
- add_uevent_var(env, "CACHED_LABEL=%s", buf);
- kobject_uevent_env(
- &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
- kfree(env);
- }
-
- if (attr == &sysfs_attach) {
- struct bch_fs *c;
- uuid_le uuid;
- int ret;
-
- if (uuid_parse(buf, &uuid))
- return -EINVAL;
-
- c = bch_uuid_to_fs(uuid);
- if (!c) {
- pr_err("Can't attach %s: cache set not found", buf);
- return -ENOENT;
- }
-
- dc->disk_sb.sb->set_uuid = uuid;
-
- ret = bch_cached_dev_attach(dc, c);
- closure_put(&c->cl);
- if (ret)
- return ret;
- }
-
- if (attr == &sysfs_detach && dc->disk.c)
- bch_cached_dev_detach(dc);
-
- if (attr == &sysfs_stop)
- bch_blockdev_stop(&dc->disk);
-
- return size;
-}
-
-static struct attribute *bch_cached_dev_files[] = {
- &sysfs_attach,
- &sysfs_detach,
- &sysfs_stop,
- &sysfs_cache_mode,
- &sysfs_writeback_metadata,
- &sysfs_writeback_running,
- &sysfs_writeback_percent,
- sysfs_pd_controller_files(writeback),
- &sysfs_dirty_data,
- &sysfs_dirty_bytes,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
- &sysfs_sequential_cutoff,
- &sysfs_clear_stats,
- &sysfs_running,
- &sysfs_state,
- &sysfs_label,
- &sysfs_readahead,
-#ifdef CONFIG_BCACHEFS_DEBUG
- &sysfs_verify,
- &sysfs_bypass_torture_test,
-#endif
- NULL
-};
-KTYPE(bch_cached_dev);
-
-SHOW(bch_blockdev_volume)
-{
- struct bcache_device *d = container_of(kobj, struct bcache_device,
- kobj);
-
- sysfs_hprint(size, le64_to_cpu(d->inode.v.i_size));
-
- if (attr == &sysfs_label) {
- memcpy(buf, d->inode.v.i_label, BCH_SB_LABEL_SIZE);
- buf[BCH_SB_LABEL_SIZE + 1] = '\0';
- strcat(buf, "\n");
- return strlen(buf);
- }
-
- return 0;
-}
-
-STORE(bch_blockdev_volume)
-{
- struct bcache_device *d = container_of(kobj, struct bcache_device,
- kobj);
-
- if (attr == &sysfs_size) {
- u64 journal_seq = 0;
- u64 v = strtoi_h_or_return(buf);
- int ret;
-
- mutex_lock(&d->inode_lock);
-
- if (v < le64_to_cpu(d->inode.v.i_size) ){
- ret = bch_inode_truncate(d->c, d->inode.k.p.inode,
- v >> 9, NULL, NULL);
- if (ret) {
- mutex_unlock(&d->inode_lock);
- return ret;
- }
- }
- d->inode.v.i_size = cpu_to_le64(v);
- ret = bch_btree_update(d->c, BTREE_ID_INODES,
- &d->inode.k_i, &journal_seq);
-
- mutex_unlock(&d->inode_lock);
-
- if (ret)
- return ret;
-
- ret = bch_journal_flush_seq(&d->c->journal, journal_seq);
- if (ret)
- return ret;
-
- set_capacity(d->disk, v >> 9);
- }
-
- if (attr == &sysfs_label) {
- u64 journal_seq = 0;
- int ret;
-
- mutex_lock(&d->inode_lock);
-
- memcpy(d->inode.v.i_label, buf, BCH_SB_LABEL_SIZE);
- ret = bch_btree_update(d->c, BTREE_ID_INODES,
- &d->inode.k_i, &journal_seq);
-
- mutex_unlock(&d->inode_lock);
-
- return ret ?: bch_journal_flush_seq(&d->c->journal, journal_seq);
- }
-
- if (attr == &sysfs_unregister) {
- set_bit(BCACHE_DEV_DETACHING, &d->flags);
- bch_blockdev_stop(d);
- }
-
- return size;
-}
-
-static struct attribute *bch_blockdev_volume_files[] = {
- &sysfs_unregister,
- &sysfs_label,
- &sysfs_size,
- NULL
-};
-KTYPE(bch_blockdev_volume);
-
static int bch_bset_print_stats(struct bch_fs *c, char *buf)
{
struct bset_stats stats;
@@ -647,22 +333,6 @@ SHOW(bch_fs)
sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races));
- sysfs_print(writeback_keys_done,
- atomic_long_read(&c->writeback_keys_done));
- sysfs_print(writeback_keys_failed,
- atomic_long_read(&c->writeback_keys_failed));
-
- /* See count_io_errors for why 88 */
- sysfs_print(io_error_halflife, c->error_decay * 88);
- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
-
- sysfs_hprint(congested,
- ((uint64_t) bch_get_congested(c)) << 9);
- sysfs_print(congested_read_threshold_us,
- c->congested_read_threshold_us);
- sysfs_print(congested_write_threshold_us,
- c->congested_write_threshold_us);
-
sysfs_printf(foreground_write_ratelimit_enabled, "%i",
c->foreground_write_ratelimit_enabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
@@ -712,40 +382,6 @@ STORE(__bch_fs)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
- if (attr == &sysfs_unregister) {
- bch_fs_detach(c);
- return size;
- }
-
- if (attr == &sysfs_stop) {
- bch_fs_stop_async(c);
- return size;
- }
-
- if (attr == &sysfs_clear_stats) {
- atomic_long_set(&c->writeback_keys_done, 0);
- atomic_long_set(&c->writeback_keys_failed, 0);
- bch_cache_accounting_clear(&c->accounting);
-
- return size;
- }
-
- sysfs_strtoul(congested_read_threshold_us,
- c->congested_read_threshold_us);
- sysfs_strtoul(congested_write_threshold_us,
- c->congested_write_threshold_us);
-
- if (attr == &sysfs_io_error_limit) {
- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
- return size;
- }
-
- /* See count_io_errors() for why 88 */
- if (attr == &sysfs_io_error_halflife) {
- c->error_decay = strtoul_or_return(buf) / 88;
- return size;
- }
-
sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
@@ -796,14 +432,6 @@ STORE(__bch_fs)
return size;
}
- if (attr == &sysfs_blockdev_volume_create) {
- u64 v = strtoi_h_or_return(buf);
- int r = bch_blockdev_volume_create(c, v);
-
- if (r)
- return r;
- }
-
if (attr == &sysfs_trigger_btree_coalesce)
bch_coalesce(c);
@@ -835,12 +463,9 @@ STORE(bch_fs)
}
static struct attribute *bch_fs_files[] = {
- &sysfs_unregister,
- &sysfs_stop,
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,
&sysfs_journal_entry_size_max,
- &sysfs_blockdev_volume_create,
&sysfs_block_size,
&sysfs_block_size_bytes,
@@ -854,13 +479,6 @@ static struct attribute *bch_fs_files[] = {
&sysfs_average_key_size,
- &sysfs_io_error_limit,
- &sysfs_io_error_halflife,
- &sysfs_congested,
- &sysfs_congested_read_threshold_us,
- &sysfs_congested_write_threshold_us,
- &sysfs_clear_stats,
-
&sysfs_meta_replicas_have,
&sysfs_data_replicas_have,
@@ -902,8 +520,6 @@ static struct attribute *bch_fs_internal_files[] = {
&sysfs_bset_tree_stats,
&sysfs_cache_read_races,
- &sysfs_writeback_keys_done,
- &sysfs_writeback_keys_failed,
&sysfs_trigger_btree_coalesce,
&sysfs_trigger_gc,
@@ -1171,9 +787,6 @@ SHOW(bch_dev)
(atomic64_read(&ca->meta_sectors_written) +
atomic64_read(&ca->btree_sectors_written)) << 9);
- sysfs_print(io_errors,
- atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);
-
sysfs_hprint(dirty_data, stats.sectors[S_DIRTY] << 9);
sysfs_print(dirty_bytes, stats.sectors[S_DIRTY] << 9);
sysfs_print(dirty_buckets, stats.buckets_dirty);
@@ -1279,18 +892,6 @@ STORE(bch_dev)
bch_tiering_start(c);
}
- if (attr == &sysfs_clear_stats) {
- int cpu;
-
- for_each_possible_cpu(cpu)
- *per_cpu_ptr(ca->sectors_written, cpu) = 0;
-
- atomic64_set(&ca->btree_sectors_written, 0);
- atomic64_set(&ca->meta_sectors_written, 0);
- atomic_set(&ca->io_count, 0);
- atomic_set(&ca->io_errors, 0);
- }
-
return size;
}
@@ -1323,8 +924,6 @@ static struct attribute *bch_dev_files[] = {
&sysfs_written,
&sysfs_btree_written,
&sysfs_metadata_written,
- &sysfs_io_errors,
- &sysfs_clear_stats,
&sysfs_cache_replacement_policy,
&sysfs_tier,
&sysfs_state_rw,
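
For context on the removed cached-device sysfs interface: the deleted STORE(bch_cached_dev) label path bounds-checked the write, NUL-terminated short input, and stripped a trailing newline before copying the label into the superblock and inode. A minimal standalone sketch of that sanitization follows; LABEL_SIZE and set_label() are illustrative stand-ins, not kernel API.

#include <stdio.h>
#include <string.h>

#define LABEL_SIZE 32			/* stand-in for BCH_SB_LABEL_SIZE */

/*
 * Copy a user-supplied label into a fixed-size field: reject oversized
 * input, NUL-terminate short input, and drop a trailing newline, the
 * same steps the deleted sysfs label store performed.
 */
static int set_label(char *dst, const char *buf, size_t size)
{
	if (size > LABEL_SIZE)
		return -1;		/* the kernel code returned -EINVAL */

	memcpy(dst, buf, size);
	if (size < LABEL_SIZE)
		dst[size] = '\0';
	if (size && dst[size - 1] == '\n')
		dst[size - 1] = '\0';
	return 0;
}

int main(void)
{
	char label[LABEL_SIZE];

	if (!set_label(label, "backup0\n", 8))
		printf("label = \"%s\"\n", label);
	return 0;
}
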
diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c
index 911d1bbfe03b..970699930c5e 100644
--- a/fs/bcachefs/trace.c
+++ b/fs/bcachefs/trace.c
@@ -1,6 +1,5 @@
#include "bcache.h"
#include "alloc_types.h"
-#include "blockdev_types.h"
#include "buckets.h"
#include "btree_types.h"
#include "keylist.h"
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index cd8ca6e6cc86..95be24854669 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -610,13 +610,7 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
#define closure_bio_submit(bio, cl) \
do { \
closure_get(cl); \
- generic_make_request(bio); \
-} while (0)
-
-#define closure_bio_submit_punt(bio, cl, c) \
-do { \
- closure_get(cl); \
- bch_generic_make_request(bio, c); \
+ submit_bio(bio); \
} while (0)
#define kthread_wait_freezable(cond) \
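
The closure_bio_submit() change above keeps the take-a-reference-then-submit pattern but routes the bio through submit_bio() instead of generic_make_request(), and drops the _punt variant. A toy sketch of the underlying refcount discipline, assuming a mock closure type and a synchronous stand-in for bio completion (none of these names are the kernel API):

#include <stdio.h>

/* Toy stand-ins for a closure refcount and an async I/O submission. */
struct closure { int remaining; };

static void closure_get(struct closure *cl) { cl->remaining++; }

static void closure_put(struct closure *cl)
{
	if (--cl->remaining == 0)
		printf("closure done\n");
}

/* Completion handler: drops the reference taken at submission time. */
static void endio_cb(struct closure *cl) { closure_put(cl); }

/*
 * Mirrors the shape of closure_bio_submit(): take a reference, then hand
 * the request off. Here the completion runs synchronously for the sake of
 * the example; in the kernel it runs later, from the bio's end_io path.
 */
static void submit(struct closure *cl)
{
	closure_get(cl);
	endio_cb(cl);
}

int main(void)
{
	struct closure cl = { .remaining = 1 };

	submit(&cl);
	closure_put(&cl);	/* drop the initial reference */
	return 0;
}
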
diff --git a/fs/bcachefs/writeback.c b/fs/bcachefs/writeback.c
deleted file mode 100644
index 3b7798000a18..000000000000
--- a/fs/bcachefs/writeback.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * background writeback - scan btree for dirty data and write it to the backing
- * device
- *
- * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
- * Copyright 2012 Google, Inc.
- */
-
-#include "bcache.h"
-#include "btree_update.h"
-#include "clock.h"
-#include "debug.h"
-#include "error.h"
-#include "extents.h"
-#include "io.h"
-#include "keybuf.h"
-#include "keylist.h"
-#include "writeback.h"
-
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <trace/events/bcachefs.h>
-
-/* Rate limiting */
-
-static void __update_writeback_rate(struct cached_dev *dc)
-{
- struct bch_fs *c = dc->disk.c;
- u64 cache_dirty_target =
- div_u64(c->capacity * dc->writeback_percent, 100);
- s64 target = div64_u64(cache_dirty_target *
- bdev_sectors(dc->disk_sb.bdev),
- c->cached_dev_sectors);
- s64 dirty = bcache_dev_sectors_dirty(&dc->disk);
-
- bch_pd_controller_update(&dc->writeback_pd, target << 9,
- dirty << 9, -1);
-}
-
-static void update_writeback_rate(struct work_struct *work)
-{
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_pd_update);
-
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
- dc->writeback_percent &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- __update_writeback_rate(dc);
- else
- dc->writeback_pd.rate.rate = UINT_MAX;
-
- up_read(&dc->writeback_lock);
-
- schedule_delayed_work(&dc->writeback_pd_update,
- dc->writeback_pd_update_seconds * HZ);
-}
-
-struct dirty_io {
- struct closure cl;
- struct bch_replace_info replace;
- struct cached_dev *dc;
- struct bch_dev *ca;
- struct keybuf_key *w;
- struct bch_extent_ptr ptr;
- int error;
- bool from_mempool;
- /* Must be last */
- struct bio bio;
-};
-
-#define DIRTY_IO_MEMPOOL_BVECS 64
-#define DIRTY_IO_MEMPOOL_SECTORS (DIRTY_IO_MEMPOOL_BVECS * PAGE_SECTORS)
-
-static void dirty_init(struct dirty_io *io)
-{
- struct bio *bio = &io->bio;
-
- bio_init(bio);
- if (!io->dc->writeback_percent)
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-
- bio->bi_iter.bi_size = io->replace.key.k.size << 9;
- bio->bi_max_vecs =
- DIV_ROUND_UP(io->replace.key.k.size, PAGE_SECTORS);
- bio->bi_io_vec = bio->bi_inline_vecs;
- bch_bio_map(bio, NULL);
-}
-
-static void dirty_io_destructor(struct closure *cl)
-{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
- if (io->from_mempool)
- mempool_free(io, &io->dc->writeback_io_pool);
- else
- kfree(io);
-}
-
-static void write_dirty_finish(struct closure *cl)
-{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
- struct cached_dev *dc = io->dc;
- struct bio_vec *bv;
- int i;
-
- bio_for_each_segment_all(bv, &io->bio, i)
- mempool_free(bv->bv_page, &dc->writeback_page_pool);
-
- if (!io->error) {
- BKEY_PADDED(k) tmp;
- int ret;
-
- bkey_copy(&tmp.k, &io->replace.key);
- io->replace.hook.fn = bch_extent_cmpxchg;
- bkey_extent_set_cached(&tmp.k.k, true);
-
- ret = bch_btree_insert(dc->disk.c, BTREE_ID_EXTENTS, &tmp.k,
- NULL, &io->replace.hook, NULL, 0);
- if (io->replace.successes == 0)
- trace_bcache_writeback_collision(&io->replace.key.k);
-
- atomic_long_inc(ret
- ? &dc->disk.c->writeback_keys_failed
- : &dc->disk.c->writeback_keys_done);
- }
-
- bch_keybuf_put(&dc->writeback_keys, io->w);
-
- closure_return_with_destructor(cl, dirty_io_destructor);
-}
-
-static void dirty_endio(struct bio *bio)
-{
- struct dirty_io *io = container_of(bio, struct dirty_io, bio);
-
- if (bio->bi_error) {
- trace_bcache_writeback_error(&io->replace.key.k,
- op_is_write(bio_op(&io->bio)),
- bio->bi_error);
- io->error = bio->bi_error;
- }
-
- closure_put(&io->cl);
-}
-
-static void write_dirty(struct closure *cl)
-{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
- if (!io->error) {
- dirty_init(io);
- bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
- io->bio.bi_iter.bi_sector =
- bkey_start_offset(&io->replace.key.k);
- io->bio.bi_bdev = io->dc->disk_sb.bdev;
- io->bio.bi_end_io = dirty_endio;
-
- closure_bio_submit(&io->bio, cl);
- }
-
- continue_at(cl, write_dirty_finish, io->dc->disk.c->wq);
-}
-
-static void read_dirty_endio(struct bio *bio)
-{
- struct dirty_io *io = container_of(bio, struct dirty_io, bio);
-
- bch_dev_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read");
-
- bch_account_io_completion(io->ca);
-
- if (ptr_stale(io->ca, &io->ptr))
- bio->bi_error = -EINTR;
-
- dirty_endio(bio);
-}
-
-static void read_dirty_submit(struct closure *cl)
-{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
- closure_bio_submit(&io->bio, cl);
-
- continue_at(cl, write_dirty, system_freezable_wq);
-}
-
-static u64 read_dirty(struct cached_dev *dc)
-{
- struct keybuf_key *w;
- struct dirty_io *io;
- struct closure cl;
- unsigned i;
- struct bio_vec *bv;
- u64 sectors_written = 0;
- BKEY_PADDED(k) tmp;
-
- closure_init_stack(&cl);
-
- while (!bch_ratelimit_wait_freezable_stoppable(&dc->writeback_pd.rate)) {
- w = bch_keybuf_next(&dc->writeback_keys);
- if (!w)
- break;
-
- sectors_written += w->key.k.size;
- bkey_copy(&tmp.k, &w->key);
-
- while (tmp.k.k.size) {
- struct extent_pick_ptr pick;
-
- bch_extent_pick_ptr(dc->disk.c,
- bkey_i_to_s_c(&tmp.k),
- &pick);
- if (IS_ERR_OR_NULL(pick.ca))
- break;
-
- io = kzalloc(sizeof(*io) + sizeof(struct bio_vec) *
- DIV_ROUND_UP(tmp.k.k.size,
- PAGE_SECTORS),
- GFP_KERNEL);
- if (!io) {
- trace_bcache_writeback_alloc_fail(pick.ca->fs,
- tmp.k.k.size);
- io = mempool_alloc(&dc->writeback_io_pool,
- GFP_KERNEL);
- memset(io, 0, sizeof(*io) +
- sizeof(struct bio_vec) *
- DIRTY_IO_MEMPOOL_BVECS);
- io->from_mempool = true;
-
- bkey_copy(&io->replace.key, &tmp.k);
-
- if (DIRTY_IO_MEMPOOL_SECTORS <
- io->replace.key.k.size)
- bch_key_resize(&io->replace.key.k,
- DIRTY_IO_MEMPOOL_SECTORS);
- } else {
- bkey_copy(&io->replace.key, &tmp.k);
- }
-
- io->dc = dc;
- io->ca = pick.ca;
- io->w = w;
- io->ptr = pick.ptr;
- atomic_inc(&w->ref);
-
- dirty_init(io);
- bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
- io->bio.bi_iter.bi_sector = pick.ptr.offset;
- io->bio.bi_bdev = pick.ca->disk_sb.bdev;
- io->bio.bi_end_io = read_dirty_endio;
-
- bio_for_each_segment_all(bv, &io->bio, i) {
- bv->bv_page =
- mempool_alloc(&dc->writeback_page_pool,
- i ? GFP_NOWAIT
- : GFP_KERNEL);
- if (!bv->bv_page) {
- BUG_ON(!i);
- io->bio.bi_vcnt = i;
-
- io->bio.bi_iter.bi_size =
- io->bio.bi_vcnt * PAGE_SIZE;
-
- bch_key_resize(&io->replace.key.k,
- bio_sectors(&io->bio));
- break;
- }
- }
-
- bch_cut_front(io->replace.key.k.p, &tmp.k);
- trace_bcache_writeback(&io->replace.key.k);
-
- bch_ratelimit_increment(&dc->writeback_pd.rate,
- io->replace.key.k.size << 9);
-
- closure_call(&io->cl, read_dirty_submit, NULL, &cl);
- }
-
- bch_keybuf_put(&dc->writeback_keys, w);
- }
-
- /*
- * Wait for outstanding writeback IOs to finish (and keybuf slots to be
- * freed) before refilling again
- */
- closure_sync(&cl);
-
- return sectors_written;
-}
-
-/* Scan for dirty data */
-
-static void __bcache_dev_sectors_dirty_add(struct bcache_device *d,
- u64 offset, int nr_sectors)
-{
- unsigned stripe_offset, stripe, sectors_dirty;
-
- if (!d)
- return;
-
- if (!d->stripe_sectors_dirty)
- return;
-
- stripe = offset_to_stripe(d, offset);
- stripe_offset = offset & (d->stripe_size - 1);
-
- while (nr_sectors) {
- int s = min_t(unsigned, abs(nr_sectors),
- d->stripe_size - stripe_offset);
-
- if (nr_sectors < 0)
- s = -s;
-
- if (stripe >= d->nr_stripes)
- return;
-
- sectors_dirty = atomic_add_return(s,
- d->stripe_sectors_dirty + stripe);
- if (sectors_dirty == d->stripe_size)
- set_bit(stripe, d->full_dirty_stripes);
- else
- clear_bit(stripe, d->full_dirty_stripes);
-
- nr_sectors -= s;
- stripe_offset = 0;
- stripe++;
- }
-}
-
-void bcache_dev_sectors_dirty_add(struct bch_fs *c, unsigned inode,
- u64 offset, int nr_sectors)
-{
- struct bcache_device *d;
-
- rcu_read_lock();
- d = bch_dev_find(c, inode);
- if (d)
- __bcache_dev_sectors_dirty_add(d, offset, nr_sectors);
- rcu_read_unlock();
-}
-
-static bool dirty_pred(struct keybuf *buf, struct bkey_s_c k)
-{
- struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
-
- BUG_ON(k.k->p.inode != bcache_dev_inum(&dc->disk));
-
- return bkey_extent_is_data(k.k) &&
- !bkey_extent_is_cached(k.k);
-}
-
-static void refill_full_stripes(struct cached_dev *dc)
-{
- struct keybuf *buf = &dc->writeback_keys;
- unsigned inode = bcache_dev_inum(&dc->disk);
- unsigned start_stripe, stripe, next_stripe;
- bool wrapped = false;
-
- stripe = offset_to_stripe(&dc->disk, buf->last_scanned.offset);
-
- if (stripe >= dc->disk.nr_stripes)
- stripe = 0;
-
- start_stripe = stripe;
-
- while (1) {
- stripe = find_next_bit(dc->disk.full_dirty_stripes,
- dc->disk.nr_stripes, stripe);
-
- if (stripe == dc->disk.nr_stripes)
- goto next;
-
- next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes,
- dc->disk.nr_stripes, stripe);
-
- buf->last_scanned = POS(inode,
- stripe * dc->disk.stripe_size);
-
- bch_refill_keybuf(dc->disk.c, buf,
- POS(inode,
- next_stripe * dc->disk.stripe_size),
- dirty_pred);
-
- if (array_freelist_empty(&buf->freelist))
- return;
-
- stripe = next_stripe;
-next:
- if (wrapped && stripe > start_stripe)
- return;
-
- if (stripe == dc->disk.nr_stripes) {
- stripe = 0;
- wrapped = true;
- }
- }
-}
-
-static u64 bch_writeback(struct cached_dev *dc)
-{
- struct keybuf *buf = &dc->writeback_keys;
- unsigned inode = bcache_dev_inum(&dc->disk);
- struct bpos start = POS(inode, 0);
- struct bpos end = POS(inode, KEY_OFFSET_MAX);
- struct bpos start_pos;
- u64 sectors_written = 0;
-
- buf->last_scanned = POS(inode, 0);
-
- while (bkey_cmp(buf->last_scanned, end) < 0 &&
- !kthread_should_stop()) {
- down_write(&dc->writeback_lock);
-
- if (!atomic_read(&dc->has_dirty)) {
- up_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (kthread_should_stop())
- return sectors_written;
-
- schedule();
- try_to_freeze();
- return sectors_written;
- }
-
- if (bkey_cmp(buf->last_scanned, end) >= 0)
- buf->last_scanned = POS(inode, 0);
-
- if (dc->partial_stripes_expensive) {
- refill_full_stripes(dc);
- if (array_freelist_empty(&buf->freelist))
- goto refill_done;
- }
-
- start_pos = buf->last_scanned;
- bch_refill_keybuf(dc->disk.c, buf, end, dirty_pred);
-
- if (bkey_cmp(buf->last_scanned, end) >= 0) {
- /*
- * If we get to the end start scanning again from the
- * beginning, and only scan up to where we initially
- * started scanning from:
- */
- buf->last_scanned = start;
- bch_refill_keybuf(dc->disk.c, buf, start_pos,
- dirty_pred);
- }
-
- if (RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
- cached_dev_put(dc);
- SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-
-refill_done:
- up_write(&dc->writeback_lock);
-
- bch_ratelimit_reset(&dc->writeback_pd.rate);
- sectors_written += read_dirty(dc);
- }
-
- return sectors_written;
-}
-
-static int bch_writeback_thread(void *arg)
-{
- struct cached_dev *dc = arg;
- struct bch_fs *c = dc->disk.c;
- struct io_clock *clock = &c->io_clock[WRITE];
- unsigned long last;
- u64 sectors_written;
-
- set_freezable();
-
- while (!kthread_should_stop()) {
- if (kthread_wait_freezable(dc->writeback_running ||
- test_bit(BCACHE_DEV_DETACHING,
- &dc->disk.flags)))
- break;
-
- last = atomic_long_read(&clock->now);
-
- sectors_written = bch_writeback(dc);
-
- if (sectors_written < c->capacity >> 4)
- bch_kthread_io_clock_wait(clock,
- last + (c->capacity >> 5));
- }
-
- return 0;
-}
-
-/**
- * bch_keylist_recalc_oldest_gens - update oldest_gen pointers from writeback keys
- *
- * This prevents us from wrapping around gens for a bucket only referenced from
- * writeback keybufs. We don't actually care that the data in those buckets is
- * marked live, only that we don't wrap the gens.
- */
-void bch_writeback_recalc_oldest_gens(struct bch_fs *c)
-{
- struct radix_tree_iter iter;
- void **slot;
-
- rcu_read_lock();
-
- radix_tree_for_each_slot(slot, &c->devices, &iter, 0) {
- struct bcache_device *d;
- struct cached_dev *dc;
-
- d = radix_tree_deref_slot(slot);
-
- if (!CACHED_DEV(&d->inode.v))
- continue;
- dc = container_of(d, struct cached_dev, disk);
-
- bch_keybuf_recalc_oldest_gens(c, &dc->writeback_keys);
- }
-
- rcu_read_unlock();
-}
-
-/* Init */
-
-void bch_sectors_dirty_init(struct cached_dev *dc, struct bch_fs *c)
-{
- struct bcache_device *d = &dc->disk;
- struct btree_iter iter;
- struct bkey_s_c k;
-
- /*
- * We have to do this before the disk is added to the radix tree or we
- * race with moving GC
- */
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(bcache_dev_inum(d), 0), k) {
- if (k.k->p.inode > bcache_dev_inum(d))
- break;
-
- if (bkey_extent_is_data(k.k) &&
- !bkey_extent_is_cached(k.k))
- __bcache_dev_sectors_dirty_add(d,
- bkey_start_offset(k.k),
- k.k->size);
-
- bch_btree_iter_cond_resched(&iter);
- }
- bch_btree_iter_unlock(&iter);
-
- dc->writeback_pd.last_actual = bcache_dev_sectors_dirty(d);
-}
-
-void bch_cached_dev_writeback_stop(struct cached_dev *dc)
-{
- cancel_delayed_work_sync(&dc->writeback_pd_update);
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
- }
-}
-
-void bch_cached_dev_writeback_free(struct cached_dev *dc)
-{
- struct bcache_device *d = &dc->disk;
-
- mempool_exit(&dc->writeback_page_pool);
- mempool_exit(&dc->writeback_io_pool);
- kvfree(d->full_dirty_stripes);
- kvfree(d->stripe_sectors_dirty);
-}
-
-int bch_cached_dev_writeback_init(struct cached_dev *dc)
-{
- struct bcache_device *d = &dc->disk;
- sector_t sectors;
- size_t n;
-
- sectors = get_capacity(dc->disk.disk);
-
- if (!d->stripe_size) {
-#ifdef CONFIG_BCACHEFS_DEBUG
- d->stripe_size = 1 << 0;
-#else
- d->stripe_size = 1 << 31;
-#endif
- }
-
- pr_debug("stripe size: %d sectors", d->stripe_size);
- d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
-
- if (!d->nr_stripes ||
- d->nr_stripes > INT_MAX ||
- d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
- pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
- (unsigned)d->nr_stripes);
- return -ENOMEM;
- }
-
- n = d->nr_stripes * sizeof(atomic_t);
- d->stripe_sectors_dirty = n < PAGE_SIZE << 6
- ? kzalloc(n, GFP_KERNEL)
- : vzalloc(n);
- if (!d->stripe_sectors_dirty) {
- pr_err("cannot allocate stripe_sectors_dirty");
- return -ENOMEM;
- }
-
- n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
- d->full_dirty_stripes = n < PAGE_SIZE << 6
- ? kzalloc(n, GFP_KERNEL)
- : vzalloc(n);
- if (!d->full_dirty_stripes) {
- pr_err("cannot allocate full_dirty_stripes");
- return -ENOMEM;
- }
-
- if (mempool_init_kmalloc_pool(&dc->writeback_io_pool, 4,
- sizeof(struct dirty_io) +
- sizeof(struct bio_vec) *
- DIRTY_IO_MEMPOOL_BVECS) ||
- mempool_init_page_pool(&dc->writeback_page_pool,
- (64 << 10) / PAGE_SIZE, 0))
- return -ENOMEM;
-
- init_rwsem(&dc->writeback_lock);
- bch_keybuf_init(&dc->writeback_keys);
-
- dc->writeback_metadata = true;
- dc->writeback_running = true;
- dc->writeback_percent = 10;
- dc->writeback_pd_update_seconds = 5;
-
- bch_pd_controller_init(&dc->writeback_pd);
- INIT_DELAYED_WORK(&dc->writeback_pd_update, update_writeback_rate);
-
- return 0;
-}
-
-int bch_cached_dev_writeback_start(struct cached_dev *dc)
-{
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
- if (IS_ERR(dc->writeback_thread))
- return PTR_ERR(dc->writeback_thread);
-
- schedule_delayed_work(&dc->writeback_pd_update,
- dc->writeback_pd_update_seconds * HZ);
-
- bch_writeback_queue(dc);
-
- return 0;
-}
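
The deleted __update_writeback_rate() derived a per-device dirty-data target from the cache's writeback_percent, scaled it by the backing device's share of all cached sectors, and fed the result to the PD controller. A standalone sketch of that arithmetic, assuming illustrative names and example numbers rather than the kernel structures:

#include <stdint.h>
#include <stdio.h>

/*
 * Target dirty sectors for one backing device, in the spirit of the
 * deleted __update_writeback_rate(): a writeback_percent slice of cache
 * capacity, scaled by this device's share of all cached sectors.
 */
static uint64_t writeback_target(uint64_t capacity, unsigned writeback_percent,
				 uint64_t bdev_sectors,
				 uint64_t cached_dev_sectors)
{
	uint64_t cache_dirty_target = capacity * writeback_percent / 100;

	return cache_dirty_target * bdev_sectors / cached_dev_sectors;
}

int main(void)
{
	/* 1 TiB cache, 10% writeback target, this device holds half the data */
	uint64_t t = writeback_target(1ULL << 31, 10, 1ULL << 32, 1ULL << 33);

	printf("target: %llu sectors\n", (unsigned long long)t);
	return 0;
}
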
diff --git a/fs/bcachefs/writeback.h b/fs/bcachefs/writeback.h
deleted file mode 100644
index 82ce306e9ac9..000000000000
--- a/fs/bcachefs/writeback.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#ifndef _BCACHE_WRITEBACK_H
-#define _BCACHE_WRITEBACK_H
-
-#include "blockdev.h"
-#include "buckets.h"
-
-#define CUTOFF_WRITEBACK 60
-#define CUTOFF_WRITEBACK_SYNC 30
-
-static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
-{
- uint64_t i, ret = 0;
-
- for (i = 0; i < d->nr_stripes; i++)
- ret += atomic_read(d->stripe_sectors_dirty + i);
-
- return ret;
-}
-
-static inline unsigned offset_to_stripe(struct bcache_device *d,
- uint64_t offset)
-{
- do_div(offset, d->stripe_size);
- return offset;
-}
-
-static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
- uint64_t offset,
- unsigned nr_sectors)
-{
- unsigned stripe = offset_to_stripe(&dc->disk, offset);
-
- while (1) {
- if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
- return true;
-
- if (nr_sectors <= dc->disk.stripe_size)
- return false;
-
- nr_sectors -= dc->disk.stripe_size;
- stripe++;
- }
-}
-
-static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
- unsigned cache_mode, bool would_skip)
-{
- struct bch_fs *c = dc->disk.c;
- u64 available = sectors_available(c);
-
- if (cache_mode != CACHE_MODE_WRITEBACK ||
- test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- available * 100 < c->capacity * CUTOFF_WRITEBACK_SYNC)
- return false;
-
- if (dc->partial_stripes_expensive &&
- bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
- bio_sectors(bio)))
- return true;
-
- if (would_skip)
- return false;
-
- return bio->bi_opf & REQ_SYNC ||
- available * 100 < c->capacity * CUTOFF_WRITEBACK;
-}
-
-static inline void bch_writeback_queue(struct cached_dev *dc)
-{
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- wake_up_process(dc->writeback_thread);
-}
-
-static inline void bch_writeback_add(struct cached_dev *dc)
-{
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
- atomic_inc(&dc->count);
-
- if (BDEV_STATE(dc->disk_sb.sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
- bch_write_bdev_super(dc, NULL);
- }
-
- bch_writeback_queue(dc);
- }
-}
-
-#ifndef NO_BCACHE_WRITEBACK
-
-void bcache_dev_sectors_dirty_add(struct bch_fs *, unsigned, u64, int);
-
-void bch_writeback_recalc_oldest_gens(struct bch_fs *);
-void bch_sectors_dirty_init(struct cached_dev *, struct bch_fs *c);
-
-void bch_cached_dev_writeback_stop(struct cached_dev *);
-void bch_cached_dev_writeback_free(struct cached_dev *);
-int bch_cached_dev_writeback_init(struct cached_dev *);
-int bch_cached_dev_writeback_start(struct cached_dev *);
-
-#else
-
-static inline void bcache_dev_sectors_dirty_add(struct bch_fs *c,
- unsigned i, u64 o, int n) {}
-static inline void bch_writeback_recalc_oldest_gens(struct bch_fs *c) {}
-static inline void bch_sectors_dirty_init(struct cached_dev *dc,
- struct bch_fs *c) {}
-static inline void bch_cached_dev_writeback_stop(struct cached_dev *dc) {}
-static inline void bch_cached_dev_writeback_free(struct cached_dev *dc) {}
-static inline int bch_cached_dev_writeback_init(struct cached_dev *dc)
-{
- return 0;
-}
-static inline int bch_cached_dev_writeback_start(struct cached_dev *dc)
-{
- return 0;
-}
-
-#endif
-
-#endif
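
The deleted writeback headers tracked dirtiness at stripe granularity: offset_to_stripe() mapped an offset to a stripe, per-stripe counters accumulated dirty sectors, and fully dirty stripes were flagged so writeback could prefer whole stripes. A simplified userspace sketch of that accounting, where plain arrays stand in for the kernel's atomic counters and bitmap and the names are illustrative:

#include <stdint.h>
#include <stdio.h>

#define STRIPE_SIZE	8		/* sectors per stripe (illustrative) */
#define NR_STRIPES	4

static unsigned dirty[NR_STRIPES];	/* per-stripe dirty sector counts */
static unsigned full[NR_STRIPES];	/* stripe is entirely dirty */

/* Spread nr_sectors of newly dirty data across the stripes it covers. */
static void sectors_dirty_add(uint64_t offset, unsigned nr_sectors)
{
	unsigned stripe = offset / STRIPE_SIZE;
	unsigned stripe_offset = offset % STRIPE_SIZE;

	while (nr_sectors && stripe < NR_STRIPES) {
		unsigned s = nr_sectors < STRIPE_SIZE - stripe_offset
			? nr_sectors : STRIPE_SIZE - stripe_offset;

		dirty[stripe] += s;
		full[stripe] = (dirty[stripe] == STRIPE_SIZE);

		nr_sectors -= s;
		stripe_offset = 0;
		stripe++;
	}
}

int main(void)
{
	sectors_dirty_add(6, 10);	/* spans stripes 0 and 1 */

	for (unsigned i = 0; i < NR_STRIPES; i++)
		printf("stripe %u: %u dirty%s\n", i, dirty[i],
		       full[i] ? " (full)" : "");
	return 0;
}
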
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6c17b0ff0b8b..2865d10a2882 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1942,7 +1942,7 @@ struct task_struct {
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
#endif
-#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) || defined(CONFIG_BCACHE_FS) || defined(CONFIG_BCACHE_FS_MODULE)
+#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 24e6478015f2..5b55dd0862c8 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -6,38 +6,6 @@
#include <linux/tracepoint.h>
-DECLARE_EVENT_CLASS(bcache_request,
- TP_PROTO(struct bcache_device *d, struct bio *bio),
- TP_ARGS(d, bio),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(unsigned int, orig_major )
- __field(unsigned int, orig_minor )
- __field(sector_t, sector )
- __field(sector_t, orig_sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
- ),
-
- TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
- __entry->orig_major = d->disk->major;
- __entry->orig_minor = d->disk->first_minor;
- __entry->sector = bio->bi_iter.bi_sector;
- __entry->orig_sector = bio->bi_iter.bi_sector - 16;
- __entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
- bio->bi_iter.bi_size);
- ),
-
- TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs, (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
- (unsigned long long)__entry->orig_sector)
-);
-
DECLARE_EVENT_CLASS(bpos,
TP_PROTO(struct bpos p),
TP_ARGS(p),
@@ -75,16 +43,36 @@ DECLARE_EVENT_CLASS(bkey,
__entry->offset, __entry->size)
);
-/* request.c */
+DECLARE_EVENT_CLASS(bch_dev,
+ TP_PROTO(struct bch_dev *ca),
+ TP_ARGS(ca),
-DEFINE_EVENT(bcache_request, bcache_request_start,
- TP_PROTO(struct bcache_device *d, struct bio *bio),
- TP_ARGS(d, bio)
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ __field(unsigned, tier )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->uuid, ca->uuid.b, 16);
+ __entry->tier = ca->mi.tier;
+ ),
+
+ TP_printk("%pU tier %u", __entry->uuid, __entry->tier)
);
-DEFINE_EVENT(bcache_request, bcache_request_end,
- TP_PROTO(struct bcache_device *d, struct bio *bio),
- TP_ARGS(d, bio)
+DECLARE_EVENT_CLASS(bch_fs,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c),
+
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+ ),
+
+ TP_printk("%pU", __entry->uuid)
);
DECLARE_EVENT_CLASS(bcache_bio,
@@ -111,26 +99,25 @@ DECLARE_EVENT_CLASS(bcache_bio,
(unsigned long long)__entry->sector, __entry->nr_sector)
);
-DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
+DECLARE_EVENT_CLASS(page_alloc_fail,
+ TP_PROTO(struct bch_fs *c, u64 size),
+ TP_ARGS(c, size),
-DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ __field(u64, size )
+ ),
-DEFINE_EVENT(bcache_bio, bcache_promote,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
+ TP_fast_assign(
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+ __entry->size = size;
+ ),
-DEFINE_EVENT(bkey, bcache_promote_collision,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
+ TP_printk("%pU size %llu", __entry->uuid, __entry->size)
);
+/* io.c: */
+
DEFINE_EVENT(bcache_bio, bcache_read_split,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
@@ -141,65 +128,14 @@ DEFINE_EVENT(bcache_bio, bcache_read_bounce,
TP_ARGS(bio)
);
-TRACE_EVENT(bcache_read,
- TP_PROTO(struct bio *bio, bool hit, bool bypass),
- TP_ARGS(bio, hit, bypass),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(sector_t, sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
- __field(bool, cache_hit )
- __field(bool, bypass )
- ),
-
- TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
- __entry->sector = bio->bi_iter.bi_sector;
- __entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
- bio->bi_iter.bi_size);
- __entry->cache_hit = hit;
- __entry->bypass = bypass;
- ),
-
- TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs, (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->cache_hit, __entry->bypass)
+DEFINE_EVENT(bcache_bio, bcache_read_retry,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
);
-TRACE_EVENT(bcache_write,
- TP_PROTO(struct bch_fs *c, u64 inode, struct bio *bio,
- bool writeback, bool bypass),
- TP_ARGS(c, inode, bio, writeback, bypass),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, inode )
- __field(sector_t, sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
- __field(bool, writeback )
- __field(bool, bypass )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->inode = inode;
- __entry->sector = bio->bi_iter.bi_sector;
- __entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
- bio->bi_iter.bi_size);
- __entry->writeback = writeback;
- __entry->bypass = bypass;
- ),
-
- TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u",
- __entry->uuid, __entry->inode,
- __entry->rwbs, (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->writeback, __entry->bypass)
+DEFINE_EVENT(bcache_bio, bcache_promote,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
);
TRACE_EVENT(bcache_write_throttle,
@@ -231,113 +167,14 @@ TRACE_EVENT(bcache_write_throttle,
__entry->nr_sector, __entry->delay)
);
-DEFINE_EVENT(bcache_bio, bcache_read_retry,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
-
-DECLARE_EVENT_CLASS(page_alloc_fail,
- TP_PROTO(struct bch_fs *c, u64 size),
- TP_ARGS(c, size),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, size )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->size = size;
- ),
-
- TP_printk("%pU size %llu", __entry->uuid, __entry->size)
-);
-
/* Journal */
-DECLARE_EVENT_CLASS(cache_set,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- ),
-
- TP_printk("%pU", __entry->uuid)
-);
-
-DEFINE_EVENT(bkey, bcache_journal_replay_key,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
-);
-
-TRACE_EVENT(bcache_journal_next_bucket,
- TP_PROTO(struct bch_dev *ca, unsigned cur_idx, unsigned last_idx),
- TP_ARGS(ca, cur_idx, last_idx),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(unsigned, cur_idx )
- __field(unsigned, last_idx )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, ca->uuid.b, 16);
- __entry->cur_idx = cur_idx;
- __entry->last_idx = last_idx;
- ),
-
- TP_printk("%pU cur %u last %u", __entry->uuid,
- __entry->cur_idx, __entry->last_idx)
-);
-
-TRACE_EVENT(bcache_journal_write_oldest,
- TP_PROTO(struct bch_fs *c, u64 seq),
- TP_ARGS(c, seq),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, seq )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->seq = seq;
- ),
-
- TP_printk("%pU seq %llu", __entry->uuid, __entry->seq)
-);
-
-TRACE_EVENT(bcache_journal_write_oldest_done,
- TP_PROTO(struct bch_fs *c, u64 seq, unsigned written),
- TP_ARGS(c, seq, written),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, seq )
- __field(unsigned, written )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->seq = seq;
- __entry->written = written;
- ),
-
- TP_printk("%pU seq %llu written %u", __entry->uuid, __entry->seq,
- __entry->written)
-);
-
-DEFINE_EVENT(cache_set, bcache_journal_full,
+DEFINE_EVENT(bch_fs, bcache_journal_full,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+DEFINE_EVENT(bch_fs, bcache_journal_entry_full,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
@@ -347,56 +184,7 @@ DEFINE_EVENT(bcache_bio, bcache_journal_write,
TP_ARGS(bio)
);
-/* Device state changes */
-
-DEFINE_EVENT(cache_set, fs_read_only,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(cache_set, fs_read_only_done,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DECLARE_EVENT_CLASS(cache,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(unsigned, tier )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, ca->uuid.b, 16);
- __entry->tier = ca->mi.tier;
- ),
-
- TP_printk("%pU tier %u", __entry->uuid, __entry->tier)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_only,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_only_done,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_write,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_write_done,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-/* Searching */
+/* bset.c: */
DEFINE_EVENT(bpos, bkey_pack_pos_fail,
TP_PROTO(struct bpos p),
@@ -462,23 +250,6 @@ DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
TP_ARGS(c, b)
);
-TRACE_EVENT(bcache_btree_node_alloc_fail,
- TP_PROTO(struct bch_fs *c, enum btree_id id),
- TP_ARGS(c, id),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(enum btree_id, id )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->id = id;
- ),
-
- TP_printk("%pU id %u", __entry->uuid, __entry->id)
-);
-
DEFINE_EVENT(btree_node, bcache_btree_node_free,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
@@ -557,11 +328,31 @@ DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize,
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_mca_cannibalize_unlock,
+DEFINE_EVENT(bch_fs, bcache_mca_cannibalize_unlock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
+TRACE_EVENT(bcache_btree_reserve_get_fail,
+ TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
+ TP_ARGS(c, required, cl),
+
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ __field(size_t, required )
+ __field(struct closure *, cl )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+ __entry->required = required;
+ __entry->cl = cl;
+ ),
+
+ TP_printk("%pU required %zu by %p", __entry->uuid,
+ __entry->required, __entry->cl)
+);
+
TRACE_EVENT(bcache_btree_insert_key,
TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k),
TP_ARGS(c, b, k),
@@ -723,75 +514,41 @@ DEFINE_EVENT(btree_node, bcache_btree_gc_rewrite_node_fail,
TP_ARGS(c, b)
);
-DEFINE_EVENT(cache_set, bcache_gc_start,
+DEFINE_EVENT(bch_fs, bcache_gc_start,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_end,
+DEFINE_EVENT(bch_fs, bcache_gc_end,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_coalesce_start,
+DEFINE_EVENT(bch_fs, bcache_gc_coalesce_start,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_coalesce_end,
+DEFINE_EVENT(bch_fs, bcache_gc_coalesce_end,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache, bcache_sectors_saturated,
+DEFINE_EVENT(bch_dev, bcache_sectors_saturated,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
-DEFINE_EVENT(cache_set, bcache_gc_sectors_saturated,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(cache_set, bcache_gc_cannot_inc_gens,
+DEFINE_EVENT(bch_fs, bcache_gc_sectors_saturated,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_periodic,
+DEFINE_EVENT(bch_fs, bcache_gc_cannot_inc_gens,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-TRACE_EVENT(bcache_mark_bucket,
- TP_PROTO(struct bch_dev *ca, const struct bkey *k,
- const struct bch_extent_ptr *ptr,
- int sectors, bool dirty),
- TP_ARGS(ca, k, ptr, sectors, dirty),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u32, inode )
- __field(u64, offset )
- __field(u32, sectors )
- __field(u64, bucket )
- __field(bool, dirty )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, ca->uuid.b, 16);
- __entry->inode = k->p.inode;
- __entry->offset = k->p.offset;
- __entry->sectors = sectors;
- __entry->bucket = PTR_BUCKET_NR(ca, ptr);
- __entry->dirty = dirty;
- ),
-
- TP_printk("%pU %u:%llu sectors %i bucket %llu dirty %i",
- __entry->uuid, __entry->inode, __entry->offset,
- __entry->sectors, __entry->bucket, __entry->dirty)
-);
-
/* Allocator */
TRACE_EVENT(bcache_alloc_batch,
@@ -814,32 +571,12 @@ TRACE_EVENT(bcache_alloc_batch,
__entry->uuid, __entry->free, __entry->total)
);
-TRACE_EVENT(bcache_btree_reserve_get_fail,
- TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
- TP_ARGS(c, required, cl),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(size_t, required )
- __field(struct closure *, cl )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->required = required;
- __entry->cl = cl;
- ),
-
- TP_printk("%pU required %zu by %p", __entry->uuid,
- __entry->required, __entry->cl)
-);
-
-DEFINE_EVENT(cache, bcache_prio_write_start,
+DEFINE_EVENT(bch_dev, bcache_prio_write_start,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
-DEFINE_EVENT(cache, bcache_prio_write_end,
+DEFINE_EVENT(bch_dev, bcache_prio_write_end,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
@@ -865,12 +602,12 @@ TRACE_EVENT(bcache_invalidate,
MINOR(__entry->dev), __entry->offset)
);
-DEFINE_EVENT(cache_set, bcache_rescale_prios,
+DEFINE_EVENT(bch_fs, bcache_rescale_prios,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DECLARE_EVENT_CLASS(cache_bucket_alloc,
+DECLARE_EVENT_CLASS(bucket_alloc,
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
TP_ARGS(ca, reserve),
@@ -887,12 +624,12 @@ DECLARE_EVENT_CLASS(cache_bucket_alloc,
TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve)
);
-DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc,
+DEFINE_EVENT(bucket_alloc, bcache_bucket_alloc,
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
TP_ARGS(ca, reserve)
);
-DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc_fail,
+DEFINE_EVENT(bucket_alloc, bcache_bucket_alloc_fail,
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
TP_ARGS(ca, reserve)
);
@@ -946,37 +683,6 @@ DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc_fail,
TP_ARGS(c, cl)
);
-/* Keylists */
-
-TRACE_EVENT(bcache_keyscan,
- TP_PROTO(unsigned nr_found,
- unsigned start_inode, u64 start_offset,
- unsigned end_inode, u64 end_offset),
- TP_ARGS(nr_found,
- start_inode, start_offset,
- end_inode, end_offset),
-
- TP_STRUCT__entry(
- __field(__u32, nr_found )
- __field(__u32, start_inode )
- __field(__u64, start_offset )
- __field(__u32, end_inode )
- __field(__u64, end_offset )
- ),
-
- TP_fast_assign(
- __entry->nr_found = nr_found;
- __entry->start_inode = start_inode;
- __entry->start_offset = start_offset;
- __entry->end_inode = end_inode;
- __entry->end_offset = end_offset;
- ),
-
- TP_printk("found %u keys from %u:%llu to %u:%llu", __entry->nr_found,
- __entry->start_inode, __entry->start_offset,
- __entry->end_inode, __entry->end_offset)
-);
-
/* Moving IO */
DECLARE_EVENT_CLASS(moving_io,
@@ -1014,11 +720,6 @@ DEFINE_EVENT(moving_io, bcache_move_write,
TP_ARGS(k)
);
-DEFINE_EVENT(moving_io, bcache_move_write_done,
- TP_PROTO(struct bkey *k),
- TP_ARGS(k)
-);
-
DEFINE_EVENT(moving_io, bcache_copy_collision,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
@@ -1031,7 +732,7 @@ DEFINE_EVENT(page_alloc_fail, bcache_moving_gc_alloc_fail,
TP_ARGS(c, size)
);
-DEFINE_EVENT(cache, bcache_moving_gc_start,
+DEFINE_EVENT(bch_dev, bcache_moving_gc_start,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
@@ -1060,16 +761,6 @@ TRACE_EVENT(bcache_moving_gc_end,
__entry->buckets_moved)
);
-DEFINE_EVENT(cache, bcache_moving_gc_reserve_empty,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_moving_gc_no_work,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
DEFINE_EVENT(bkey, bcache_gc_copy,
TP_PROTO(const struct bkey *k),
TP_ARGS(k)
@@ -1077,22 +768,12 @@ DEFINE_EVENT(bkey, bcache_gc_copy,
/* Tiering */
-DEFINE_EVENT(cache_set, bcache_tiering_refill_start,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(cache_set, bcache_tiering_refill_end,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
DEFINE_EVENT(page_alloc_fail, bcache_tiering_alloc_fail,
TP_PROTO(struct bch_fs *c, u64 size),
TP_ARGS(c, size)
);
-DEFINE_EVENT(cache_set, bcache_tiering_start,
+DEFINE_EVENT(bch_fs, bcache_tiering_start,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
@@ -1123,49 +804,6 @@ DEFINE_EVENT(bkey, bcache_tiering_copy,
TP_ARGS(k)
);
-/* Background writeback */
-
-DEFINE_EVENT(bkey, bcache_writeback,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
-);
-
-DEFINE_EVENT(bkey, bcache_writeback_collision,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
-);
-
-TRACE_EVENT(bcache_writeback_error,
- TP_PROTO(struct bkey *k, bool write, int error),
- TP_ARGS(k, write, error),
-
- TP_STRUCT__entry(
- __field(u32, size )
- __field(u32, inode )
- __field(u64, offset )
- __field(bool, write )
- __field(int, error )
- ),
-
- TP_fast_assign(
- __entry->inode = k->p.inode;
- __entry->offset = k->p.offset;
- __entry->size = k->size;
- __entry->write = write;
- __entry->error = error;
- ),
-
- TP_printk("%u:%llu len %u %s error %d", __entry->inode,
- __entry->offset, __entry->size,
- __entry->write ? "write" : "read",
- __entry->error)
-);
-
-DEFINE_EVENT(page_alloc_fail, bcache_writeback_alloc_fail,
- TP_PROTO(struct bch_fs *c, u64 size),
- TP_ARGS(c, size)
-);
-
#endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */