author | Kent Overstreet <kent.overstreet@gmail.com> | 2017-03-17 17:10:15 -0800
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-03-19 17:23:29 -0800
commit | 8b9eb1274b81c9144238c589f0a9e12ea5961898 (patch)
tree | 2b506fff714509a6aab586e9a01c351da121a5ee
parent | 6115d0bcf8fee40ab9a98ff8068ea4d3bce5dbdb (diff)
bcachefs: delete old style bcache block device interfaces
37 files changed, 124 insertions, 4982 deletions
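Much of the code this commit deletes followed one recurring pattern: submit a bio through a punting helper (`bch_generic_make_request()`), signal a stack closure from the bio's end_io hook, and wait on the closure; the replacement hunks (visible in `btree_io.c` and `debug.c` below) simply call `submit_bio_wait()`. The sketch below condenses that before/after shape for readers skimming the diff; the function names `read_endio`, `read_sync_old`, and `read_sync_new` are illustrative only, not symbols from the commit, and the snippet assumes the same kernel-era APIs the diff itself uses.

```c
#include "bcache.h"		/* struct bch_fs, closures, bch_generic_make_request() */
#include <linux/bio.h>		/* struct bio, submit_bio_wait() */

/* Old pattern (removed by this commit): the end_io hook drops a reference
 * on a stack closure, and the caller waits on it after punting the bio. */
static void read_endio(struct bio *bio)
{
	closure_put(bio->bi_private);
}

static void read_sync_old(struct bch_fs *c, struct bio *bio)
{
	struct closure cl;

	closure_init_stack(&cl);
	bio->bi_end_io  = read_endio;
	bio->bi_private = &cl;

	closure_get(&cl);
	bch_generic_make_request(bio, c);	/* punted to a workqueue if needed */
	closure_sync(&cl);			/* wait for read_endio() to fire */
}

/* New pattern: let the block layer submit and wait. */
static void read_sync_new(struct bio *bio)
{
	submit_bio_wait(bio);
}
```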
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 0ad8ba903460..2f4c628b69c3 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -1,10 +1,9 @@ obj-$(CONFIG_BCACHE_FS) += bcachefs.o -bcachefs-y := acl.o alloc.o bkey.o bkey_methods.o blockdev.o\ - bset.o btree_cache.o btree_gc.o btree_io.o btree_iter.o btree_update.o\ +bcachefs-y := acl.o alloc.o bkey.o bkey_methods.o bset.o\ + btree_cache.o btree_gc.o btree_io.o btree_iter.o btree_update.o\ buckets.o chardev.o checksum.o clock.o compress.o debug.o dirent.o\ - error.o extents.o fs.o fs-gc.o fs-io.o inode.o io.o journal.o keybuf.o\ - keylist.o migrate.o move.o movinggc.o notify.o opts.o request.o\ - siphash.o six.o stats.o super.o super-io.o sysfs.o tier.o trace.o\ - util.o writeback.o xattr.o + error.o extents.o fs.o fs-gc.o fs-io.o inode.o io.o journal.o keylist.o\ + migrate.o move.o movinggc.o opts.o siphash.o six.o super.o super-io.o\ + sysfs.o tier.o trace.o util.o xattr.o diff --git a/fs/bcachefs/bcache.h b/fs/bcachefs/bcache.h index 09e5de351dfb..61209bfb15d4 100644 --- a/fs/bcachefs/bcache.h +++ b/fs/bcachefs/bcache.h @@ -282,15 +282,12 @@ do { \ BCH_TIME_STAT(journal_flush_seq, us, us) #include "alloc_types.h" -#include "blockdev_types.h" #include "buckets_types.h" #include "clock_types.h" #include "io_types.h" #include "journal_types.h" #include "keylist_types.h" -#include "keybuf_types.h" #include "move_types.h" -#include "stats_types.h" #include "super_types.h" /* 256k, in sectors */ @@ -435,10 +432,6 @@ struct bch_dev { struct work_struct io_error_work; /* The rest of this all shows up in sysfs */ -#define IO_ERROR_SHIFT 20 - atomic_t io_errors; - atomic_t io_count; - atomic64_t meta_sectors_written; atomic64_t btree_sectors_written; u64 __percpu *sectors_written; @@ -454,7 +447,6 @@ struct bch_dev { */ enum { BCH_FS_INITIAL_GC_DONE, - BCH_FS_DETACHING, BCH_FS_EMERGENCY_RO, BCH_FS_WRITE_DISABLE_COMPLETE, BCH_FS_GC_STOPPING, @@ -723,11 +715,6 @@ struct bch_fs { atomic64_t key_version; - /* For punting bio submissions to workqueue, io.c */ - struct bio_list bio_submit_list; - struct work_struct bio_submit_work; - spinlock_t bio_submit_lock; - struct bio_list read_retry_list; struct work_struct read_retry_work; spinlock_t read_retry_lock; @@ -738,10 +725,6 @@ struct bch_fs { unsigned writeback_pages_max; atomic_long_t nr_inodes; - /* NOTIFICATIONS */ - struct mutex uevent_lock; - struct kobj_uevent_env uevent_env; - /* DEBUG JUNK */ struct dentry *debug; struct btree_debug btree_debug[BTREE_ID_NR]; @@ -765,28 +748,8 @@ struct bch_fs { unsigned bucket_journal_seq; - /* CACHING OTHER BLOCK DEVICES */ - mempool_t search; - struct radix_tree_root devices; - struct list_head cached_devs; - u64 cached_dev_sectors; - struct closure caching; - -#define CONGESTED_MAX 1024 - unsigned congested_last_us; - atomic_t congested; - /* The rest of this all shows up in sysfs */ - unsigned congested_read_threshold_us; - unsigned congested_write_threshold_us; - - struct cache_accounting accounting; atomic_long_t cache_read_races; - atomic_long_t writeback_keys_done; - atomic_long_t writeback_keys_failed; - - unsigned error_limit; - unsigned error_decay; unsigned foreground_write_ratelimit_enabled:1; unsigned copy_gc_enabled:1; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c221747b4a4b..5bed6ed488ab 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1344,103 +1344,6 @@ struct btree_node_entry { }; } __attribute__((packed)); -/* OBSOLETE */ - -#define 
BITMASK(name, type, field, offset, end) \ -static const unsigned name##_OFFSET = offset; \ -static const unsigned name##_BITS = (end - offset); \ -static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \ - \ -static inline __u64 name(const type *k) \ -{ return (k->field >> offset) & ~(~0ULL << (end - offset)); } \ - \ -static inline void SET_##name(type *k, __u64 v) \ -{ \ - k->field &= ~(~(~0ULL << (end - offset)) << offset); \ - k->field |= (v & ~(~0ULL << (end - offset))) << offset; \ -} - -struct bkey_v0 { - __u64 high; - __u64 low; - __u64 ptr[]; -}; - -#define KEY0_FIELD(name, field, offset, size) \ - BITMASK(name, struct bkey_v0, field, offset, size) - -KEY0_FIELD(KEY0_PTRS, high, 60, 63) -KEY0_FIELD(KEY0_CSUM, high, 56, 58) -KEY0_FIELD(KEY0_DIRTY, high, 36, 37) - -KEY0_FIELD(KEY0_SIZE, high, 20, 36) -KEY0_FIELD(KEY0_INODE, high, 0, 20) - -static inline unsigned long bkey_v0_u64s(const struct bkey_v0 *k) -{ - return (sizeof(struct bkey_v0) / sizeof(__u64)) + KEY0_PTRS(k); -} - -static inline struct bkey_v0 *bkey_v0_next(const struct bkey_v0 *k) -{ - __u64 *d = (__u64 *) k; - - return (struct bkey_v0 *) (d + bkey_v0_u64s(k)); -} - -struct jset_v0 { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 keys; - - __u64 last_seq; - - __BKEY_PADDED(uuid_bucket, 4); - __BKEY_PADDED(btree_root, 4); - __u16 btree_level; - __u16 pad[3]; - - __u64 prio_bucket[64]; - - union { - struct bkey start[0]; - __u64 d[0]; - }; -}; - -/* UUIDS - per backing device/flash only volume metadata */ - -struct uuid_entry_v0 { - uuid_le uuid; - __u8 label[32]; - __u32 first_reg; - __u32 last_reg; - __u32 invalidated; - __u32 pad; -}; - -struct uuid_entry { - union { - struct { - uuid_le uuid; - __u8 label[32]; - __u32 first_reg; - __u32 last_reg; - __u32 invalidated; - - __u32 flags; - /* Size of flash only volumes */ - __u64 sectors; - }; - - __u8 pad[128]; - }; -}; - -BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); - #ifdef __cplusplus } #endif diff --git a/fs/bcachefs/blockdev.c b/fs/bcachefs/blockdev.c deleted file mode 100644 index a4522ad2836f..000000000000 --- a/fs/bcachefs/blockdev.c +++ /dev/null @@ -1,819 +0,0 @@ - -#include "bcache.h" -#include "blockdev.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "checksum.h" -#include "error.h" -#include "inode.h" -#include "request.h" -#include "super-io.h" -#include "writeback.h" - -#include <linux/kthread.h> -#include <linux/module.h> -#include <linux/random.h> - -static int bch_blockdev_major; -static DEFINE_IDA(bch_blockdev_minor); -static LIST_HEAD(uncached_devices); -static DEFINE_MUTEX(bch_blockdev_lock); - -static struct kmem_cache *bch_search_cache; - -static void write_bdev_super_endio(struct bio *bio) -{ - struct cached_dev *dc = bio->bi_private; - /* XXX: error checking */ - - closure_put(&dc->sb_write); -} - -static void bch_write_bdev_super_unlock(struct closure *cl) -{ - struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write); - - up(&dc->sb_write_mutex); -} - -void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) -{ - struct backingdev_sb *sb = dc->disk_sb.sb; - struct closure *cl = &dc->sb_write; - struct bio *bio = dc->disk_sb.bio; - - down(&dc->sb_write_mutex); - closure_init(cl, parent); - - sb->csum = csum_vstruct(NULL, BCH_CSUM_CRC64, - (struct nonce) { 0 }, sb).lo; - - bio_reset(bio); - bio->bi_bdev = dc->disk_sb.bdev; - bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); - bio->bi_iter.bi_size = - roundup(vstruct_bytes(sb), - 
bdev_logical_block_size(dc->disk_sb.bdev)); - bio->bi_end_io = write_bdev_super_endio; - bio->bi_private = dc; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FUA|REQ_META); - bch_bio_map(bio, sb); - - closure_get(cl); - - closure_return_with_destructor(cl, bch_write_bdev_super_unlock); -} - -static int open_dev(struct block_device *b, fmode_t mode) -{ - struct bcache_device *d = b->bd_disk->private_data; - - if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) - return -ENXIO; - - closure_get(&d->cl); - return 0; -} - -static void release_dev(struct gendisk *b, fmode_t mode) -{ - struct bcache_device *d = b->private_data; - - closure_put(&d->cl); -} - -static int ioctl_dev(struct block_device *b, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct bcache_device *d = b->bd_disk->private_data; - - return d->ioctl(d, mode, cmd, arg); -} - -static const struct block_device_operations bcache_ops = { - .open = open_dev, - .release = release_dev, - .ioctl = ioctl_dev, - .owner = THIS_MODULE, -}; - -void bch_blockdev_stop(struct bcache_device *d) -{ - if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags)) - closure_queue(&d->cl); -} - -static void bcache_device_unlink(struct bcache_device *d) -{ - if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { - sysfs_remove_link(&d->c->kobj, d->name); - sysfs_remove_link(&d->kobj, "cache"); - } -} - -static void bcache_device_link(struct bcache_device *d, struct bch_fs *c, - const char *name) -{ - snprintf(d->name, BCACHEDEVNAME_SIZE, - "%s%llu", name, bcache_dev_inum(d)); - - WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || - sysfs_create_link(&c->kobj, &d->kobj, d->name), - "Couldn't create device <-> cache set symlinks"); - - clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); -} - -static void bcache_device_detach(struct bcache_device *d) -{ - if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { - mutex_lock(&d->inode_lock); - bch_inode_rm(d->c, bcache_dev_inum(d)); - mutex_unlock(&d->inode_lock); - } - - bcache_device_unlink(d); - - radix_tree_delete(&d->c->devices, bcache_dev_inum(d)); - - closure_put(&d->c->caching); - d->c = NULL; -} - -static int bcache_device_attach(struct bcache_device *d, struct bch_fs *c) -{ - int ret; - - ret = radix_tree_insert(&c->devices, bcache_dev_inum(d), d); - if (ret) { - pr_err("radix_tree_insert() error for inum %llu", - bcache_dev_inum(d)); - return ret; - } - - d->c = c; - closure_get(&c->caching); - - return ret; -} - -static void bcache_device_free(struct bcache_device *d) -{ - pr_info("%s stopped", d->disk->disk_name); - - if (d->c) - bcache_device_detach(d); - if (d->disk && d->disk->flags & GENHD_FL_UP) - del_gendisk(d->disk); - if (d->disk && d->disk->queue) - blk_cleanup_queue(d->disk->queue); - if (d->disk) { - ida_simple_remove(&bch_blockdev_minor, d->disk->first_minor); - put_disk(d->disk); - } - - bioset_exit(&d->bio_split); - - closure_debug_destroy(&d->cl); -} - -static int bcache_device_init(struct bcache_device *d, unsigned block_size, - sector_t sectors) -{ - struct request_queue *q; - int minor; - - mutex_init(&d->inode_lock); - - minor = ida_simple_get(&bch_blockdev_minor, 0, MINORMASK + 1, GFP_KERNEL); - if (minor < 0) { - pr_err("cannot allocate minor"); - return minor; - } - - if (!(d->disk = alloc_disk(1)) || - bioset_init(&d->bio_split, 4, offsetof(struct bch_read_bio, bio))) { - pr_err("cannot allocate disk"); - ida_simple_remove(&bch_blockdev_minor, minor); - return -ENOMEM; - } - - set_capacity(d->disk, sectors); - snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", minor); 
- - d->disk->major = bch_blockdev_major; - d->disk->first_minor = minor; - d->disk->fops = &bcache_ops; - d->disk->private_data = d; - - q = blk_alloc_queue(GFP_KERNEL); - if (!q) { - pr_err("cannot allocate queue"); - return -ENOMEM; - } - - blk_queue_make_request(q, NULL); - d->disk->queue = q; - q->queuedata = d; - q->backing_dev_info.congested_data = d; - q->limits.max_hw_sectors = UINT_MAX; - q->limits.max_sectors = UINT_MAX; - q->limits.max_segment_size = UINT_MAX; - q->limits.max_segments = BIO_MAX_PAGES; - blk_queue_max_discard_sectors(q, UINT_MAX); - q->limits.discard_granularity = 512; - q->limits.io_min = block_size; - q->limits.logical_block_size = block_size; - q->limits.physical_block_size = block_size; - set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); - clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags); - set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); - - blk_queue_write_cache(q, true, true); - - return 0; -} - -/* Cached device */ - -static void calc_cached_dev_sectors(struct bch_fs *c) -{ - u64 sectors = 0; - struct cached_dev *dc; - - list_for_each_entry(dc, &c->cached_devs, list) - sectors += bdev_sectors(dc->disk_sb.bdev); - - c->cached_dev_sectors = sectors; -} - -void bch_cached_dev_run(struct cached_dev *dc) -{ - struct bcache_device *d = &dc->disk; - char buf[BCH_SB_LABEL_SIZE + 1]; - char *env[] = { - "DRIVER=bcache", - kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", - dc->disk_sb.sb->disk_uuid.b), - NULL, - NULL, - }; - - memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - buf[BCH_SB_LABEL_SIZE] = '\0'; - env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); - - if (atomic_xchg(&dc->running, 1)) { - kfree(env[1]); - kfree(env[2]); - return; - } - - if (!d->c && - BDEV_STATE(dc->disk_sb.sb) != BDEV_STATE_NONE) { - struct closure cl; - - closure_init_stack(&cl); - - SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_STALE); - bch_write_bdev_super(dc, &cl); - closure_sync(&cl); - } - - add_disk(d->disk); - bd_link_disk_holder(dc->disk_sb.bdev, dc->disk.disk); - /* won't show up in the uevent file, use udevadm monitor -e instead - * only class / kset properties are persistent */ - kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); - kfree(env[1]); - kfree(env[2]); - - if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || - sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) - pr_debug("error creating sysfs link"); -} - -static void cached_dev_detach_finish(struct work_struct *w) -{ - struct cached_dev *dc = container_of(w, struct cached_dev, detach); - char buf[BDEVNAME_SIZE]; - struct closure cl; - - closure_init_stack(&cl); - - BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); - BUG_ON(atomic_read(&dc->count)); - - mutex_lock(&bch_blockdev_lock); - - memset(&dc->disk_sb.sb->set_uuid, 0, 16); - SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_NONE); - - bch_write_bdev_super(dc, &cl); - closure_sync(&cl); - - bcache_device_detach(&dc->disk); - list_move(&dc->list, &uncached_devices); - - clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); - clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); - - mutex_unlock(&bch_blockdev_lock); - - pr_info("Caching disabled for %s", bdevname(dc->disk_sb.bdev, buf)); - - /* Drop ref we took in cached_dev_detach() */ - closure_put(&dc->disk.cl); -} - -void bch_cached_dev_detach(struct cached_dev *dc) -{ - if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) - return; - - if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) - return; - - /* - * Block the 
device from being closed and freed until we're finished - * detaching - */ - closure_get(&dc->disk.cl); - - dc->writeback_pd.rate.rate = UINT_MAX; - bch_writeback_queue(dc); - cached_dev_put(dc); -} - -int bch_cached_dev_attach(struct cached_dev *dc, struct bch_fs *c) -{ - __le64 rtime = cpu_to_le64(ktime_get_seconds()); - char buf[BDEVNAME_SIZE]; - bool found; - int ret; - - lockdep_assert_held(&c->state_lock); - - bdevname(dc->disk_sb.bdev, buf); - - if (memcmp(&dc->disk_sb.sb->set_uuid, - &c->sb.uuid, - sizeof(c->sb.uuid))) - return -ENOENT; - - if (dc->disk.c) { - pr_err("Can't attach %s: already attached", buf); - return -EINVAL; - } - - if (!bch_fs_running(c)) { - pr_err("Can't attach %s: not running", buf); - return -EINVAL; - } - - if (le16_to_cpu(dc->disk_sb.sb->block_size) < c->sb.block_size) { - /* Will die */ - pr_err("Couldn't attach %s: block size less than set's block size", - buf); - return -EINVAL; - } - - found = !bch_cached_dev_inode_find_by_uuid(c, - &dc->disk_sb.sb->disk_uuid, - &dc->disk.inode); - - if (!found && BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set", buf); - return -ENOENT; - } - - if (found && - (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE || - BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE)) { - found = false; - bch_inode_rm(c, bcache_dev_inum(&dc->disk)); - } - - /* Deadlocks since we're called via sysfs... - sysfs_remove_file(&dc->kobj, &sysfs_attach); - */ - - if (!found) { - struct closure cl; - - closure_init_stack(&cl); - - bkey_inode_blockdev_init(&dc->disk.inode.k_i); - dc->disk.inode.k.type = BCH_INODE_BLOCKDEV; - SET_CACHED_DEV(&dc->disk.inode.v, true); - dc->disk.inode.v.i_uuid = dc->disk_sb.sb->disk_uuid; - memcpy(dc->disk.inode.v.i_label, - dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - dc->disk.inode.v.i_ctime = rtime; - dc->disk.inode.v.i_mtime = rtime; - - ret = bch_inode_create(c, &dc->disk.inode.k_i, - 0, BLOCKDEV_INODE_MAX, - &c->unused_inode_hint); - if (ret) { - pr_err("Error %d, not caching %s", ret, buf); - return ret; - } - - pr_info("attached inode %llu", bcache_dev_inum(&dc->disk)); - - dc->disk_sb.sb->set_uuid = c->sb.uuid; - SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_CLEAN); - - bch_write_bdev_super(dc, &cl); - closure_sync(&cl); - } else { - dc->disk.inode.v.i_mtime = rtime; - bch_btree_update(c, BTREE_ID_INODES, - &dc->disk.inode.k_i, NULL); - } - - /* Count dirty sectors before attaching */ - if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_DIRTY) - bch_sectors_dirty_init(dc, c); - - ret = bcache_device_attach(&dc->disk, c); - if (ret) - return ret; - - list_move(&dc->list, &c->cached_devs); - calc_cached_dev_sectors(c); - - /* - * dc->c must be set before dc->count != 0 - paired with the mb in - * cached_dev_get() - */ - smp_wmb(); - atomic_set(&dc->count, 1); - - if (bch_cached_dev_writeback_start(dc)) - return -ENOMEM; - - if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_DIRTY) { - atomic_set(&dc->has_dirty, 1); - atomic_inc(&dc->count); - } - - bch_cached_dev_run(dc); - bcache_device_link(&dc->disk, c, "bdev"); - - pr_info("Caching %s as %s on set %pU", - bdevname(dc->disk_sb.bdev, buf), dc->disk.disk->disk_name, - dc->disk.c->sb.uuid.b); - return 0; -} - -void bch_attach_backing_devs(struct bch_fs *c) -{ - struct cached_dev *dc, *t; - - lockdep_assert_held(&c->state_lock); - - mutex_lock(&bch_blockdev_lock); - - list_for_each_entry_safe(dc, t, &uncached_devices, list) - bch_cached_dev_attach(dc, c); - - mutex_unlock(&bch_blockdev_lock); -} - -void bch_cached_dev_release(struct 
kobject *kobj) -{ - struct cached_dev *dc = container_of(kobj, struct cached_dev, - disk.kobj); - kfree(dc); - module_put(THIS_MODULE); -} - -static void cached_dev_free(struct closure *cl) -{ - struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); - - bch_cached_dev_writeback_stop(dc); - bch_cached_dev_writeback_free(dc); - - mutex_lock(&bch_blockdev_lock); - - if (atomic_read(&dc->running)) - bd_unlink_disk_holder(dc->disk_sb.bdev, dc->disk.disk); - bcache_device_free(&dc->disk); - list_del(&dc->list); - - mutex_unlock(&bch_blockdev_lock); - - bch_free_super((void *) &dc->disk_sb); - - kobject_put(&dc->disk.kobj); -} - -static void cached_dev_flush(struct closure *cl) -{ - struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); - struct bcache_device *d = &dc->disk; - - bch_cache_accounting_destroy(&dc->accounting); - bcache_device_unlink(d); - kobject_del(&d->kobj); - - continue_at(cl, cached_dev_free, system_wq); -} - -static int cached_dev_init(struct cached_dev *dc, unsigned block_size) -{ - int ret; - struct io *io; - struct request_queue *q = bdev_get_queue(dc->disk_sb.bdev); - - dc->sequential_cutoff = 4 << 20; - - for (io = dc->io; io < dc->io + RECENT_IO; io++) { - list_add(&io->lru, &dc->io_lru); - hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); - } - - dc->disk.stripe_size = q->limits.io_opt >> 9; - - if (dc->disk.stripe_size) - dc->partial_stripes_expensive = - q->limits.raid_partial_stripes_expensive; - - ret = bcache_device_init(&dc->disk, block_size, - dc->disk_sb.bdev->bd_part->nr_sects - - le64_to_cpu(dc->disk_sb.sb->data_offset)); - if (ret) - return ret; - - dc->disk.disk->queue->backing_dev_info.ra_pages = - max(dc->disk.disk->queue->backing_dev_info.ra_pages, - q->backing_dev_info.ra_pages); - - bch_cached_dev_request_init(dc); - ret = bch_cached_dev_writeback_init(dc); - if (ret) - return ret; - - return 0; -} - -/* Cached device - bcache superblock */ - -static const char *bdev_validate_super(struct backingdev_sb *sb) -{ - switch (le64_to_cpu(sb->version)) { - case BCACHE_SB_VERSION_BDEV: - sb->data_offset = cpu_to_le64(BDEV_DATA_START_DEFAULT); - break; - case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: - if (le64_to_cpu(sb->data_offset) < BDEV_DATA_START_DEFAULT) - return "Bad data offset"; - - break; - default: - return"Unsupported superblock version"; - } - - sb->last_mount = cpu_to_le32(get_seconds()); - - return NULL; -} - -const char *bch_backing_dev_register(struct bcache_superblock *sb) -{ - char name[BDEVNAME_SIZE]; - const char *err; - struct bch_fs *c; - struct cached_dev *dc; - - dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) - return "cannot allocate memory"; - - __module_get(THIS_MODULE); - INIT_LIST_HEAD(&dc->list); - closure_init(&dc->disk.cl, NULL); - set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); - kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); - INIT_WORK(&dc->detach, cached_dev_detach_finish); - sema_init(&dc->sb_write_mutex, 1); - INIT_LIST_HEAD(&dc->io_lru); - spin_lock_init(&dc->io_lock); - bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); - - memcpy(&dc->disk_sb, sb, sizeof(*sb)); - dc->disk_sb.bdev->bd_holder = dc; - memset(sb, 0, sizeof(*sb)); - - err = bdev_validate_super(dc->disk_sb.sb); - if (err) - goto err; - - if (cached_dev_init(dc, le16_to_cpu(dc->disk_sb.sb->block_size) << 9)) - goto err; - - err = "error creating kobject"; - if (kobject_add(&dc->disk.kobj, - &part_to_dev(dc->disk_sb.bdev->bd_part)->kobj, - "bcache")) - goto err; - - err = "error accounting kobject"; - if 
(bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) - goto err; - - pr_info("registered backing device %s", - bdevname(dc->disk_sb.bdev, name)); - - list_add(&dc->list, &uncached_devices); - c = bch_uuid_to_fs(dc->disk_sb.sb->set_uuid); - if (c) { - bch_cached_dev_attach(dc, c); - closure_put(&c->cl); - } - - if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE || - BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE) - bch_cached_dev_run(dc); - - return NULL; -err: - bch_blockdev_stop(&dc->disk); - return err; -} - -/* Flash only volumes */ - -void bch_blockdev_volume_release(struct kobject *kobj) -{ - struct bcache_device *d = container_of(kobj, struct bcache_device, - kobj); - kfree(d); -} - -static void blockdev_volume_free(struct closure *cl) -{ - struct bcache_device *d = container_of(cl, struct bcache_device, cl); - - bcache_device_free(d); - kobject_put(&d->kobj); -} - -static void blockdev_volume_flush(struct closure *cl) -{ - struct bcache_device *d = container_of(cl, struct bcache_device, cl); - - bcache_device_unlink(d); - kobject_del(&d->kobj); - continue_at(cl, blockdev_volume_free, system_wq); -} - -static int blockdev_volume_run(struct bch_fs *c, - struct bkey_s_c_inode_blockdev inode) -{ - struct bcache_device *d = kzalloc(sizeof(struct bcache_device), - GFP_KERNEL); - int ret = -ENOMEM; - - if (!d) - return ret; - - bkey_reassemble(&d->inode.k_i, inode.s_c); - - closure_init(&d->cl, NULL); - set_closure_fn(&d->cl, blockdev_volume_flush, system_wq); - - kobject_init(&d->kobj, &bch_blockdev_volume_ktype); - - ret = bcache_device_init(d, block_bytes(c), - le64_to_cpu(inode.v->i_size) >> 9); - if (ret) - goto err; - - ret = bcache_device_attach(d, c); - if (ret) - goto err; - - bch_blockdev_volume_request_init(d); - add_disk(d->disk); - - if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) - goto err; - - bcache_device_link(d, c, "volume"); - - return 0; -err: - kobject_put(&d->kobj); - return ret; -} - -int bch_blockdev_volumes_start(struct bch_fs *c) -{ - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_c_inode_blockdev inode; - int ret = 0; - - if (!bch_fs_running(c)) - return -EINVAL; - - for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) { - if (k.k->p.inode >= BLOCKDEV_INODE_MAX) - break; - - if (k.k->type != BCH_INODE_BLOCKDEV) - continue; - - inode = bkey_s_c_to_inode_blockdev(k); - - ret = blockdev_volume_run(c, inode); - if (ret) - break; - } - bch_btree_iter_unlock(&iter); - - return ret; -} - -int bch_blockdev_volume_create(struct bch_fs *c, u64 size) -{ - __le64 rtime = cpu_to_le64(ktime_get_seconds()); - struct bkey_i_inode_blockdev inode; - int ret; - - bkey_inode_blockdev_init(&inode.k_i); - get_random_bytes(&inode.v.i_uuid, sizeof(inode.v.i_uuid)); - inode.v.i_ctime = rtime; - inode.v.i_mtime = rtime; - inode.v.i_size = cpu_to_le64(size); - - ret = bch_inode_create(c, &inode.k_i, 0, BLOCKDEV_INODE_MAX, - &c->unused_inode_hint); - if (ret) { - pr_err("Can't create volume: %d", ret); - return ret; - } - - return blockdev_volume_run(c, inode_blockdev_i_to_s_c(&inode)); -} - -void bch_blockdevs_stop(struct bch_fs *c) -{ - struct cached_dev *dc; - struct bcache_device *d; - struct radix_tree_iter iter; - void **slot; - - mutex_lock(&bch_blockdev_lock); - rcu_read_lock(); - - radix_tree_for_each_slot(slot, &c->devices, &iter, 0) { - d = radix_tree_deref_slot(slot); - - if (CACHED_DEV(&d->inode.v) && - test_bit(BCH_FS_DETACHING, &c->flags)) { - dc = container_of(d, struct cached_dev, disk); - bch_cached_dev_detach(dc); - } else 
{ - bch_blockdev_stop(d); - } - } - - rcu_read_unlock(); - mutex_unlock(&bch_blockdev_lock); -} - -void bch_fs_blockdev_exit(struct bch_fs *c) -{ - mempool_exit(&c->search); -} - -int bch_fs_blockdev_init(struct bch_fs *c) -{ - return mempool_init_slab_pool(&c->search, 1, bch_search_cache); -} - -void bch_blockdev_exit(void) -{ - kmem_cache_destroy(bch_search_cache); - - if (bch_blockdev_major >= 0) - unregister_blkdev(bch_blockdev_major, "bcache"); -} - -int __init bch_blockdev_init(void) -{ - bch_blockdev_major = register_blkdev(0, "bcache"); - if (bch_blockdev_major < 0) - return bch_blockdev_major; - - bch_search_cache = KMEM_CACHE(search, 0); - if (!bch_search_cache) - return -ENOMEM; - - return 0; -} diff --git a/fs/bcachefs/blockdev.h b/fs/bcachefs/blockdev.h deleted file mode 100644 index 5423d77644f8..000000000000 --- a/fs/bcachefs/blockdev.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _BCACHE_BLOCKDEV_H -#define _BCACHE_BLOCKDEV_H - -#include "blockdev_types.h" -#include "io_types.h" - -struct search { - /* Stack frame for bio_complete */ - struct closure cl; - - union { - struct bch_read_bio rbio; - struct bch_write_bio wbio; - }; - /* Not modified */ - struct bio *orig_bio; - struct bcache_device *d; - - unsigned inode; - unsigned write:1; - - /* Flags only used for reads */ - unsigned recoverable:1; - unsigned read_dirty_data:1; - unsigned cache_miss:1; - - /* - * For reads: bypass read from cache and insertion into cache - * For writes: discard key range from cache, sending the write to - * the backing device (if there is a backing device) - */ - unsigned bypass:1; - - unsigned long start_time; - - /* - * Mostly only used for writes. For reads, we still make use of - * some trivial fields: - * - c - * - error - */ - struct bch_write_op iop; -}; - -#ifndef NO_BCACHE_BLOCKDEV - -extern struct kobj_type bch_cached_dev_ktype; -extern struct kobj_type bch_blockdev_volume_ktype; - -void bch_write_bdev_super(struct cached_dev *, struct closure *); - -void bch_cached_dev_release(struct kobject *); -void bch_blockdev_volume_release(struct kobject *); - -int bch_cached_dev_attach(struct cached_dev *, struct bch_fs *); -void bch_attach_backing_devs(struct bch_fs *); - -void bch_cached_dev_detach(struct cached_dev *); -void bch_cached_dev_run(struct cached_dev *); -void bch_blockdev_stop(struct bcache_device *); - -const char *bch_backing_dev_register(struct bcache_superblock *); - -int bch_blockdev_volume_create(struct bch_fs *, u64); -int bch_blockdev_volumes_start(struct bch_fs *); - -void bch_blockdevs_stop(struct bch_fs *); - -void bch_fs_blockdev_exit(struct bch_fs *); -int bch_fs_blockdev_init(struct bch_fs *); -void bch_blockdev_exit(void); -int bch_blockdev_init(void); - -#else - -static inline void bch_write_bdev_super(struct cached_dev *dc, - struct closure *cl) {} - -static inline void bch_cached_dev_release(struct kobject *kobj) {} -static inline void bch_blockdev_volume_release(struct kobject *kobj) {} - -static inline int bch_cached_dev_attach(struct cached_dev *dc, struct bch_fs *c) -{ - return 0; -} -static inline void bch_attach_backing_devs(struct bch_fs *c) {} - -static inline void bch_cached_dev_detach(struct cached_dev *dc) {} -static inline void bch_cached_dev_run(struct cached_dev *dc) {} -static inline void bch_blockdev_stop(struct bcache_device *d) {} - -static inline const char *bch_backing_dev_register(struct bcache_superblock *sb) -{ - return "not implemented"; -} - -static inline int bch_blockdev_volume_create(struct bch_fs *c, u64 s) { return 0; } -static 
inline int bch_blockdev_volumes_start(struct bch_fs *c) { return 0; } - -static inline void bch_blockdevs_stop(struct bch_fs *c) {} -static inline void bch_fs_blockdev_exit(struct bch_fs *c) {} -static inline int bch_fs_blockdev_init(struct bch_fs *c) { return 0; } -static inline void bch_blockdev_exit(void) {} -static inline int bch_blockdev_init(void) { return 0; } - -#endif - -static inline void cached_dev_put(struct cached_dev *dc) -{ - if (atomic_dec_and_test(&dc->count)) - schedule_work(&dc->detach); -} - -static inline bool cached_dev_get(struct cached_dev *dc) -{ - if (!atomic_inc_not_zero(&dc->count)) - return false; - - /* Paired with the mb in cached_dev_attach */ - smp_mb__after_atomic(); - return true; -} - -static inline u64 bcache_dev_inum(struct bcache_device *d) -{ - return d->inode.k.p.inode; -} - -static inline struct bcache_device *bch_dev_find(struct bch_fs *c, u64 inode) -{ - return radix_tree_lookup(&c->devices, inode); -} - -#endif /* _BCACHE_BLOCKDEV_H */ diff --git a/fs/bcachefs/blockdev_types.h b/fs/bcachefs/blockdev_types.h deleted file mode 100644 index e51720041f6f..000000000000 --- a/fs/bcachefs/blockdev_types.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _BCACHE_BLOCKDEV_TYPES_H -#define _BCACHE_BLOCKDEV_TYPES_H - -#include "keybuf_types.h" -#include "stats_types.h" -#include "super_types.h" -#include "util.h" - -struct bcache_device { - struct closure cl; - - struct kobject kobj; - - struct bch_fs *c; - - struct rb_node node; - struct bkey_i_inode_blockdev inode; - struct mutex inode_lock; - -#define BCACHEDEVNAME_SIZE 12 - char name[BCACHEDEVNAME_SIZE]; - - struct gendisk *disk; - - unsigned long flags; -#define BCACHE_DEV_CLOSING 0 -#define BCACHE_DEV_DETACHING 1 -#define BCACHE_DEV_UNLINK_DONE 2 - - unsigned nr_stripes; - unsigned stripe_size; - atomic_t *stripe_sectors_dirty; - unsigned long *full_dirty_stripes; - - struct bio_set bio_split; - - unsigned data_csum:1; - - int (*ioctl)(struct bcache_device *, fmode_t, unsigned, unsigned long); -}; - -struct io { - /* Used to track sequential IO so it can be skipped */ - struct hlist_node hash; - struct list_head lru; - - unsigned long last_io; - unsigned sequential; - sector_t last; -}; - -struct cached_dev { - struct list_head list; - struct bcache_device disk; - - //struct backingdev_sb sb; - - struct { - struct backingdev_sb *sb; - struct block_device *bdev; - struct bio *bio; - unsigned page_order; - } disk_sb; - struct closure sb_write; - struct semaphore sb_write_mutex; - - /* Refcount on the cache set. Always nonzero when we're caching. */ - atomic_t count; - struct work_struct detach; - - /* - * Device might not be running if it's dirty and the cache set hasn't - * showed up yet. - */ - atomic_t running; - - /* - * Writes take a shared lock from start to finish; scanning for dirty - * data to refill the rb tree requires an exclusive lock. - */ - struct rw_semaphore writeback_lock; - - /* - * Nonzero, and writeback has a refcount (d->count), iff there is dirty - * data in the cache. Protected by writeback_lock; must have an - * shared lock to set and exclusive lock to clear. 
- */ - atomic_t has_dirty; - - /* for dynamic rate control of writeback */ - struct bch_pd_controller writeback_pd; - struct delayed_work writeback_pd_update; - unsigned writeback_pd_update_seconds; - - struct task_struct *writeback_thread; - struct keybuf writeback_keys; - mempool_t writeback_io_pool; - mempool_t writeback_page_pool; - - /* For tracking sequential IO */ -#define RECENT_IO_BITS 7 -#define RECENT_IO (1 << RECENT_IO_BITS) - struct io io[RECENT_IO]; - struct hlist_head io_hash[RECENT_IO + 1]; - struct list_head io_lru; - spinlock_t io_lock; - - struct cache_accounting accounting; - - /* The rest of this all shows up in sysfs */ - unsigned sequential_cutoff; - unsigned readahead; - - unsigned verify:1; - unsigned bypass_torture_test:1; - - unsigned partial_stripes_expensive:1; - unsigned writeback_metadata:1; - unsigned writeback_running:1; - unsigned char writeback_percent; -}; - -#endif /* _BCACHE_BLOCKDEV_TYPES_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7e8a3f6a17df..c86e7ac7abfd 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -19,7 +19,6 @@ #include "keylist.h" #include "move.h" #include "super-io.h" -#include "writeback.h" #include <linux/slab.h> #include <linux/bitops.h> @@ -463,7 +462,6 @@ void bch_gc(struct bch_fs *c) bch_mark_metadata(c); bch_mark_pending_btree_node_frees(c); - bch_writeback_recalc_oldest_gens(c); for_each_member_device(ca, c, i) atomic_long_set(&ca->saturated_count, 0); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 71478fb1cc89..13e280cc4e18 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1177,22 +1177,14 @@ err: goto out; } -static void btree_node_read_endio(struct bio *bio) -{ - closure_put(bio->bi_private); -} - void bch_btree_node_read(struct bch_fs *c, struct btree *b) { uint64_t start_time = local_clock(); - struct closure cl; struct bio *bio; struct extent_pick_ptr pick; trace_bcache_btree_read(c, b); - closure_init_stack(&cl); - pick = bch_btree_pick_ptr(c, b); if (bch_fs_fatal_err_on(!pick.ca, c, "no cache device for btree node")) { @@ -1204,15 +1196,10 @@ void bch_btree_node_read(struct bch_fs *c, struct btree *b) bio->bi_bdev = pick.ca->disk_sb.bdev; bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_iter.bi_size = btree_bytes(c); - bio->bi_end_io = btree_node_read_endio; - bio->bi_private = &cl; bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); - bch_bio_map(bio, b->data); - closure_get(&cl); - bch_generic_make_request(bio, c); - closure_sync(&cl); + submit_bio_wait(bio); if (bch_dev_fatal_io_err_on(bio->bi_error, pick.ca, "IO error reading bucket %zu", @@ -1562,7 +1549,7 @@ void __bch_btree_node_write(struct bch_fs *c, struct btree *b, b->written += sectors_to_write; - bch_submit_wbio_replicas(wbio, c, &k.key, true); + bch_submit_wbio_replicas(wbio, c, &k.key); } /* diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index b91f53d261d2..f19ccadfbeb8 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -30,13 +30,6 @@ static struct dentry *bch_debug; #ifdef CONFIG_BCACHEFS_DEBUG -static void btree_verify_endio(struct bio *bio) -{ - struct closure *cl = bio->bi_private; - - closure_put(cl); -} - void __bch_btree_verify(struct bch_fs *c, struct btree *b) { struct btree *v = c->verify_data; @@ -44,13 +37,10 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b) struct bset *sorted, *inmemory; struct extent_pick_ptr pick; struct bio *bio; - struct closure cl; if (c->opts.nochanges) return; - closure_init_stack(&cl); - 
btree_node_io_lock(b); mutex_lock(&c->verify_lock); @@ -73,13 +63,9 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b) bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_iter.bi_size = btree_bytes(c); bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); - bio->bi_private = &cl; - bio->bi_end_io = btree_verify_endio; bch_bio_map(bio, n_sorted); - closure_get(&cl); - bch_generic_make_request(bio, c); - closure_sync(&cl); + submit_bio_wait(bio); bio_put(bio); @@ -146,42 +132,6 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b) btree_node_io_unlock(b); } -void bch_data_verify(struct cached_dev *dc, struct bio *bio) -{ - char name[BDEVNAME_SIZE]; - struct bio *check; - struct bio_vec bv; - struct bvec_iter iter; - - check = bio_clone(bio, GFP_NOIO); - if (!check) - return; - bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC); - - if (bio_alloc_pages(check, GFP_NOIO)) - goto out_put; - - submit_bio_wait(check); - - bio_for_each_segment(bv, bio, iter) { - void *p1 = kmap_atomic(bv.bv_page); - void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page); - - if (memcmp(p1 + bv.bv_offset, - p2 + bv.bv_offset, - bv.bv_len)) - panic("verify failed at dev %s sector %llu\n", - bdevname(dc->disk_sb.bdev, name), - (uint64_t) bio->bi_iter.bi_sector); - - kunmap_atomic(p1); - } - - bio_free_pages(check); -out_put: - bio_put(check); -} - #endif #ifdef CONFIG_DEBUG_FS diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h index 035b247886dd..7cb4f4787a88 100644 --- a/fs/bcachefs/debug.h +++ b/fs/bcachefs/debug.h @@ -5,7 +5,6 @@ struct bio; struct btree; -struct cached_dev; struct bch_fs; #define BCH_DEBUG_PARAM(name, description) extern bool bch_##name; @@ -27,7 +26,6 @@ BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM void __bch_btree_verify(struct bch_fs *, struct btree *); -void bch_data_verify(struct cached_dev *, struct bio *); #define bypass_torture_test(d) ((d)->bypass_torture_test) @@ -39,7 +37,6 @@ BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM static inline void __bch_btree_verify(struct bch_fs *c, struct btree *b) {} -static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {} #define bypass_torture_test(d) 0 diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index ba46d2d12f59..0c8ac1d79c97 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -1,7 +1,6 @@ #include "bcache.h" #include "error.h" #include "io.h" -#include "notify.h" #include "super.h" void bch_inconsistent_error(struct bch_fs *c) @@ -12,12 +11,6 @@ void bch_inconsistent_error(struct bch_fs *c) case BCH_ON_ERROR_CONTINUE: break; case BCH_ON_ERROR_RO: - if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { - /* XXX do something better here? 
*/ - bch_fs_stop_async(c); - return; - } - if (bch_fs_emergency_read_only(c)) bch_err(c, "emergency read only"); break; @@ -33,108 +26,26 @@ void bch_fatal_error(struct bch_fs *c) bch_err(c, "emergency read only"); } -/* Nonfatal IO errors, IO error/latency accounting: */ - -/* Just does IO error accounting: */ -void bch_account_io_completion(struct bch_dev *ca) -{ - /* - * The halflife of an error is: - * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh - */ - - if (ca->fs->error_decay) { - unsigned count = atomic_inc_return(&ca->io_count); - - while (count > ca->fs->error_decay) { - unsigned errors; - unsigned old = count; - unsigned new = count - ca->fs->error_decay; - - /* - * First we subtract refresh from count; each time we - * succesfully do so, we rescale the errors once: - */ - - count = atomic_cmpxchg(&ca->io_count, old, new); - - if (count == old) { - count = new; - - errors = atomic_read(&ca->io_errors); - do { - old = errors; - new = ((uint64_t) errors * 127) / 128; - errors = atomic_cmpxchg(&ca->io_errors, - old, new); - } while (old != errors); - } - } - } -} - -/* IO error accounting and latency accounting: */ -void bch_account_io_completion_time(struct bch_dev *ca, - unsigned submit_time_us, int op) -{ - struct bch_fs *c; - unsigned threshold; - - if (!ca) - return; - - c = ca->fs; - threshold = op_is_write(op) - ? c->congested_write_threshold_us - : c->congested_read_threshold_us; - - if (threshold && submit_time_us) { - unsigned t = local_clock_us(); - - int us = t - submit_time_us; - int congested = atomic_read(&c->congested); - - if (us > (int) threshold) { - int ms = us / 1024; - c->congested_last_us = t; - - ms = min(ms, CONGESTED_MAX + congested); - atomic_sub(ms, &c->congested); - } else if (congested < 0) - atomic_inc(&c->congested); - } - - bch_account_io_completion(ca); -} - void bch_nonfatal_io_error_work(struct work_struct *work) { struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); struct bch_fs *c = ca->fs; - unsigned errors = atomic_read(&ca->io_errors); bool dev; - if (errors < c->error_limit) { - bch_notify_dev_error(ca, false); - } else { - bch_notify_dev_error(ca, true); - - mutex_lock(&c->state_lock); - dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, - BCH_FORCE_IF_DEGRADED); - if (dev - ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, - BCH_FORCE_IF_DEGRADED) - : bch_fs_emergency_read_only(c)) - bch_err(ca, - "too many IO errors, setting %s RO", - dev ? "device" : "filesystem"); - mutex_unlock(&c->state_lock); - } + mutex_lock(&c->state_lock); + dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, + BCH_FORCE_IF_DEGRADED); + if (dev + ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, + BCH_FORCE_IF_DEGRADED) + : bch_fs_emergency_read_only(c)) + bch_err(ca, + "too many IO errors, setting %s RO", + dev ? 
"device" : "filesystem"); + mutex_unlock(&c->state_lock); } void bch_nonfatal_io_error(struct bch_dev *ca) { - atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors); queue_work(system_long_wq, &ca->io_error_work); } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 726b20d4434b..2d7f15803ae9 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -191,9 +191,6 @@ do { \ * don't (necessarily) want to shut down the fs: */ -void bch_account_io_completion(struct bch_dev *); -void bch_account_io_completion_time(struct bch_dev *, unsigned, int); - void bch_nonfatal_io_error_work(struct work_struct *); /* Does the error handling without logging a message */ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ce69c411c34c..28f5766049fc 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -17,7 +17,6 @@ #include "inode.h" #include "journal.h" #include "super-io.h" -#include "writeback.h" #include "xattr.h" #include <trace/events/bcachefs.h> @@ -950,10 +949,6 @@ static void bch_add_sectors(struct extent_insert_state *s, bch_mark_key(c, k, sectors, false, gc_pos_btree_node(b), &s->stats, s->trans->journal_res.seq); - - if (bkey_extent_is_data(k.k) && - !bkey_extent_is_cached(k.k)) - bcache_dev_sectors_dirty_add(c, k.k->p.inode, offset, sectors); } static void bch_subtract_sectors(struct extent_insert_state *s, @@ -988,118 +983,6 @@ static void bch_drop_subtract(struct extent_insert_state *s, struct bkey_s k) __set_bkey_deleted(k.k); } -/* - * Note: If this returns true because only some pointers matched, - * we can lose some caching that had happened in the interim. - * Because cache promotion only promotes the part of the extent - * actually read, and not the whole extent, and due to the key - * splitting done in bch_extent_insert_fixup, preserving such - * caching is difficult. - */ -static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r) -{ - struct bkey_s_c_extent le, re; - const struct bch_extent_ptr *lp, *rp; - s64 offset; - - BUG_ON(!l.k->size || !r.k->size); - - if (l.k->type != r.k->type || - bversion_cmp(l.k->version, r.k->version)) - return false; - - switch (l.k->type) { - case KEY_TYPE_COOKIE: - return !memcmp(bkey_s_c_to_cookie(l).v, - bkey_s_c_to_cookie(r).v, - sizeof(struct bch_cookie)); - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - le = bkey_s_c_to_extent(l); - re = bkey_s_c_to_extent(r); - - /* - * bkey_cmpxchg() handles partial matches - when either l or r - * has been trimmed - so we need just to handle l or r not - * starting at the same place when checking for a match here. 
- * - * If the starts of the keys are different, we just apply that - * offset to the device pointer offsets when checking those - - * matching how bch_cut_front() adjusts device pointer offsets - * when adjusting the start of a key: - */ - offset = bkey_start_offset(l.k) - bkey_start_offset(r.k); - - /* - * XXX: perhaps we only raced with copygc or tiering replacing - * one of the pointers: it should suffice to find _any_ matching - * pointer - */ - - if (bkey_val_u64s(le.k) != bkey_val_u64s(re.k)) - return false; - - extent_for_each_ptr(le, lp) { - const union bch_extent_entry *entry = - vstruct_idx(re.v, (u64 *) lp - le.v->_data); - - if (!extent_entry_is_ptr(entry)) - return false; - - rp = &entry->ptr; - - if (lp->offset != rp->offset + offset || - lp->dev != rp->dev || - lp->gen != rp->gen) - return false; - } - - return true; - default: - return false; - } - -} - -/* - * Returns true on success, false on failure (and false means @new no longer - * overlaps with @k) - * - * If returned true, we may have inserted up to one key in @b. - * If returned false, we may have inserted up to two keys in @b. - * - * On return, there is room in @res for at least one more key of the same size - * as @new. - */ -enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook, - struct bpos committed_pos, - struct bpos next_pos, - struct bkey_s_c k, - const struct bkey_i *new) -{ - struct bch_replace_info *replace = container_of(hook, - struct bch_replace_info, hook); - struct bkey_i *old = &replace->key; - - EBUG_ON(bkey_cmp(committed_pos, bkey_start_pos(&new->k)) < 0); - - /* must have something to compare against */ - EBUG_ON(!bkey_val_u64s(&old->k)); - - /* new must be a subset of old */ - EBUG_ON(bkey_cmp(new->k.p, old->k.p) > 0 || - bkey_cmp(bkey_start_pos(&new->k), bkey_start_pos(&old->k)) < 0); - - if (k.k && bch_extent_cmpxchg_cmp(k, bkey_i_to_s_c(old))) { - replace->successes++; - return BTREE_HOOK_DO_INSERT; - } else { - replace->failures++; - return BTREE_HOOK_NO_INSERT; - } -} - static bool bch_extent_merge_inline(struct bch_fs *, struct btree_iter *, struct bkey_packed *, diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index db7bd4f14988..070b64048168 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -42,10 +42,6 @@ bch_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, bch_extent_pick_ptr_avoiding(c, k, NULL, ret); } -enum extent_insert_hook_ret -bch_extent_cmpxchg(struct extent_insert_hook *, struct bpos, struct bpos, - struct bkey_s_c, const struct bkey_i *); - enum btree_insert_ret bch_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index f4ad2d194413..bdf93c4f3632 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -20,8 +20,6 @@ #include "journal.h" #include "keylist.h" #include "move.h" -#include "notify.h" -#include "stats.h" #include "super-io.h" #include <linux/blkdev.h> @@ -36,34 +34,6 @@ static inline void __bio_inc_remaining(struct bio *bio) atomic_inc(&bio->__bi_remaining); } -void bch_generic_make_request(struct bio *bio, struct bch_fs *c) -{ - if (current->bio_list) { - spin_lock(&c->bio_submit_lock); - bio_list_add(&c->bio_submit_list, bio); - spin_unlock(&c->bio_submit_lock); - queue_work(bcache_io_wq, &c->bio_submit_work); - } else { - generic_make_request(bio); - } -} - -void bch_bio_submit_work(struct work_struct *work) -{ - struct bch_fs *c = container_of(work, struct bch_fs, - bio_submit_work); - struct bio_list bl; - struct bio *bio; - - 
spin_lock(&c->bio_submit_lock); - bl = c->bio_submit_list; - bio_list_init(&c->bio_submit_list); - spin_unlock(&c->bio_submit_lock); - - while ((bio = bio_list_pop(&bl))) - generic_make_request(bio); -} - /* Allocate, free from mempool: */ void bch_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) @@ -116,8 +86,7 @@ void bch_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, /* Bios with headers */ static void bch_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio, - struct bch_dev *ca, const struct bch_extent_ptr *ptr, - bool punt) + struct bch_dev *ca, const struct bch_extent_ptr *ptr) { wbio->ca = ca; wbio->submit_time_us = local_clock_us(); @@ -126,14 +95,12 @@ static void bch_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio, if (!ca) bcache_io_error(c, &wbio->bio, "device has been removed"); - else if (punt) - bch_generic_make_request(&wbio->bio, c); else generic_make_request(&wbio->bio); } void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, - const struct bkey_i *k, bool punt) + const struct bkey_i *k) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); const struct bch_extent_ptr *ptr; @@ -148,7 +115,7 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, extent_for_each_ptr(e, ptr) { ca = c->devs[ptr->dev]; if (!percpu_ref_tryget(&ca->io_ref)) { - bch_submit_wbio(c, wbio, NULL, ptr, punt); + bch_submit_wbio(c, wbio, NULL, ptr); break; } @@ -172,7 +139,7 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, if (!journal_flushes_device(ca)) n->bio.bi_opf |= REQ_FUA; - bch_submit_wbio(c, n, ca, ptr, punt); + bch_submit_wbio(c, n, ca, ptr); } } @@ -358,8 +325,6 @@ static void bch_write_endio(struct bio *bio) set_closure_fn(cl, bch_write_io_error, index_update_wq(op)); } - bch_account_io_completion_time(ca, wbio->submit_time_us, - REQ_OP_WRITE); if (ca) percpu_ref_put(&ca->io_ref); @@ -574,7 +539,7 @@ static int bch_write_extent(struct bch_write_op *op, bch_check_mark_super(c, key_to_write, false); - bch_submit_wbio_replicas(to_wbio(bio), c, key_to_write, false); + bch_submit_wbio_replicas(to_wbio(bio), c, key_to_write); return ret; } @@ -756,10 +721,6 @@ void bch_write(struct closure *cl) struct bch_fs *c = op->c; u64 inode = op->pos.inode; - trace_bcache_write(c, inode, bio, - !(op->flags & BCH_WRITE_CACHED), - op->flags & BCH_WRITE_DISCARD); - if (c->opts.nochanges || !percpu_ref_tryget(&c->writes)) { __bcache_io_error(c, "read only"); @@ -776,11 +737,6 @@ void bch_write(struct closure *cl) if (!(op->flags & BCH_WRITE_DISCARD)) bch_increment_clock(c, bio_sectors(bio), WRITE); - if (!(op->flags & BCH_WRITE_DISCARD)) - bch_mark_foreground_write(c, bio_sectors(bio)); - else - bch_mark_discard(c, bio_sectors(bio)); - /* Don't call bch_next_delay() if rate is >= 1 GB/sec */ if (c->foreground_write_ratelimit_enabled && @@ -1065,6 +1021,8 @@ static void __bch_read_endio(struct work_struct *work) BUG_ON(!rbio->split || !rbio->bounce); + trace_bcache_promote(&rbio->bio); + /* we now own pages: */ swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt); rbio->promote = NULL; @@ -1085,10 +1043,6 @@ static void bch_read_endio(struct bio *bio) container_of(bio, struct bch_read_bio, bio); struct bch_fs *c = rbio->c; - if (rbio->flags & BCH_READ_ACCOUNT_TIMES) - bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, - REQ_OP_READ); - if (bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read")) { /* XXX: retry IO errors when we have another replica */ bch_rbio_error(rbio, 
bio->bi_error); diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index 9239ca4aac16..bc7c9cf81494 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -69,16 +69,13 @@ enum bch_read_flags { BCH_READ_PROMOTE = 1 << 2, BCH_READ_IS_LAST = 1 << 3, BCH_READ_MAY_REUSE_BIO = 1 << 4, - BCH_READ_ACCOUNT_TIMES = 1 << 5, - BCH_READ_USER_MAPPED = 1 << 6, + BCH_READ_USER_MAPPED = 1 << 5, }; void bch_read(struct bch_fs *, struct bch_read_bio *, u64); -void bch_generic_make_request(struct bio *, struct bch_fs *); -void bch_bio_submit_work(struct work_struct *); void bch_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, - const struct bkey_i *, bool); + const struct bkey_i *); int bch_discard(struct bch_fs *, struct bpos, struct bpos, struct bversion, struct disk_reservation *, diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 109c27c88be5..136b8c87ea3a 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1482,8 +1482,6 @@ int bch_journal_replay(struct bch_fs *c, struct list_head *list) */ BUG_ON(bch_disk_reservation_get(c, &disk_res, 0, 0)); - trace_bcache_journal_replay_key(&k->k); - ret = bch_btree_insert(c, entry->btree_id, k, &disk_res, NULL, NULL, BTREE_INSERT_NOFAIL| @@ -2085,8 +2083,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) .dev = ca->dev_idx, }); replicas++; - - trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx); } spin_unlock(&j->devs.lock); @@ -2280,7 +2276,7 @@ static void journal_write(struct closure *cl) bch_bio_map(bio, jset); trace_bcache_journal_write(bio); - closure_bio_submit_punt(bio, cl, c); + closure_bio_submit(bio, cl); ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq); } @@ -2296,7 +2292,7 @@ static void journal_write(struct closure *cl) bio->bi_end_io = journal_write_endio; bio->bi_private = ca; bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); - closure_bio_submit_punt(bio, cl, c); + closure_bio_submit(bio, cl); } no_io: diff --git a/fs/bcachefs/keybuf.c b/fs/bcachefs/keybuf.c deleted file mode 100644 index 15a1f85d353d..000000000000 --- a/fs/bcachefs/keybuf.c +++ /dev/null @@ -1,195 +0,0 @@ - -#include "bcache.h" -#include "btree_gc.h" -#include "btree_iter.h" -#include "keybuf.h" - -#include <trace/events/bcachefs.h> - -/* - * For buffered iteration over the btree, with predicates and ratelimiting and - * whatnot - */ - -static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r) -{ - /* Overlapping keys compare equal */ - if (bkey_cmp(l->key.k.p, bkey_start_pos(&r->key.k)) <= 0) - return -1; - if (bkey_cmp(bkey_start_pos(&l->key.k), r->key.k.p) >= 0) - return 1; - return 0; -} - -static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, - struct keybuf_key *r) -{ - return clamp_t(s64, bkey_cmp(l->key.k.p, r->key.k.p), -1, 1); -} - -void bch_refill_keybuf(struct bch_fs *c, struct keybuf *buf, - struct bpos end, keybuf_pred_fn *pred) -{ - struct bpos start = buf->last_scanned; - struct btree_iter iter; - struct bkey_s_c k; - unsigned nr_found = 0; - - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, buf->last_scanned, k) { - if (bkey_cmp(k.k->p, end) >= 0) { - buf->last_scanned = k.k->p; - goto done; - } - - if (pred(buf, k)) { - struct keybuf_key *w; - - spin_lock(&buf->lock); - - w = array_alloc(&buf->freelist); - if (!w) { - spin_unlock(&buf->lock); - goto done; - } - - bkey_reassemble(&w->key, k); - atomic_set(&w->ref, -1); /* -1 means hasn't started */ - - if (RB_INSERT(&buf->keys, w, node, keybuf_cmp)) - array_free(&buf->freelist, w); - else - 
nr_found++; - - spin_unlock(&buf->lock); - } - - buf->last_scanned = k.k->p; - bch_btree_iter_cond_resched(&iter); - } - - /* If we end up here, it means: - * - the map_fn didn't fill up the keybuf - * - the map_fn didn't see the end key - * - there were no more keys to map over - * Therefore, we are at the end of the key space */ - buf->last_scanned = POS_MAX; -done: - bch_btree_iter_unlock(&iter); - - trace_bcache_keyscan(nr_found, - start.inode, start.offset, - buf->last_scanned.inode, - buf->last_scanned.offset); - - spin_lock(&buf->lock); - - if (!RB_EMPTY_ROOT(&buf->keys)) { - struct keybuf_key *w; - - w = RB_FIRST(&buf->keys, struct keybuf_key, node); - buf->start = bkey_start_pos(&w->key.k); - - w = RB_LAST(&buf->keys, struct keybuf_key, node); - buf->end = w->key.k.p; - } else { - buf->start = POS_MAX; - buf->end = POS_MAX; - } - - spin_unlock(&buf->lock); -} - -static void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w) -{ - rb_erase(&w->node, &buf->keys); - array_free(&buf->freelist, w); -} - -void bch_keybuf_put(struct keybuf *buf, struct keybuf_key *w) -{ - BUG_ON(atomic_read(&w->ref) <= 0); - - if (atomic_dec_and_test(&w->ref)) { - up(&buf->in_flight); - - spin_lock(&buf->lock); - bch_keybuf_del(buf, w); - spin_unlock(&buf->lock); - } -} - -void bch_keybuf_recalc_oldest_gens(struct bch_fs *c, struct keybuf *buf) -{ - struct keybuf_key *w, *n; - - spin_lock(&buf->lock); - rbtree_postorder_for_each_entry_safe(w, n, - &buf->keys, node) - bch_btree_key_recalc_oldest_gen(c, bkey_i_to_s_c(&w->key)); - spin_unlock(&buf->lock); -} - -bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bpos start, - struct bpos end) -{ - bool ret = false; - struct keybuf_key *w, *next, s = { .key.k.p = start }; - - if (bkey_cmp(end, buf->start) <= 0 || - bkey_cmp(start, buf->end) >= 0) - return false; - - spin_lock(&buf->lock); - - for (w = RB_GREATER(&buf->keys, s, node, keybuf_nonoverlapping_cmp); - w && bkey_cmp(bkey_start_pos(&w->key.k), end) < 0; - w = next) { - next = RB_NEXT(w, node); - - if (atomic_read(&w->ref) == -1) - bch_keybuf_del(buf, w); - else - ret = true; - } - - spin_unlock(&buf->lock); - return ret; -} - -struct keybuf_key *bch_keybuf_next(struct keybuf *buf) -{ - struct keybuf_key *w; - - spin_lock(&buf->lock); - - w = RB_FIRST(&buf->keys, struct keybuf_key, node); - - while (w && atomic_read(&w->ref) != -1) - w = RB_NEXT(w, node); - - if (!w) { - spin_unlock(&buf->lock); - return NULL; - } - - atomic_set(&w->ref, 1); - spin_unlock(&buf->lock); - - down(&buf->in_flight); - - return w; -} - -void bch_keybuf_init(struct keybuf *buf) -{ - sema_init(&buf->in_flight, KEYBUF_REFILL_BATCH / 2); - - buf->last_scanned = POS_MAX; - buf->start = POS_MIN; - buf->end = POS_MIN; - - buf->keys = RB_ROOT; - - spin_lock_init(&buf->lock); - array_allocator_init(&buf->freelist); -} diff --git a/fs/bcachefs/keybuf.h b/fs/bcachefs/keybuf.h deleted file mode 100644 index dd1402d3d68c..000000000000 --- a/fs/bcachefs/keybuf.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _BCACHE_KEYBUF_H -#define _BCACHE_KEYBUF_H - -#include "keybuf_types.h" - -typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey_s_c); - -void bch_keybuf_init(struct keybuf *); -void bch_refill_keybuf(struct bch_fs *, struct keybuf *, - struct bpos, keybuf_pred_fn *); -void bch_keybuf_recalc_oldest_gens(struct bch_fs *, struct keybuf *); -bool bch_keybuf_check_overlapping(struct keybuf *, struct bpos, struct bpos); -void bch_keybuf_put(struct keybuf *, struct keybuf_key *); -struct keybuf_key *bch_keybuf_next(struct 
keybuf *); - -#endif /* _BCACHE_KEYBUF_H */ diff --git a/fs/bcachefs/keybuf_types.h b/fs/bcachefs/keybuf_types.h deleted file mode 100644 index 3facc4a07526..000000000000 --- a/fs/bcachefs/keybuf_types.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _BCACHE_KEYBUF_TYPES_H -#define _BCACHE_KEYBUF_TYPES_H - -struct keybuf_key { - struct rb_node node; - BKEY_PADDED(key); - atomic_t ref; -}; - -#define KEYBUF_REFILL_BATCH 500 - -struct keybuf { - struct bpos last_scanned; - spinlock_t lock; - - /* - * Beginning and end of range in rb tree - so that we can skip taking - * lock and checking the rb tree when we need to check for overlapping - * keys. - */ - struct bpos start; - struct bpos end; - - struct rb_root keys; - - unsigned max_in_flight; - struct semaphore in_flight; - - DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, - KEYBUF_REFILL_BATCH); -}; - -#endif /* _BCACHE_KEYBUF_TYPES_H */ diff --git a/fs/bcachefs/notify.c b/fs/bcachefs/notify.c deleted file mode 100644 index b06a87499b2d..000000000000 --- a/fs/bcachefs/notify.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Code for sending uevent notifications to user-space. - * - * Copyright 2015 Datera, Inc. - */ - -#include "bcache.h" -#include "notify.h" - -#include <linux/kobject.h> - -#define notify_var(c, format, ...) \ -({ \ - int ret; \ - lockdep_assert_held(&(c)->uevent_lock); \ - ret = add_uevent_var(&(c)->uevent_env, format, ##__VA_ARGS__); \ - WARN_ON_ONCE(ret); \ -}) - -static void notify_get(struct bch_fs *c) -{ - struct kobj_uevent_env *env = &c->uevent_env; - - mutex_lock(&c->uevent_lock); - env->envp_idx = 0; - env->buflen = 0; - - notify_var(c, "SET_UUID=%pU", c->sb.user_uuid.b); -} - -static void notify_get_cache(struct bch_dev *ca) -{ - struct bch_fs *c = ca->fs; - - notify_get(c); - notify_var(c, "UUID=%pU", ca->uuid.b); - notify_var(c, "BLOCKDEV=%s", ca->name); -} - -static void notify_put(struct bch_fs *c) -{ - struct kobj_uevent_env *env = &c->uevent_env; - - env->envp[env->envp_idx] = NULL; - kobject_uevent_env(&c->kobj, KOBJ_CHANGE, env->envp); - mutex_unlock(&c->uevent_lock); -} - -void bch_notify_fs_read_write(struct bch_fs *c) -{ - notify_get(c); - notify_var(c, "STATE=active"); - notify_put(c); -} - -void bch_notify_fs_read_only(struct bch_fs *c) -{ - notify_get(c); - notify_var(c, "STATE=readonly"); - notify_put(c); -} - -void bch_notify_fs_stopped(struct bch_fs *c) -{ - notify_get(c); - notify_var(c, "STATE=stopped"); - notify_put(c); -} - -void bch_notify_dev_read_write(struct bch_dev *ca) -{ - struct bch_fs *c = ca->fs; - - notify_get_cache(ca); - notify_var(c, "STATE=active"); - notify_put(c); -} - -void bch_notify_dev_read_only(struct bch_dev *ca) -{ - struct bch_fs *c = ca->fs; - - notify_get_cache(ca); - notify_var(c, "STATE=readonly"); - notify_put(c); -} - -void bch_notify_dev_added(struct bch_dev *ca) -{ - struct bch_fs *c = ca->fs; - - notify_get_cache(ca); - notify_var(c, "STATE=removing"); - notify_put(c); -} - -void bch_notify_dev_error(struct bch_dev *ca, bool fatal) -{ - struct bch_fs *c = ca->fs; - - notify_get_cache(ca); - notify_var(c, "STATE=error"); - notify_var(c, "FATAL=%d", fatal); - notify_put(c); -} diff --git a/fs/bcachefs/notify.h b/fs/bcachefs/notify.h deleted file mode 100644 index 2c1e367913f0..000000000000 --- a/fs/bcachefs/notify.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Code for sending uevent notifications to user-space. - * - * Copyright 2015 Datera, Inc. 
- */ - -#ifndef _NOTIFY_H -#define _NOTIFY_H - -#ifndef NO_BCACHE_NOTIFY - -void bch_notify_fs_read_write(struct bch_fs *); -void bch_notify_fs_read_only(struct bch_fs *); -void bch_notify_fs_stopped(struct bch_fs *); - -void bch_notify_dev_read_write(struct bch_dev *); -void bch_notify_dev_read_only(struct bch_dev *); -void bch_notify_dev_added(struct bch_dev *); -void bch_notify_dev_error(struct bch_dev *, bool); - -#else - -static inline void bch_notify_fs_read_write(struct bch_fs *c) {} -static inline void bch_notify_fs_read_only(struct bch_fs *c) {} -static inline void bch_notify_fs_stopped(struct bch_fs *c) {} - -static inline void bch_notify_dev_read_write(struct bch_dev *ca) {} -static inline void bch_notify_dev_read_only(struct bch_dev *ca) {} -static inline void bch_notify_dev_added(struct bch_dev *ca) {} -static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {} - -#endif - -#endif /* _NOTIFY_H */ diff --git a/fs/bcachefs/request.c b/fs/bcachefs/request.c deleted file mode 100644 index 8d7627499988..000000000000 --- a/fs/bcachefs/request.c +++ /dev/null @@ -1,807 +0,0 @@ -/* - * Handle a read or a write request and decide what to do with it. - * - * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> - * Copyright 2012 Google, Inc. - * - * Main pieces here: - * - * 1) Data insert path, via bch_data_insert() -- writes data to cache and - * updates extents btree - * 2) Read path, via bch_read() -- for now only used by bcachefs and ioctl - * interface - * 3) Read path, via cache_lookup() and struct search -- used by block device - * make_request functions - * 4) Cache promotion -- used by bch_read() and cache_lookup() to copy data to - * the cache, either from a backing device or a cache device in a higher tier - * - * One tricky thing that comes up is a race condition where a bucket may be - * re-used while reads from it are still in flight. To guard against this, we - * save the ptr that is being read and check if it is stale once the read - * completes. If the ptr is stale, the read is retried. - * - * #2 and #3 will be unified further in the future. - */ - -#include "bcache.h" -#include "blockdev.h" -#include "btree_update.h" -#include "btree_iter.h" -#include "clock.h" -#include "debug.h" -#include "error.h" -#include "extents.h" -#include "io.h" -#include "journal.h" -#include "keybuf.h" -#include "request.h" -#include "writeback.h" -#include "stats.h" - -#include <linux/module.h> -#include <linux/hash.h> -#include <linux/random.h> -#include <linux/backing-dev.h> - -#include <trace/events/bcachefs.h> - -#define CUTOFF_CACHE_ADD 10 -#define CUTOFF_CACHE_READA 15 - -/* Congested? */ - -unsigned bch_get_congested(struct bch_fs *c) -{ - int i; - long rand; - - if (!c->congested_read_threshold_us && - !c->congested_write_threshold_us) - return 0; - - i = (local_clock_us() - c->congested_last_us) / 1024; - if (i < 0) - return 0; - - i += atomic_read(&c->congested); - if (i >= 0) - return 0; - - i += CONGESTED_MAX; - - if (i > 0) - i = fract_exp_two(i, 6); - - rand = get_random_int(); - i -= bitmap_weight(&rand, BITS_PER_LONG); - - return i > 0 ? 
i : 1; -} - -static void add_sequential(struct task_struct *t) -{ - t->sequential_io_avg = ewma_add(t->sequential_io_avg, - t->sequential_io, 3); - t->sequential_io = 0; -} - -static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) -{ - return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; -} - -static bool check_should_bypass(struct cached_dev *dc, struct bio *bio, int rw) -{ - struct bch_fs *c = dc->disk.c; - unsigned mode = BDEV_CACHE_MODE(dc->disk_sb.sb); - unsigned sectors, congested = bch_get_congested(c); - struct task_struct *task = current; - struct io *i; - - if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || - sectors_available(c) * 100 < c->capacity * CUTOFF_CACHE_ADD || - (bio_op(bio) == REQ_OP_DISCARD)) - goto skip; - - if (mode == CACHE_MODE_NONE || - (mode == CACHE_MODE_WRITEAROUND && - op_is_write(bio_op(bio)))) - goto skip; - - if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || - bio_sectors(bio) & (c->sb.block_size - 1)) { - pr_debug("skipping unaligned io"); - goto skip; - } - - if (bypass_torture_test(dc)) { - if ((get_random_int() & 3) == 3) - goto skip; - else - goto rescale; - } - - if (!congested && !dc->sequential_cutoff) - goto rescale; - - if (!congested && - mode == CACHE_MODE_WRITEBACK && - op_is_write(bio_op(bio)) && - (bio->bi_opf & REQ_SYNC)) - goto rescale; - - spin_lock(&dc->io_lock); - - hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash) - if (i->last == bio->bi_iter.bi_sector && - time_before(jiffies, i->last_io)) - goto found; - - i = list_first_entry(&dc->io_lru, struct io, lru); - - add_sequential(task); - i->sequential = 0; -found: - if (i->sequential + bio->bi_iter.bi_size > i->sequential) - i->sequential += bio->bi_iter.bi_size; - - i->last = bio_end_sector(bio); - i->last_io = jiffies + msecs_to_jiffies(5000); - task->sequential_io = i->sequential; - - hlist_del(&i->hash); - hlist_add_head(&i->hash, iohash(dc, i->last)); - list_move_tail(&i->lru, &dc->io_lru); - - spin_unlock(&dc->io_lock); - - sectors = max(task->sequential_io, - task->sequential_io_avg) >> 9; - - if (dc->sequential_cutoff && - sectors >= dc->sequential_cutoff >> 9) { - trace_bcache_bypass_sequential(bio); - goto skip; - } - - if (congested && sectors >= congested) { - trace_bcache_bypass_congested(bio); - goto skip; - } - -rescale: - return false; -skip: - bch_mark_sectors_bypassed(c, dc, bio_sectors(bio)); - return true; -} - -/* Common code for the make_request functions */ - -/** - * request_endio - endio function for backing device bios - */ -static void request_endio(struct bio *bio) -{ - struct closure *cl = bio->bi_private; - - if (bio->bi_error) { - struct search *s = container_of(cl, struct search, cl); - s->iop.error = bio->bi_error; - /* Only cache read errors are recoverable */ - s->recoverable = false; - } - - bio_put(bio); - closure_put(cl); -} - -static void bio_complete(struct search *s) -{ - if (s->orig_bio) { - generic_end_io_acct(bio_data_dir(s->orig_bio), - &s->d->disk->part0, s->start_time); - - trace_bcache_request_end(s->d, s->orig_bio); - s->orig_bio->bi_error = s->iop.error; - bio_endio(s->orig_bio); - s->orig_bio = NULL; - } -} - -static void do_bio_hook(struct search *s, struct bio *orig_bio) -{ - int rw = bio_data_dir(orig_bio); - struct bio *bio = rw ? 
&s->wbio.bio : &s->rbio.bio; - - bio_init(bio); - __bio_clone_fast(bio, orig_bio); - bio->bi_end_io = request_endio; - bio->bi_private = &s->cl; - - bio_cnt_set(bio, 3); -} - -static void search_free(struct closure *cl) -{ - struct search *s = container_of(cl, struct search, cl); - - bio_complete(s); - - if (s->iop.bio) - bio_put(&s->iop.bio->bio); - - closure_debug_destroy(cl); - mempool_free(s, &s->d->c->search); -} - -static inline struct search *search_alloc(struct bio *bio, - struct bcache_device *d) -{ - struct search *s; - - s = mempool_alloc(&d->c->search, GFP_NOIO); - - closure_init(&s->cl, NULL); - do_bio_hook(s, bio); - - s->orig_bio = bio; - s->d = d; - s->recoverable = 1; - s->bypass = 0; - s->write = op_is_write(bio_op(bio)); - s->read_dirty_data = 0; - s->cache_miss = 0; - s->start_time = jiffies; - s->inode = bcache_dev_inum(d); - - s->iop.c = d->c; - s->iop.bio = NULL; - s->iop.error = 0; - - return s; -} - -/* Cached devices */ - -static void cached_dev_bio_complete(struct closure *cl) -{ - struct search *s = container_of(cl, struct search, cl); - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - - search_free(cl); - cached_dev_put(dc); -} - -/* Process reads */ - -static void cached_dev_read_error(struct closure *cl) -{ - struct search *s = container_of(cl, struct search, cl); - struct bio *bio = &s->rbio.bio; - - if (s->recoverable) { - /* Read bucket invalidate races are handled here, also plain - * old IO errors from the cache that can be retried from the - * backing device (reads of clean data) */ - trace_bcache_read_retry(s->orig_bio); - - s->iop.error = 0; - do_bio_hook(s, s->orig_bio); - - /* XXX: invalidate cache, don't count twice */ - - closure_bio_submit(bio, cl); - } - - continue_at(cl, cached_dev_bio_complete, NULL); -} - -static void cached_dev_read_done(struct closure *cl) -{ - struct search *s = container_of(cl, struct search, cl); - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - - if (dc->verify && s->recoverable && !s->read_dirty_data) - bch_data_verify(dc, s->orig_bio); - - continue_at_nobarrier(cl, cached_dev_bio_complete, NULL); -} - -static void cached_dev_read_done_bh(struct closure *cl) -{ - struct search *s = container_of(cl, struct search, cl); - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - - bch_mark_cache_accounting(s->iop.c, dc, !s->cache_miss, s->bypass); - trace_bcache_read(s->orig_bio, !s->cache_miss, s->bypass); - - if (s->iop.error) - continue_at_nobarrier(cl, cached_dev_read_error, s->iop.c->wq); - else if (dc->verify) - continue_at_nobarrier(cl, cached_dev_read_done, s->iop.c->wq); - else - continue_at_nobarrier(cl, cached_dev_bio_complete, NULL); -} - -/** - * __cache_promote -- insert result of read bio into cache - * - * Used for backing devices and flash-only volumes. - * - * @orig_bio must actually be a bbio with a valid key. - */ -void __cache_promote(struct bch_fs *c, struct bch_read_bio *orig_bio, - struct bkey_s_c old, - struct bkey_s_c new, - unsigned write_flags) -{ -#if 0 - struct cache_promote_op *op; - struct bio *bio; - unsigned pages = DIV_ROUND_UP(orig_bio->bio.bi_iter.bi_size, PAGE_SIZE); - - /* XXX: readahead? 
*/ - - op = kmalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); - if (!op) - goto out_submit; - - /* clone the bbio */ - memcpy(&op->bio, orig_bio, offsetof(struct bbio, bio)); - - bio = &op->bio.bio.bio; - bio_init(bio); - bio_get(bio); - bio->bi_bdev = orig_bio->bio.bi_bdev; - bio->bi_iter.bi_sector = orig_bio->bio.bi_iter.bi_sector; - bio->bi_iter.bi_size = orig_bio->bio.bi_iter.bi_size; - bio->bi_end_io = cache_promote_endio; - bio->bi_private = &op->cl; - bio->bi_io_vec = bio->bi_inline_vecs; - bch_bio_map(bio, NULL); - - if (bio_alloc_pages(bio, __GFP_NOWARN|GFP_NOIO)) - goto out_free; - - orig_bio->ca = NULL; - - closure_init(&op->cl, &c->cl); - op->orig_bio = &orig_bio->bio; - op->stale = 0; - - bch_write_op_init(&op->iop, c, &op->bio, &c->promote_write_point, - new, old, - BCH_WRITE_ALLOC_NOWAIT|write_flags); - op->iop.nr_replicas = 1; - - //bch_cut_front(bkey_start_pos(&orig_bio->key.k), &op->iop.insert_key); - //bch_cut_back(orig_bio->key.k.p, &op->iop.insert_key.k); - - trace_bcache_promote(&orig_bio->bio); - - op->bio.bio.submit_time_us = local_clock_us(); - closure_bio_submit(bio, &op->cl); - - continue_at(&op->cl, cache_promote_write, c->wq); -out_free: - kfree(op); -out_submit: - generic_make_request(&orig_bio->bio); -#endif -} - -/** - * cached_dev_cache_miss - populate cache with data from backing device - * - * We don't write to the cache if s->bypass is set. - */ -static int cached_dev_cache_miss(struct btree_iter *iter, struct search *s, - struct bio *bio, unsigned sectors) -{ - int ret; - unsigned reada = 0; - struct bio *miss; - BKEY_PADDED(key) replace; - - s->cache_miss = 1; - - if (s->bypass) - goto nopromote; -#if 0 - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - - /* XXX: broken */ - if (!(bio->bi_opf & REQ_RAHEAD) && - !(bio->bi_opf & REQ_META) && - ((u64) sectors_available(dc->disk.c) * 100 < - (u64) iter->c->capacity * CUTOFF_CACHE_READA)) - reada = min_t(sector_t, dc->readahead >> 9, - bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); -#endif - sectors = min(sectors, bio_sectors(bio) + reada); - - replace.key.k = KEY(s->inode, - bio->bi_iter.bi_sector + sectors, - sectors); - - ret = bch_btree_insert_check_key(iter, &replace.key); - if (ret == -EINTR) - return ret; - - miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split); - - miss->bi_end_io = request_endio; - miss->bi_private = &s->cl; - - //to_bbio(miss)->key.k = KEY(s->inode, - // bio_end_sector(miss), - // bio_sectors(miss)); - to_rbio(miss)->ca = NULL; - - closure_get(&s->cl); - __cache_promote(s->iop.c, to_rbio(miss), - bkey_i_to_s_c(&replace.key), - bkey_to_s_c(&KEY(replace.key.k.p.inode, - replace.key.k.p.offset, - replace.key.k.size)), - BCH_WRITE_CACHED); - - return 0; -nopromote: - miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split); - - miss->bi_end_io = request_endio; - miss->bi_private = &s->cl; - closure_bio_submit(miss, &s->cl); - - return 0; -} - -static void cached_dev_read(struct cached_dev *dc, struct search *s) -{ - struct bch_fs *c = s->iop.c; - struct closure *cl = &s->cl; - struct bio *bio = &s->rbio.bio; - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS, - POS(s->inode, bio->bi_iter.bi_sector), k) { - BKEY_PADDED(k) tmp; - struct extent_pick_ptr pick; - unsigned sectors, bytes; - bool is_last; -retry: - bkey_reassemble(&tmp.k, k); - bch_btree_iter_unlock(&iter); - k = bkey_i_to_s_c(&tmp.k); - - bch_extent_pick_ptr(c, k, &pick); - if (IS_ERR(pick.ca)) { - 
bcache_io_error(c, bio, "no device to read from"); - goto out; - } - - sectors = min_t(u64, k.k->p.offset, bio_end_sector(bio)) - - bio->bi_iter.bi_sector; - bytes = sectors << 9; - is_last = bytes == bio->bi_iter.bi_size; - swap(bio->bi_iter.bi_size, bytes); - - if (pick.ca) { - PTR_BUCKET(pick.ca, &pick.ptr)->read_prio = - c->prio_clock[READ].hand; - - if (!bkey_extent_is_cached(k.k)) - s->read_dirty_data = true; - - bch_read_extent(c, &s->rbio, k, &pick, - BCH_READ_ACCOUNT_TIMES| - BCH_READ_RETRY_IF_STALE| - (!s->bypass ? BCH_READ_PROMOTE : 0)| - (is_last ? BCH_READ_IS_LAST : 0)); - } else { - /* not present (hole), or stale cached data */ - if (cached_dev_cache_miss(&iter, s, bio, sectors)) { - k = bch_btree_iter_peek_with_holes(&iter); - if (btree_iter_err(k)) - break; - goto retry; - } - } - - swap(bio->bi_iter.bi_size, bytes); - bio_advance(bio, bytes); - - if (is_last) { - bch_btree_iter_unlock(&iter); - goto out; - } - } - - /* - * If we get here, it better have been because there was an error - * reading a btree node - */ - ret = bch_btree_iter_unlock(&iter); - BUG_ON(!ret); - bcache_io_error(c, bio, "btree IO error %i", ret); -out: - continue_at(cl, cached_dev_read_done_bh, NULL); -} - -/* Process writes */ - -static void cached_dev_write_complete(struct closure *cl) -{ - struct search *s = container_of(cl, struct search, cl); - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - - up_read_non_owner(&dc->writeback_lock); - cached_dev_bio_complete(cl); -} - -static void cached_dev_write(struct cached_dev *dc, struct search *s) -{ - struct closure *cl = &s->cl; - struct bio *bio = &s->wbio.bio; - bool writeback = false; - bool bypass = s->bypass; - struct bkey insert_key = KEY(s->inode, - bio_end_sector(bio), - bio_sectors(bio)); - unsigned flags = BCH_WRITE_DISCARD_ON_ERROR; - - down_read_non_owner(&dc->writeback_lock); - if (bch_keybuf_check_overlapping(&dc->writeback_keys, - bkey_start_pos(&insert_key), - insert_key.p)) { - /* - * We overlap with some dirty data undergoing background - * writeback, force this write to writeback - */ - bypass = false; - writeback = true; - } - - /* - * Discards aren't _required_ to do anything, so skipping if - * check_overlapping returned true is ok - * - * But check_overlapping drops dirty keys for which io hasn't started, - * so we still want to call it. - */ - if (bio_op(bio) == REQ_OP_DISCARD) - bypass = true; - - if (should_writeback(dc, bio, BDEV_CACHE_MODE(dc->disk_sb.sb), - bypass)) { - bypass = false; - writeback = true; - } - - if (bypass) { - /* - * If this is a bypass-write (as opposed to a discard), send - * it down to the backing device. If this is a discard, only - * send it to the backing device if the backing device - * supports discards. Otherwise, we simply discard the key - * range from the cache and don't touch the backing device. 
- */ - if ((bio_op(bio) != REQ_OP_DISCARD) || - blk_queue_discard(bdev_get_queue(dc->disk_sb.bdev))) - closure_bio_submit(s->orig_bio, cl); - } else if (writeback) { - bch_writeback_add(dc); - - if (bio->bi_opf & REQ_PREFLUSH) { - /* Also need to send a flush to the backing device */ - struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, - &dc->disk.bio_split); - - flush->bi_bdev = bio->bi_bdev; - flush->bi_end_io = request_endio; - flush->bi_private = cl; - bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH); - - closure_bio_submit(flush, cl); - } - } else { - struct bio *writethrough = - bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split); - - closure_bio_submit(writethrough, cl); - - flags |= BCH_WRITE_CACHED; - flags |= BCH_WRITE_ALLOC_NOWAIT; - } - - if (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) - flags |= BCH_WRITE_FLUSH; - if (bypass) - flags |= BCH_WRITE_DISCARD; - - bch_write_op_init(&s->iop, dc->disk.c, &s->wbio, - (struct disk_reservation) { 0 }, - foreground_write_point(dc->disk.c, - (unsigned long) current), - bkey_start_pos(&insert_key), - NULL, flags); - - closure_call(&s->iop.cl, bch_write, NULL, cl); - continue_at(cl, cached_dev_write_complete, NULL); -} - -/* Cached devices - read & write stuff */ - -static void __cached_dev_make_request(struct request_queue *q, struct bio *bio) -{ - struct search *s; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - int rw = bio_data_dir(bio); - - generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); - - bio->bi_bdev = dc->disk_sb.bdev; - bio->bi_iter.bi_sector += le64_to_cpu(dc->disk_sb.sb->data_offset); - - if (cached_dev_get(dc)) { - struct bio *clone; - - s = search_alloc(bio, d); - trace_bcache_request_start(s->d, bio); - - clone = rw ? 
&s->wbio.bio : &s->rbio.bio; - - if (!bio->bi_iter.bi_size) { - if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) - bch_journal_flush_async(&s->iop.c->journal, - &s->cl); - - /* - * If it's a flush, we send the flush to the backing - * device too - */ - closure_bio_submit(clone, &s->cl); - - continue_at(&s->cl, cached_dev_bio_complete, NULL); - } else { - s->bypass = check_should_bypass(dc, bio, rw); - - if (rw) - cached_dev_write(dc, s); - else - cached_dev_read(dc, s); - } - } else { - if ((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(dc->disk_sb.bdev))) - bio_endio(bio); - else - generic_make_request(bio); - } -} - -static blk_qc_t cached_dev_make_request(struct request_queue *q, - struct bio *bio) -{ - __cached_dev_make_request(q, bio); - return BLK_QC_T_NONE; -} - -static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - return __blkdev_driver_ioctl(dc->disk_sb.bdev, mode, cmd, arg); -} - -static int cached_dev_congested(void *data, int bits) -{ - struct bcache_device *d = data; - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - struct request_queue *q = bdev_get_queue(dc->disk_sb.bdev); - int ret = 0; - - if (bdi_congested(&q->backing_dev_info, bits)) - return 1; - - if (cached_dev_get(dc)) { - ret |= bch_congested(d->c, bits); - cached_dev_put(dc); - } - - return ret; -} - -void bch_cached_dev_request_init(struct cached_dev *dc) -{ - struct gendisk *g = dc->disk.disk; - - g->queue->make_request_fn = cached_dev_make_request; - g->queue->backing_dev_info.congested_fn = cached_dev_congested; - dc->disk.ioctl = cached_dev_ioctl; -} - -/* Blockdev volumes */ - -static void __blockdev_volume_make_request(struct request_queue *q, - struct bio *bio) -{ - struct search *s; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; - int rw = bio_data_dir(bio); - - generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); - - trace_bcache_request_start(d, bio); - - s = search_alloc(bio, d); - - if (!bio->bi_iter.bi_size) { - if (s->orig_bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) - bch_journal_flush_async(&s->iop.c->journal, - &s->cl); - - continue_at(&s->cl, search_free, NULL); - } else if (rw) { - struct disk_reservation res = { 0 }; - unsigned flags = 0; - - if (bio_op(bio) != REQ_OP_DISCARD && - bch_disk_reservation_get(d->c, &res, bio_sectors(bio), 0)) { - s->iop.error = -ENOSPC; - continue_at(&s->cl, search_free, NULL); - return; - } - - if (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) - flags |= BCH_WRITE_FLUSH; - if (bio_op(bio) == REQ_OP_DISCARD) - flags |= BCH_WRITE_DISCARD; - - bch_write_op_init(&s->iop, d->c, &s->wbio, res, - foreground_write_point(d->c, - (unsigned long) current), - POS(s->inode, bio->bi_iter.bi_sector), - NULL, flags); - - closure_call(&s->iop.cl, bch_write, NULL, &s->cl); - } else { - closure_get(&s->cl); - bch_read(d->c, &s->rbio, bcache_dev_inum(d)); - } - continue_at(&s->cl, search_free, NULL); -} - -static blk_qc_t blockdev_volume_make_request(struct request_queue *q, - struct bio *bio) -{ - __blockdev_volume_make_request(q, bio); - return BLK_QC_T_NONE; -} - -static int blockdev_volume_ioctl(struct bcache_device *d, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - return -ENOTTY; -} - -static int blockdev_volume_congested(void *data, int bits) -{ - struct bcache_device *d = data; - - return bch_congested(d->c, bits); -} - -void bch_blockdev_volume_request_init(struct bcache_device *d) -{ 
- struct gendisk *g = d->disk; - - g->queue->make_request_fn = blockdev_volume_make_request; - g->queue->backing_dev_info.congested_fn = blockdev_volume_congested; - d->ioctl = blockdev_volume_ioctl; -} diff --git a/fs/bcachefs/request.h b/fs/bcachefs/request.h deleted file mode 100644 index 1ee3d16fac46..000000000000 --- a/fs/bcachefs/request.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _BCACHE_REQUEST_H_ -#define _BCACHE_REQUEST_H_ - -#include "stats.h" - -struct bch_fs; -struct cached_dev; -struct bcache_device; -struct kmem_cache; - -unsigned bch_get_congested(struct bch_fs *); - -void bch_cached_dev_request_init(struct cached_dev *dc); -void bch_blockdev_volume_request_init(struct bcache_device *d); - -#endif /* _BCACHE_REQUEST_H_ */ diff --git a/fs/bcachefs/stats.c b/fs/bcachefs/stats.c deleted file mode 100644 index a8a4eb3692c4..000000000000 --- a/fs/bcachefs/stats.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * bcache stats code - * - * Copyright 2012 Google, Inc. - */ - -#include "bcache.h" -#include "stats.h" -#include "sysfs.h" - -/* - * We keep absolute totals of various statistics, and addionally a set of three - * rolling averages. - * - * Every so often, a timer goes off and rescales the rolling averages. - * accounting_rescale[] is how many times the timer has to go off before we - * rescale each set of numbers; that gets us half lives of 5 minutes, one hour, - * and one day. - * - * accounting_delay is how often the timer goes off - 22 times in 5 minutes, - * and accounting_weight is what we use to rescale: - * - * pow(31 / 32, 22) ~= 1/2 - * - * So that we don't have to increment each set of numbers every time we (say) - * get a cache hit, we increment a single atomic_t in acc->collector, and when - * the rescale function runs it resets the atomic counter to 0 and adds its - * old value to each of the exported numbers. - * - * To reduce rounding error, the numbers in struct cache_stats are all - * stored left shifted by 16, and scaled back in the sysfs show() function. 
- */ - -static const unsigned DAY_RESCALE = 288; -static const unsigned HOUR_RESCALE = 12; -static const unsigned FIVE_MINUTE_RESCALE = 1; -static const unsigned accounting_delay = (HZ * 300) / 22; -static const unsigned accounting_weight = 5; - -/* sysfs reading/writing */ - -read_attribute(cache_hits); -read_attribute(cache_misses); -read_attribute(cache_bypass_hits); -read_attribute(cache_bypass_misses); -read_attribute(cache_hit_ratio); -read_attribute(cache_readaheads); -read_attribute(cache_miss_collisions); -read_attribute(bypassed); -read_attribute(foreground_write_ratio); -read_attribute(foreground_writes); -read_attribute(gc_writes); -read_attribute(discards); - -SHOW(bch_stats) -{ - struct cache_stats *s = - container_of(kobj, struct cache_stats, kobj); -#define var(stat) (s->stat >> 16) - var_print(cache_hits); - var_print(cache_misses); - var_print(cache_bypass_hits); - var_print(cache_bypass_misses); - - sysfs_print(cache_hit_ratio, - DIV_SAFE(var(cache_hits) * 100, - var(cache_hits) + var(cache_misses))); - - var_print(cache_readaheads); - var_print(cache_miss_collisions); - - sysfs_hprint(bypassed, var(sectors_bypassed) << 9); - sysfs_hprint(foreground_writes, var(foreground_write_sectors) << 9); - sysfs_hprint(gc_writes, var(gc_write_sectors) << 9); - sysfs_hprint(discards, var(discard_sectors) << 9); - - sysfs_print(foreground_write_ratio, - DIV_SAFE(var(foreground_write_sectors) * 100, - var(foreground_write_sectors) + - var(gc_write_sectors))); -#undef var - return 0; -} - -STORE(bch_stats) -{ - return size; -} - -static void bch_stats_release(struct kobject *k) -{ -} - -static struct attribute *bch_stats_files[] = { - &sysfs_cache_hits, - &sysfs_cache_misses, - &sysfs_cache_bypass_hits, - &sysfs_cache_bypass_misses, - &sysfs_cache_hit_ratio, - &sysfs_cache_readaheads, - &sysfs_cache_miss_collisions, - &sysfs_bypassed, - &sysfs_foreground_write_ratio, - &sysfs_foreground_writes, - &sysfs_gc_writes, - &sysfs_discards, - NULL -}; -static KTYPE(bch_stats); - -int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, - struct kobject *parent) -{ - int ret = kobject_add(&acc->total.kobj, parent, - "stats_total"); - ret = ret ?: kobject_add(&acc->five_minute.kobj, parent, - "stats_five_minute"); - ret = ret ?: kobject_add(&acc->hour.kobj, parent, - "stats_hour"); - ret = ret ?: kobject_add(&acc->day.kobj, parent, - "stats_day"); - return ret; -} - -void bch_cache_accounting_clear(struct cache_accounting *acc) -{ - memset(&acc->total.cache_hits, - 0, - sizeof(unsigned long) * 9); -} - -void bch_cache_accounting_destroy(struct cache_accounting *acc) -{ - kobject_put(&acc->total.kobj); - kobject_put(&acc->five_minute.kobj); - kobject_put(&acc->hour.kobj); - kobject_put(&acc->day.kobj); - - atomic_set(&acc->closing, 1); - if (del_timer_sync(&acc->timer)) - closure_return(&acc->cl); -} - -/* EWMA scaling */ - -static void scale_stat(unsigned long *stat) -{ - *stat = ewma_add(*stat, 0, accounting_weight); -} - -static void scale_stats(struct cache_stats *stats, unsigned long rescale_at) -{ - if (++stats->rescale == rescale_at) { - stats->rescale = 0; - scale_stat(&stats->cache_hits); - scale_stat(&stats->cache_misses); - scale_stat(&stats->cache_bypass_hits); - scale_stat(&stats->cache_bypass_misses); - scale_stat(&stats->cache_readaheads); - scale_stat(&stats->cache_miss_collisions); - scale_stat(&stats->sectors_bypassed); - scale_stat(&stats->foreground_write_sectors); - scale_stat(&stats->gc_write_sectors); - scale_stat(&stats->discard_sectors); - } -} - -static void 
scale_accounting(unsigned long data) -{ - struct cache_accounting *acc = (struct cache_accounting *) data; - -#define move_stat(name) do { \ - unsigned t = atomic_xchg(&acc->collector.name, 0); \ - t <<= 16; \ - acc->five_minute.name += t; \ - acc->hour.name += t; \ - acc->day.name += t; \ - acc->total.name += t; \ -} while (0) - - move_stat(cache_hits); - move_stat(cache_misses); - move_stat(cache_bypass_hits); - move_stat(cache_bypass_misses); - move_stat(cache_readaheads); - move_stat(cache_miss_collisions); - move_stat(sectors_bypassed); - move_stat(foreground_write_sectors); - move_stat(gc_write_sectors); - move_stat(discard_sectors); - - scale_stats(&acc->total, 0); - scale_stats(&acc->day, DAY_RESCALE); - scale_stats(&acc->hour, HOUR_RESCALE); - scale_stats(&acc->five_minute, FIVE_MINUTE_RESCALE); - - acc->timer.expires += accounting_delay; - - if (!atomic_read(&acc->closing)) - add_timer(&acc->timer); - else - closure_return(&acc->cl); -} - -void bch_cache_accounting_init(struct cache_accounting *acc, - struct closure *parent) -{ - kobject_init(&acc->total.kobj, &bch_stats_ktype); - kobject_init(&acc->five_minute.kobj, &bch_stats_ktype); - kobject_init(&acc->hour.kobj, &bch_stats_ktype); - kobject_init(&acc->day.kobj, &bch_stats_ktype); - - closure_init(&acc->cl, parent); - init_timer(&acc->timer); - acc->timer.expires = jiffies + accounting_delay; - acc->timer.data = (unsigned long) acc; - acc->timer.function = scale_accounting; - add_timer(&acc->timer); -} diff --git a/fs/bcachefs/stats.h b/fs/bcachefs/stats.h deleted file mode 100644 index a3c7bd2629bf..000000000000 --- a/fs/bcachefs/stats.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _BCACHE_STATS_H_ -#define _BCACHE_STATS_H_ - -#include "stats_types.h" - -struct bch_fs; -struct cached_dev; -struct bcache_device; - -#ifndef NO_BCACHE_ACCOUNTING - -void bch_cache_accounting_init(struct cache_accounting *, struct closure *); -int bch_cache_accounting_add_kobjs(struct cache_accounting *, struct kobject *); -void bch_cache_accounting_clear(struct cache_accounting *); -void bch_cache_accounting_destroy(struct cache_accounting *); - -#else - -static inline void bch_cache_accounting_init(struct cache_accounting *acc, - struct closure *cl) {} -static inline int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, - struct kobject *cl) -{ - return 0; -} -static inline void bch_cache_accounting_clear(struct cache_accounting *acc) {} -static inline void bch_cache_accounting_destroy(struct cache_accounting *acc) {} - -#endif - -static inline void mark_cache_stats(struct cache_stat_collector *stats, - bool hit, bool bypass) -{ - atomic_inc(&stats->cache_hit_array[!bypass][!hit]); -} - -static inline void bch_mark_cache_accounting(struct bch_fs *c, - struct cached_dev *dc, - bool hit, bool bypass) -{ - mark_cache_stats(&dc->accounting.collector, hit, bypass); - mark_cache_stats(&c->accounting.collector, hit, bypass); -} - -static inline void bch_mark_sectors_bypassed(struct bch_fs *c, - struct cached_dev *dc, - unsigned sectors) -{ - atomic_add(sectors, &dc->accounting.collector.sectors_bypassed); - atomic_add(sectors, &c->accounting.collector.sectors_bypassed); -} - -static inline void bch_mark_gc_write(struct bch_fs *c, int sectors) -{ - atomic_add(sectors, &c->accounting.collector.gc_write_sectors); -} - -static inline void bch_mark_foreground_write(struct bch_fs *c, int sectors) -{ - atomic_add(sectors, &c->accounting.collector.foreground_write_sectors); -} - -static inline void bch_mark_discard(struct bch_fs *c, int sectors) -{ - 
atomic_add(sectors, &c->accounting.collector.discard_sectors); -} - -#endif /* _BCACHE_STATS_H_ */ diff --git a/fs/bcachefs/stats_types.h b/fs/bcachefs/stats_types.h deleted file mode 100644 index 28e4c69e8e6d..000000000000 --- a/fs/bcachefs/stats_types.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef _BCACHE_STATS_TYPES_H_ -#define _BCACHE_STATS_TYPES_H_ - -struct cache_stat_collector { - union { - struct { - atomic_t cache_hits; - atomic_t cache_misses; - atomic_t cache_bypass_hits; - atomic_t cache_bypass_misses; - }; - - /* cache_hit_array[!bypass][!hit]: */ - atomic_t cache_hit_array[2][2]; - }; - - - atomic_t cache_readaheads; - atomic_t cache_miss_collisions; - atomic_t sectors_bypassed; - atomic_t foreground_write_sectors; - atomic_t gc_write_sectors; - atomic_t discard_sectors; -}; - -struct cache_stats { - struct kobject kobj; - - unsigned long cache_hits; - unsigned long cache_misses; - unsigned long cache_bypass_hits; - unsigned long cache_bypass_misses; - unsigned long cache_readaheads; - unsigned long cache_miss_collisions; - unsigned long sectors_bypassed; - unsigned long foreground_write_sectors; - unsigned long gc_write_sectors; - unsigned long discard_sectors; - - unsigned rescale; -}; - -struct cache_accounting { - struct closure cl; - struct timer_list timer; - atomic_t closing; - - struct cache_stat_collector collector; - - struct cache_stats total; - struct cache_stats five_minute; - struct cache_stats hour; - struct cache_stats day; -}; - -#endif /* _BCACHE_STATS_TYPES_H_ */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 67c03e1932b1..41d54d3cdb10 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1,6 +1,5 @@ #include "bcache.h" -#include "blockdev.h" #include "checksum.h" #include "error.h" #include "io.h" @@ -710,8 +709,6 @@ static void write_super_endio(struct bio *bio) bch_dev_fatal_io_err_on(bio->bi_error, ca, "superblock write"); - bch_account_io_completion(ca); - closure_put(&ca->fs->sb_write); percpu_ref_put(&ca->io_ref); } @@ -744,7 +741,7 @@ static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); bch_bio_map(bio, sb); - closure_bio_submit_punt(bio, &c->sb_write, c); + closure_bio_submit(bio, &c->sb_write); return true; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8f2c6b3fb419..fdd731d25917 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -7,7 +7,6 @@ */ #include "bcache.h" -#include "blockdev.h" #include "alloc.h" #include "btree_cache.h" #include "btree_gc.h" @@ -28,12 +27,9 @@ #include "move.h" #include "migrate.h" #include "movinggc.h" -#include "notify.h" -#include "stats.h" #include "super.h" #include "super-io.h" #include "tier.h" -#include "writeback.h" #include <linux/backing-dev.h> #include <linux/blkdev.h> @@ -45,7 +41,6 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/random.h> -#include <linux/reboot.h> #include <linux/sysfs.h> #include <crypto/hash.h> @@ -66,7 +61,6 @@ static LIST_HEAD(bch_fs_list); static DEFINE_MUTEX(bch_fs_list_lock); static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); -struct workqueue_struct *bcache_io_wq; static void bch_dev_free(struct bch_dev *); static int bch_dev_alloc(struct bch_fs *, unsigned); @@ -221,8 +215,6 @@ void bch_fs_read_only(struct bch_fs *c) if (test_bit(BCH_FS_ERROR, &c->flags)) goto out; - trace_fs_read_only(c); - /* * Block new foreground-end write operations from starting - any new * writes will return -EROFS: @@ -270,8 +262,6 @@ void 
bch_fs_read_only(struct bch_fs *c) } c->state = BCH_FS_RO; - bch_notify_fs_read_only(c); - trace_fs_read_only_done(c); out: mutex_unlock(&c->state_lock); } @@ -358,7 +348,6 @@ static void bch_fs_free(struct bch_fs *c) bch_io_clock_exit(&c->io_clock[WRITE]); bch_io_clock_exit(&c->io_clock[READ]); bch_fs_compress_exit(c); - bch_fs_blockdev_exit(c); bdi_destroy(&c->bdi); lg_lock_free(&c->usage_lock); free_percpu(c->usage_percpu); @@ -390,7 +379,6 @@ static void bch_fs_exit(struct bch_fs *c) del_timer_sync(&c->foreground_write_wakeup); cancel_delayed_work_sync(&c->pd_controllers_update); cancel_work_sync(&c->read_only_work); - cancel_work_sync(&c->bio_submit_work); cancel_work_sync(&c->read_retry_work); for (i = 0; i < c->sb.nr_devices; i++) @@ -422,8 +410,6 @@ static void bch_fs_offline(struct bch_fs *c) bch_fs_debug_exit(c); bch_fs_chardev_exit(c); - bch_cache_accounting_destroy(&c->accounting); - kobject_put(&c->time_stats); kobject_put(&c->opts_dir); kobject_put(&c->internal); @@ -431,65 +417,13 @@ static void bch_fs_offline(struct bch_fs *c) __bch_fs_read_only(c); } -/* - * should be __bch_fs_stop4 - block devices are closed, now we can finally - * free it - */ void bch_fs_release(struct kobject *kobj) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - bch_notify_fs_stopped(c); bch_fs_free(c); } -/* - * All activity on the filesystem should have stopped now - close devices: - */ -static void __bch_fs_stop3(struct closure *cl) -{ - struct bch_fs *c = container_of(cl, struct bch_fs, cl); - - bch_fs_exit(c); -} - -/* - * Openers (i.e. block devices) should have exited, shutdown all userspace - * interfaces and wait for &c->cl to hit 0 - */ -static void __bch_fs_stop2(struct closure *cl) -{ - struct bch_fs *c = container_of(cl, struct bch_fs, caching); - - bch_fs_offline(c); - - closure_return(cl); -} - -/* - * First phase of the shutdown process that's kicked off by bch_fs_stop_async(); - * we haven't waited for anything to stop yet, we're just punting to process - * context to shut down block devices: - */ -static void __bch_fs_stop1(struct closure *cl) -{ - struct bch_fs *c = container_of(cl, struct bch_fs, caching); - - bch_blockdevs_stop(c); - - continue_at(cl, __bch_fs_stop2, system_wq); -} - -void bch_fs_stop_async(struct bch_fs *c) -{ - mutex_lock(&c->state_lock); - if (c->state != BCH_FS_STOPPING) { - c->state = BCH_FS_STOPPING; - closure_queue(&c->caching); - } - mutex_unlock(&c->state_lock); -} - void bch_fs_stop(struct bch_fs *c) { mutex_lock(&c->state_lock); @@ -497,26 +431,13 @@ void bch_fs_stop(struct bch_fs *c) c->state = BCH_FS_STOPPING; mutex_unlock(&c->state_lock); - bch_blockdevs_stop(c); - - closure_sync(&c->caching); - closure_debug_destroy(&c->caching); - bch_fs_offline(c); - closure_put(&c->cl); closure_sync(&c->cl); bch_fs_exit(c); } -/* Stop, detaching from backing devices: */ -void bch_fs_detach(struct bch_fs *c) -{ - if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags)) - bch_fs_stop_async(c); -} - #define alloc_bucket_pages(gfp, ca) \ ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca)))) @@ -536,7 +457,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_init(&c->state_lock); mutex_init(&c->sb_lock); - INIT_RADIX_TREE(&c->devices, GFP_KERNEL); mutex_init(&c->btree_cache_lock); mutex_init(&c->bucket_lock); mutex_init(&c->btree_root_lock); @@ -553,7 +473,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch_fs_tiering_init(c); INIT_LIST_HEAD(&c->list); - 
INIT_LIST_HEAD(&c->cached_devs); INIT_LIST_HEAD(&c->btree_cache); INIT_LIST_HEAD(&c->btree_cache_freeable); INIT_LIST_HEAD(&c->btree_cache_freed); @@ -563,8 +482,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_init(&c->btree_interior_update_lock); mutex_init(&c->bio_bounce_pages_lock); - INIT_WORK(&c->bio_submit_work, bch_bio_submit_work); - spin_lock_init(&c->bio_submit_lock); bio_list_init(&c->read_retry_list); spin_lock_init(&c->read_retry_lock); INIT_WORK(&c->read_retry_work, bch_read_retry_work); @@ -577,11 +494,7 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->prio_clock[WRITE].hand = 1; c->prio_clock[WRITE].min_prio = 0; - c->congested_read_threshold_us = 2000; - c->congested_write_threshold_us = 20000; - c->error_limit = 16 << IO_ERROR_SHIFT; init_waitqueue_head(&c->writeback_wait); - c->writeback_pages_max = (256 << 10) / PAGE_SIZE; c->copy_gc_enabled = 1; @@ -595,8 +508,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->journal.blocked_time = &c->journal_blocked_time; c->journal.flush_seq_time = &c->journal_flush_seq_time; - mutex_init(&c->uevent_lock); - mutex_lock(&c->sb_lock); if (bch_sb_to_fs(c, sb)) { @@ -648,7 +559,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mempool_init_page_pool(&c->btree_bounce_pool, 1, ilog2(btree_pages(c))) || bdi_setup_and_register(&c->bdi, "bcache") || - bch_fs_blockdev_init(c) || bch_io_clock_init(&c->io_clock[READ]) || bch_io_clock_init(&c->io_clock[WRITE]) || bch_fs_journal_init(&c->journal, journal_entry_bytes) || @@ -679,14 +589,6 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) kobject_init(&c->internal, &bch_fs_internal_ktype); kobject_init(&c->opts_dir, &bch_fs_opts_dir_ktype); kobject_init(&c->time_stats, &bch_fs_time_stats_ktype); - - bch_cache_accounting_init(&c->accounting, &c->cl); - - closure_init(&c->caching, &c->cl); - set_closure_fn(&c->caching, __bch_fs_stop1, system_wq); - - closure_get(&c->cl); - continue_at_noreturn(&c->cl, __bch_fs_stop3, system_wq); return c; err: bch_fs_free(c); @@ -717,8 +619,7 @@ static const char *__bch_fs_online(struct bch_fs *c) if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || kobject_add(&c->internal, &c->kobj, "internal") || kobject_add(&c->opts_dir, &c->kobj, "options") || - kobject_add(&c->time_stats, &c->kobj, "time_stats") || - bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) + kobject_add(&c->time_stats, &c->kobj, "time_stats")) return "error creating sysfs objects"; mutex_lock(&c->state_lock); @@ -728,12 +629,6 @@ static const char *__bch_fs_online(struct bch_fs *c) if (bch_dev_sysfs_online(ca)) goto err; - err = "can't bring up blockdev volumes"; - if (bch_blockdev_volumes_start(c)) - goto err; - - bch_attach_backing_devs(c); - list_add(&c->list, &bch_fs_list); err = NULL; err: @@ -1394,8 +1289,6 @@ static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca) BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); - trace_bcache_cache_read_write(ca); - if (bch_dev_allocator_start(ca)) return "error starting allocator thread"; @@ -1405,9 +1298,6 @@ static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca) if (bch_tiering_start(c)) return "error starting tiering thread"; - bch_notify_dev_read_write(ca); - trace_bcache_cache_read_write_done(ca); - return NULL; } @@ -1621,7 +1511,6 @@ have_slot: goto err; } - bch_notify_dev_added(ca); mutex_unlock(&c->state_lock); return 0; 
err_unlock: @@ -1895,13 +1784,10 @@ const char *bch_fs_open_incremental(const char *path) if (err) return err; - if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) { - mutex_lock(&bch_fs_list_lock); - err = bch_backing_dev_register(&sb); - mutex_unlock(&bch_fs_list_lock); - } else { + if (!__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) err = __bch_fs_open_incremental(&sb, opts); - } + else + err = "not a bcachefs superblock"; bch_free_super(&sb); @@ -1910,114 +1796,21 @@ const char *bch_fs_open_incremental(const char *path) /* Global interfaces/init */ -#define kobj_attribute_write(n, fn) \ - static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) - -#define kobj_attribute_rw(n, show, store) \ - static struct kobj_attribute ksysfs_##n = \ - __ATTR(n, S_IWUSR|S_IRUSR, show, store) - -static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, - const char *, size_t); - -kobj_attribute_write(register, register_bcache); -kobj_attribute_write(register_quiet, register_bcache); - -static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, - const char *buffer, size_t size) -{ - ssize_t ret = -EINVAL; - const char *err = "cannot allocate memory"; - char *path = NULL; - - if (!try_module_get(THIS_MODULE)) - return -EBUSY; - - if (!(path = kstrndup(skip_spaces(buffer), size, GFP_KERNEL))) - goto err; - - err = bch_fs_open_incremental(strim(path)); - if (err) - goto err; - - ret = size; -out: - kfree(path); - module_put(THIS_MODULE); - return ret; -err: - pr_err("error opening %s: %s", path, err); - goto out; -} - -static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) -{ - if (code == SYS_DOWN || - code == SYS_HALT || - code == SYS_POWER_OFF) { - struct bch_fs *c; - - mutex_lock(&bch_fs_list_lock); - - if (!list_empty(&bch_fs_list)) - pr_info("Setting all devices read only:"); - - list_for_each_entry(c, &bch_fs_list, list) - bch_fs_read_only_async(c); - - list_for_each_entry(c, &bch_fs_list, list) - bch_fs_read_only(c); - - mutex_unlock(&bch_fs_list_lock); - } - - return NOTIFY_DONE; -} - -static struct notifier_block reboot = { - .notifier_call = bcache_reboot, - .priority = INT_MAX, /* before any real devices */ -}; - -static ssize_t reboot_test(struct kobject *k, struct kobj_attribute *attr, - const char *buffer, size_t size) -{ - bcache_reboot(NULL, SYS_DOWN, NULL); - return size; -} - -kobj_attribute_write(reboot, reboot_test); - static void bcache_exit(void) { bch_debug_exit(); bch_vfs_exit(); - bch_blockdev_exit(); bch_chardev_exit(); if (bcache_kset) kset_unregister(bcache_kset); - if (bcache_io_wq) - destroy_workqueue(bcache_io_wq); - unregister_reboot_notifier(&reboot); } static int __init bcache_init(void) { - static const struct attribute *files[] = { - &ksysfs_register.attr, - &ksysfs_register_quiet.attr, - &ksysfs_reboot.attr, - NULL - }; - - register_reboot_notifier(&reboot); bkey_pack_test(); - if (!(bcache_io_wq = create_freezable_workqueue("bcache_io")) || - !(bcache_kset = kset_create_and_add("bcache", NULL, fs_kobj)) || - sysfs_create_files(&bcache_kset->kobj, files) || + if (!(bcache_kset = kset_create_and_add("bcache", NULL, fs_kobj)) || bch_chardev_init() || - bch_blockdev_init() || bch_vfs_init() || bch_debug_init()) goto err; diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index f676d2cb2e43..26b6da49fd6e 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -109,14 +109,11 @@ int bch_dev_online(struct bch_fs *, const char *); int bch_dev_offline(struct bch_fs *, struct bch_dev *, int); int 
bch_dev_evacuate(struct bch_fs *, struct bch_dev *); -void bch_fs_detach(struct bch_fs *); - bool bch_fs_emergency_read_only(struct bch_fs *); void bch_fs_read_only(struct bch_fs *); const char *bch_fs_read_write(struct bch_fs *); void bch_fs_release(struct kobject *); -void bch_fs_stop_async(struct bch_fs *); void bch_fs_stop(struct bch_fs *); const char *bch_fs_start(struct bch_fs *); @@ -124,8 +121,6 @@ const char *bch_fs_open(char * const *, unsigned, struct bch_opts, struct bch_fs **); const char *bch_fs_open_incremental(const char *path); -extern struct workqueue_struct *bcache_io_wq; - extern struct kobj_type bch_fs_ktype; extern struct kobj_type bch_fs_internal_ktype; extern struct kobj_type bch_fs_time_stats_ktype; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 6f87f71072a2..70b0c546171f 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -7,7 +7,6 @@ #include "bcache.h" #include "alloc.h" -#include "blockdev.h" #include "compress.h" #include "sysfs.h" #include "btree_cache.h" @@ -20,23 +19,15 @@ #include "keylist.h" #include "move.h" #include "opts.h" -#include "request.h" #include "super-io.h" #include "tier.h" -#include "writeback.h" #include <linux/blkdev.h> #include <linux/sort.h> -write_attribute(attach); -write_attribute(detach); -write_attribute(unregister); -write_attribute(stop); -write_attribute(clear_stats); write_attribute(trigger_btree_coalesce); write_attribute(trigger_gc); write_attribute(prune_cache); -write_attribute(blockdev_volume_create); read_attribute(uuid); read_attribute(minor); @@ -85,37 +76,13 @@ read_attribute(has_metadata); read_attribute(bset_tree_stats); read_attribute(alloc_debug); -read_attribute(state); read_attribute(cache_read_races); -read_attribute(writeback_keys_done); -read_attribute(writeback_keys_failed); -read_attribute(io_errors); -rw_attribute(io_error_limit); -rw_attribute(io_error_halflife); -read_attribute(congested); -rw_attribute(congested_read_threshold_us); -rw_attribute(congested_write_threshold_us); - -rw_attribute(sequential_cutoff); -rw_attribute(cache_mode); -rw_attribute(writeback_metadata); -rw_attribute(writeback_running); -rw_attribute(writeback_percent); -sysfs_pd_controller_attribute(writeback); - -read_attribute(stripe_size); -read_attribute(partial_stripes_expensive); rw_attribute(journal_write_delay_ms); rw_attribute(journal_reclaim_delay_ms); read_attribute(journal_entry_size_max); rw_attribute(discard); -rw_attribute(running); -rw_attribute(label); -rw_attribute(readahead); -rw_attribute(verify); -rw_attribute(bypass_torture_test); rw_attribute(cache_replacement_policy); rw_attribute(foreground_write_ratelimit_enabled); @@ -133,7 +100,6 @@ rw_attribute(pd_controllers_update_seconds); rw_attribute(foreground_target_percent); -rw_attribute(size); read_attribute(meta_replicas_have); read_attribute(data_replicas_have); @@ -161,286 +127,6 @@ static struct attribute sysfs_state_rw = { .mode = S_IRUGO }; -SHOW(bch_cached_dev) -{ - struct cached_dev *dc = container_of(kobj, struct cached_dev, - disk.kobj); - const char *states[] = { "no cache", "clean", "dirty", "inconsistent" }; - -#define var(stat) (dc->stat) - - if (attr == &sysfs_cache_mode) - return bch_snprint_string_list(buf, PAGE_SIZE, - bch_cache_modes + 1, - BDEV_CACHE_MODE(dc->disk_sb.sb)); - - var_printf(verify, "%i"); - var_printf(bypass_torture_test, "%i"); - var_printf(writeback_metadata, "%i"); - var_printf(writeback_running, "%i"); - var_print(writeback_percent); - sysfs_pd_controller_show(writeback, &dc->writeback_pd); - - 
sysfs_hprint(dirty_data, - bcache_dev_sectors_dirty(&dc->disk) << 9); - sysfs_print(dirty_bytes, - bcache_dev_sectors_dirty(&dc->disk) << 9); - - sysfs_hprint(stripe_size, dc->disk.stripe_size << 9); - var_printf(partial_stripes_expensive, "%u"); - - var_hprint(sequential_cutoff); - var_hprint(readahead); - - sysfs_print(running, atomic_read(&dc->running)); - sysfs_print(state, states[BDEV_STATE(dc->disk_sb.sb)]); - - if (attr == &sysfs_label) { - memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - buf[BCH_SB_LABEL_SIZE + 1] = '\0'; - strcat(buf, "\n"); - return strlen(buf); - } - -#undef var - return 0; -} - -STORE(bch_cached_dev) -{ - struct cached_dev *dc = container_of(kobj, struct cached_dev, - disk.kobj); - struct kobj_uevent_env *env; - -#define d_strtoul(var) sysfs_strtoul(var, dc->var) -#define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX) -#define d_strtoi_h(var) sysfs_hatoi(var, dc->var) - - d_strtoul(verify); - d_strtoul(bypass_torture_test); - d_strtoul(writeback_metadata); - d_strtoul(writeback_running); - sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); - sysfs_pd_controller_store(writeback, &dc->writeback_pd); - - d_strtoi_h(sequential_cutoff); - d_strtoi_h(readahead); - - if (attr == &sysfs_writeback_running) - bch_writeback_queue(dc); - - if (attr == &sysfs_writeback_percent) - schedule_delayed_work(&dc->writeback_pd_update, - dc->writeback_pd_update_seconds * HZ); - - if (attr == &sysfs_clear_stats) - bch_cache_accounting_clear(&dc->accounting); - - if (attr == &sysfs_running && - strtoul_or_return(buf)) - bch_cached_dev_run(dc); - - if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); - - if (v < 0) - return v; - - if ((unsigned) v != BDEV_CACHE_MODE(dc->disk_sb.sb)) { - SET_BDEV_CACHE_MODE(dc->disk_sb.sb, v); - bch_write_bdev_super(dc, NULL); - } - } - - if (attr == &sysfs_label) { - u64 journal_seq = 0; - int ret = 0; - - if (size > BCH_SB_LABEL_SIZE) - return -EINVAL; - - mutex_lock(&dc->disk.inode_lock); - - memcpy(dc->disk_sb.sb->label, buf, size); - if (size < BCH_SB_LABEL_SIZE) - dc->disk_sb.sb->label[size] = '\0'; - if (size && dc->disk_sb.sb->label[size - 1] == '\n') - dc->disk_sb.sb->label[size - 1] = '\0'; - - memcpy(dc->disk.inode.v.i_label, - dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - - bch_write_bdev_super(dc, NULL); - - if (dc->disk.c) - ret = bch_btree_update(dc->disk.c, BTREE_ID_INODES, - &dc->disk.inode.k_i, - &journal_seq); - - mutex_unlock(&dc->disk.inode_lock); - - if (ret) - return ret; - - if (dc->disk.c) - ret = bch_journal_flush_seq(&dc->disk.c->journal, - journal_seq); - if (ret) - return ret; - - env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); - if (!env) - return -ENOMEM; - add_uevent_var(env, "DRIVER=bcache"); - add_uevent_var(env, "CACHED_UUID=%pU", dc->disk_sb.sb->disk_uuid.b), - add_uevent_var(env, "CACHED_LABEL=%s", buf); - kobject_uevent_env( - &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp); - kfree(env); - } - - if (attr == &sysfs_attach) { - struct bch_fs *c; - uuid_le uuid; - int ret; - - if (uuid_parse(buf, &uuid)) - return -EINVAL; - - c = bch_uuid_to_fs(uuid); - if (!c) { - pr_err("Can't attach %s: cache set not found", buf); - return -ENOENT; - } - - dc->disk_sb.sb->set_uuid = uuid; - - ret = bch_cached_dev_attach(dc, c); - closure_put(&c->cl); - if (ret) - return ret; - } - - if (attr == &sysfs_detach && dc->disk.c) - bch_cached_dev_detach(dc); - - if (attr == &sysfs_stop) - bch_blockdev_stop(&dc->disk); - - return 
size; -} - -static struct attribute *bch_cached_dev_files[] = { - &sysfs_attach, - &sysfs_detach, - &sysfs_stop, - &sysfs_cache_mode, - &sysfs_writeback_metadata, - &sysfs_writeback_running, - &sysfs_writeback_percent, - sysfs_pd_controller_files(writeback), - &sysfs_dirty_data, - &sysfs_dirty_bytes, - &sysfs_stripe_size, - &sysfs_partial_stripes_expensive, - &sysfs_sequential_cutoff, - &sysfs_clear_stats, - &sysfs_running, - &sysfs_state, - &sysfs_label, - &sysfs_readahead, -#ifdef CONFIG_BCACHEFS_DEBUG - &sysfs_verify, - &sysfs_bypass_torture_test, -#endif - NULL -}; -KTYPE(bch_cached_dev); - -SHOW(bch_blockdev_volume) -{ - struct bcache_device *d = container_of(kobj, struct bcache_device, - kobj); - - sysfs_hprint(size, le64_to_cpu(d->inode.v.i_size)); - - if (attr == &sysfs_label) { - memcpy(buf, d->inode.v.i_label, BCH_SB_LABEL_SIZE); - buf[BCH_SB_LABEL_SIZE + 1] = '\0'; - strcat(buf, "\n"); - return strlen(buf); - } - - return 0; -} - -STORE(bch_blockdev_volume) -{ - struct bcache_device *d = container_of(kobj, struct bcache_device, - kobj); - - if (attr == &sysfs_size) { - u64 journal_seq = 0; - u64 v = strtoi_h_or_return(buf); - int ret; - - mutex_lock(&d->inode_lock); - - if (v < le64_to_cpu(d->inode.v.i_size) ){ - ret = bch_inode_truncate(d->c, d->inode.k.p.inode, - v >> 9, NULL, NULL); - if (ret) { - mutex_unlock(&d->inode_lock); - return ret; - } - } - d->inode.v.i_size = cpu_to_le64(v); - ret = bch_btree_update(d->c, BTREE_ID_INODES, - &d->inode.k_i, &journal_seq); - - mutex_unlock(&d->inode_lock); - - if (ret) - return ret; - - ret = bch_journal_flush_seq(&d->c->journal, journal_seq); - if (ret) - return ret; - - set_capacity(d->disk, v >> 9); - } - - if (attr == &sysfs_label) { - u64 journal_seq = 0; - int ret; - - mutex_lock(&d->inode_lock); - - memcpy(d->inode.v.i_label, buf, BCH_SB_LABEL_SIZE); - ret = bch_btree_update(d->c, BTREE_ID_INODES, - &d->inode.k_i, &journal_seq); - - mutex_unlock(&d->inode_lock); - - return ret ?: bch_journal_flush_seq(&d->c->journal, journal_seq); - } - - if (attr == &sysfs_unregister) { - set_bit(BCACHE_DEV_DETACHING, &d->flags); - bch_blockdev_stop(d); - } - - return size; -} - -static struct attribute *bch_blockdev_volume_files[] = { - &sysfs_unregister, - &sysfs_label, - &sysfs_size, - NULL -}; -KTYPE(bch_blockdev_volume); - static int bch_bset_print_stats(struct bch_fs *c, char *buf) { struct bset_stats stats; @@ -647,22 +333,6 @@ SHOW(bch_fs) sysfs_print(cache_read_races, atomic_long_read(&c->cache_read_races)); - sysfs_print(writeback_keys_done, - atomic_long_read(&c->writeback_keys_done)); - sysfs_print(writeback_keys_failed, - atomic_long_read(&c->writeback_keys_failed)); - - /* See count_io_errors for why 88 */ - sysfs_print(io_error_halflife, c->error_decay * 88); - sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT); - - sysfs_hprint(congested, - ((uint64_t) bch_get_congested(c)) << 9); - sysfs_print(congested_read_threshold_us, - c->congested_read_threshold_us); - sysfs_print(congested_write_threshold_us, - c->congested_write_threshold_us); - sysfs_printf(foreground_write_ratelimit_enabled, "%i", c->foreground_write_ratelimit_enabled); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); @@ -712,40 +382,6 @@ STORE(__bch_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - if (attr == &sysfs_unregister) { - bch_fs_detach(c); - return size; - } - - if (attr == &sysfs_stop) { - bch_fs_stop_async(c); - return size; - } - - if (attr == &sysfs_clear_stats) { - atomic_long_set(&c->writeback_keys_done, 0); - 
atomic_long_set(&c->writeback_keys_failed, 0); - bch_cache_accounting_clear(&c->accounting); - - return size; - } - - sysfs_strtoul(congested_read_threshold_us, - c->congested_read_threshold_us); - sysfs_strtoul(congested_write_threshold_us, - c->congested_write_threshold_us); - - if (attr == &sysfs_io_error_limit) { - c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT; - return size; - } - - /* See count_io_errors() for why 88 */ - if (attr == &sysfs_io_error_halflife) { - c->error_decay = strtoul_or_return(buf) / 88; - return size; - } - sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms); sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); @@ -796,14 +432,6 @@ STORE(__bch_fs) return size; } - if (attr == &sysfs_blockdev_volume_create) { - u64 v = strtoi_h_or_return(buf); - int r = bch_blockdev_volume_create(c, v); - - if (r) - return r; - } - if (attr == &sysfs_trigger_btree_coalesce) bch_coalesce(c); @@ -835,12 +463,9 @@ STORE(bch_fs) } static struct attribute *bch_fs_files[] = { - &sysfs_unregister, - &sysfs_stop, &sysfs_journal_write_delay_ms, &sysfs_journal_reclaim_delay_ms, &sysfs_journal_entry_size_max, - &sysfs_blockdev_volume_create, &sysfs_block_size, &sysfs_block_size_bytes, @@ -854,13 +479,6 @@ static struct attribute *bch_fs_files[] = { &sysfs_average_key_size, - &sysfs_io_error_limit, - &sysfs_io_error_halflife, - &sysfs_congested, - &sysfs_congested_read_threshold_us, - &sysfs_congested_write_threshold_us, - &sysfs_clear_stats, - &sysfs_meta_replicas_have, &sysfs_data_replicas_have, @@ -902,8 +520,6 @@ static struct attribute *bch_fs_internal_files[] = { &sysfs_bset_tree_stats, &sysfs_cache_read_races, - &sysfs_writeback_keys_done, - &sysfs_writeback_keys_failed, &sysfs_trigger_btree_coalesce, &sysfs_trigger_gc, @@ -1171,9 +787,6 @@ SHOW(bch_dev) (atomic64_read(&ca->meta_sectors_written) + atomic64_read(&ca->btree_sectors_written)) << 9); - sysfs_print(io_errors, - atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT); - sysfs_hprint(dirty_data, stats.sectors[S_DIRTY] << 9); sysfs_print(dirty_bytes, stats.sectors[S_DIRTY] << 9); sysfs_print(dirty_buckets, stats.buckets_dirty); @@ -1279,18 +892,6 @@ STORE(bch_dev) bch_tiering_start(c); } - if (attr == &sysfs_clear_stats) { - int cpu; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(ca->sectors_written, cpu) = 0; - - atomic64_set(&ca->btree_sectors_written, 0); - atomic64_set(&ca->meta_sectors_written, 0); - atomic_set(&ca->io_count, 0); - atomic_set(&ca->io_errors, 0); - } - return size; } @@ -1323,8 +924,6 @@ static struct attribute *bch_dev_files[] = { &sysfs_written, &sysfs_btree_written, &sysfs_metadata_written, - &sysfs_io_errors, - &sysfs_clear_stats, &sysfs_cache_replacement_policy, &sysfs_tier, &sysfs_state_rw, diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index 911d1bbfe03b..970699930c5e 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -1,6 +1,5 @@ #include "bcache.h" #include "alloc_types.h" -#include "blockdev_types.h" #include "buckets.h" #include "btree_types.h" #include "keylist.h" diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index cd8ca6e6cc86..95be24854669 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -610,13 +610,7 @@ static inline sector_t bdev_sectors(struct block_device *bdev) #define closure_bio_submit(bio, cl) \ do { \ closure_get(cl); \ - generic_make_request(bio); \ -} while (0) - -#define closure_bio_submit_punt(bio, cl, c) \ -do { \ - closure_get(cl); \ - bch_generic_make_request(bio, c); \ + submit_bio(bio); \ } while (0) 
#define kthread_wait_freezable(cond) \ diff --git a/fs/bcachefs/writeback.c b/fs/bcachefs/writeback.c deleted file mode 100644 index 3b7798000a18..000000000000 --- a/fs/bcachefs/writeback.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * background writeback - scan btree for dirty data and write it to the backing - * device - * - * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> - * Copyright 2012 Google, Inc. - */ - -#include "bcache.h" -#include "btree_update.h" -#include "clock.h" -#include "debug.h" -#include "error.h" -#include "extents.h" -#include "io.h" -#include "keybuf.h" -#include "keylist.h" -#include "writeback.h" - -#include <linux/delay.h> -#include <linux/freezer.h> -#include <linux/kthread.h> -#include <trace/events/bcachefs.h> - -/* Rate limiting */ - -static void __update_writeback_rate(struct cached_dev *dc) -{ - struct bch_fs *c = dc->disk.c; - u64 cache_dirty_target = - div_u64(c->capacity * dc->writeback_percent, 100); - s64 target = div64_u64(cache_dirty_target * - bdev_sectors(dc->disk_sb.bdev), - c->cached_dev_sectors); - s64 dirty = bcache_dev_sectors_dirty(&dc->disk); - - bch_pd_controller_update(&dc->writeback_pd, target << 9, - dirty << 9, -1); -} - -static void update_writeback_rate(struct work_struct *work) -{ - struct cached_dev *dc = container_of(to_delayed_work(work), - struct cached_dev, - writeback_pd_update); - - down_read(&dc->writeback_lock); - - if (atomic_read(&dc->has_dirty) && - dc->writeback_percent && - !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) - __update_writeback_rate(dc); - else - dc->writeback_pd.rate.rate = UINT_MAX; - - up_read(&dc->writeback_lock); - - schedule_delayed_work(&dc->writeback_pd_update, - dc->writeback_pd_update_seconds * HZ); -} - -struct dirty_io { - struct closure cl; - struct bch_replace_info replace; - struct cached_dev *dc; - struct bch_dev *ca; - struct keybuf_key *w; - struct bch_extent_ptr ptr; - int error; - bool from_mempool; - /* Must be last */ - struct bio bio; -}; - -#define DIRTY_IO_MEMPOOL_BVECS 64 -#define DIRTY_IO_MEMPOOL_SECTORS (DIRTY_IO_MEMPOOL_BVECS * PAGE_SECTORS) - -static void dirty_init(struct dirty_io *io) -{ - struct bio *bio = &io->bio; - - bio_init(bio); - if (!io->dc->writeback_percent) - bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); - - bio->bi_iter.bi_size = io->replace.key.k.size << 9; - bio->bi_max_vecs = - DIV_ROUND_UP(io->replace.key.k.size, PAGE_SECTORS); - bio->bi_io_vec = bio->bi_inline_vecs; - bch_bio_map(bio, NULL); -} - -static void dirty_io_destructor(struct closure *cl) -{ - struct dirty_io *io = container_of(cl, struct dirty_io, cl); - - if (io->from_mempool) - mempool_free(io, &io->dc->writeback_io_pool); - else - kfree(io); -} - -static void write_dirty_finish(struct closure *cl) -{ - struct dirty_io *io = container_of(cl, struct dirty_io, cl); - struct cached_dev *dc = io->dc; - struct bio_vec *bv; - int i; - - bio_for_each_segment_all(bv, &io->bio, i) - mempool_free(bv->bv_page, &dc->writeback_page_pool); - - if (!io->error) { - BKEY_PADDED(k) tmp; - int ret; - - bkey_copy(&tmp.k, &io->replace.key); - io->replace.hook.fn = bch_extent_cmpxchg; - bkey_extent_set_cached(&tmp.k.k, true); - - ret = bch_btree_insert(dc->disk.c, BTREE_ID_EXTENTS, &tmp.k, - NULL, &io->replace.hook, NULL, 0); - if (io->replace.successes == 0) - trace_bcache_writeback_collision(&io->replace.key.k); - - atomic_long_inc(ret - ? 
&dc->disk.c->writeback_keys_failed - : &dc->disk.c->writeback_keys_done); - } - - bch_keybuf_put(&dc->writeback_keys, io->w); - - closure_return_with_destructor(cl, dirty_io_destructor); -} - -static void dirty_endio(struct bio *bio) -{ - struct dirty_io *io = container_of(bio, struct dirty_io, bio); - - if (bio->bi_error) { - trace_bcache_writeback_error(&io->replace.key.k, - op_is_write(bio_op(&io->bio)), - bio->bi_error); - io->error = bio->bi_error; - } - - closure_put(&io->cl); -} - -static void write_dirty(struct closure *cl) -{ - struct dirty_io *io = container_of(cl, struct dirty_io, cl); - - if (!io->error) { - dirty_init(io); - bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); - io->bio.bi_iter.bi_sector = - bkey_start_offset(&io->replace.key.k); - io->bio.bi_bdev = io->dc->disk_sb.bdev; - io->bio.bi_end_io = dirty_endio; - - closure_bio_submit(&io->bio, cl); - } - - continue_at(cl, write_dirty_finish, io->dc->disk.c->wq); -} - -static void read_dirty_endio(struct bio *bio) -{ - struct dirty_io *io = container_of(bio, struct dirty_io, bio); - - bch_dev_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read"); - - bch_account_io_completion(io->ca); - - if (ptr_stale(io->ca, &io->ptr)) - bio->bi_error = -EINTR; - - dirty_endio(bio); -} - -static void read_dirty_submit(struct closure *cl) -{ - struct dirty_io *io = container_of(cl, struct dirty_io, cl); - - closure_bio_submit(&io->bio, cl); - - continue_at(cl, write_dirty, system_freezable_wq); -} - -static u64 read_dirty(struct cached_dev *dc) -{ - struct keybuf_key *w; - struct dirty_io *io; - struct closure cl; - unsigned i; - struct bio_vec *bv; - u64 sectors_written = 0; - BKEY_PADDED(k) tmp; - - closure_init_stack(&cl); - - while (!bch_ratelimit_wait_freezable_stoppable(&dc->writeback_pd.rate)) { - w = bch_keybuf_next(&dc->writeback_keys); - if (!w) - break; - - sectors_written += w->key.k.size; - bkey_copy(&tmp.k, &w->key); - - while (tmp.k.k.size) { - struct extent_pick_ptr pick; - - bch_extent_pick_ptr(dc->disk.c, - bkey_i_to_s_c(&tmp.k), - &pick); - if (IS_ERR_OR_NULL(pick.ca)) - break; - - io = kzalloc(sizeof(*io) + sizeof(struct bio_vec) * - DIV_ROUND_UP(tmp.k.k.size, - PAGE_SECTORS), - GFP_KERNEL); - if (!io) { - trace_bcache_writeback_alloc_fail(pick.ca->fs, - tmp.k.k.size); - io = mempool_alloc(&dc->writeback_io_pool, - GFP_KERNEL); - memset(io, 0, sizeof(*io) + - sizeof(struct bio_vec) * - DIRTY_IO_MEMPOOL_BVECS); - io->from_mempool = true; - - bkey_copy(&io->replace.key, &tmp.k); - - if (DIRTY_IO_MEMPOOL_SECTORS < - io->replace.key.k.size) - bch_key_resize(&io->replace.key.k, - DIRTY_IO_MEMPOOL_SECTORS); - } else { - bkey_copy(&io->replace.key, &tmp.k); - } - - io->dc = dc; - io->ca = pick.ca; - io->w = w; - io->ptr = pick.ptr; - atomic_inc(&w->ref); - - dirty_init(io); - bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); - io->bio.bi_iter.bi_sector = pick.ptr.offset; - io->bio.bi_bdev = pick.ca->disk_sb.bdev; - io->bio.bi_end_io = read_dirty_endio; - - bio_for_each_segment_all(bv, &io->bio, i) { - bv->bv_page = - mempool_alloc(&dc->writeback_page_pool, - i ? 
GFP_NOWAIT - : GFP_KERNEL); - if (!bv->bv_page) { - BUG_ON(!i); - io->bio.bi_vcnt = i; - - io->bio.bi_iter.bi_size = - io->bio.bi_vcnt * PAGE_SIZE; - - bch_key_resize(&io->replace.key.k, - bio_sectors(&io->bio)); - break; - } - } - - bch_cut_front(io->replace.key.k.p, &tmp.k); - trace_bcache_writeback(&io->replace.key.k); - - bch_ratelimit_increment(&dc->writeback_pd.rate, - io->replace.key.k.size << 9); - - closure_call(&io->cl, read_dirty_submit, NULL, &cl); - } - - bch_keybuf_put(&dc->writeback_keys, w); - } - - /* - * Wait for outstanding writeback IOs to finish (and keybuf slots to be - * freed) before refilling again - */ - closure_sync(&cl); - - return sectors_written; -} - -/* Scan for dirty data */ - -static void __bcache_dev_sectors_dirty_add(struct bcache_device *d, - u64 offset, int nr_sectors) -{ - unsigned stripe_offset, stripe, sectors_dirty; - - if (!d) - return; - - if (!d->stripe_sectors_dirty) - return; - - stripe = offset_to_stripe(d, offset); - stripe_offset = offset & (d->stripe_size - 1); - - while (nr_sectors) { - int s = min_t(unsigned, abs(nr_sectors), - d->stripe_size - stripe_offset); - - if (nr_sectors < 0) - s = -s; - - if (stripe >= d->nr_stripes) - return; - - sectors_dirty = atomic_add_return(s, - d->stripe_sectors_dirty + stripe); - if (sectors_dirty == d->stripe_size) - set_bit(stripe, d->full_dirty_stripes); - else - clear_bit(stripe, d->full_dirty_stripes); - - nr_sectors -= s; - stripe_offset = 0; - stripe++; - } -} - -void bcache_dev_sectors_dirty_add(struct bch_fs *c, unsigned inode, - u64 offset, int nr_sectors) -{ - struct bcache_device *d; - - rcu_read_lock(); - d = bch_dev_find(c, inode); - if (d) - __bcache_dev_sectors_dirty_add(d, offset, nr_sectors); - rcu_read_unlock(); -} - -static bool dirty_pred(struct keybuf *buf, struct bkey_s_c k) -{ - struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys); - - BUG_ON(k.k->p.inode != bcache_dev_inum(&dc->disk)); - - return bkey_extent_is_data(k.k) && - !bkey_extent_is_cached(k.k); -} - -static void refill_full_stripes(struct cached_dev *dc) -{ - struct keybuf *buf = &dc->writeback_keys; - unsigned inode = bcache_dev_inum(&dc->disk); - unsigned start_stripe, stripe, next_stripe; - bool wrapped = false; - - stripe = offset_to_stripe(&dc->disk, buf->last_scanned.offset); - - if (stripe >= dc->disk.nr_stripes) - stripe = 0; - - start_stripe = stripe; - - while (1) { - stripe = find_next_bit(dc->disk.full_dirty_stripes, - dc->disk.nr_stripes, stripe); - - if (stripe == dc->disk.nr_stripes) - goto next; - - next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes, - dc->disk.nr_stripes, stripe); - - buf->last_scanned = POS(inode, - stripe * dc->disk.stripe_size); - - bch_refill_keybuf(dc->disk.c, buf, - POS(inode, - next_stripe * dc->disk.stripe_size), - dirty_pred); - - if (array_freelist_empty(&buf->freelist)) - return; - - stripe = next_stripe; -next: - if (wrapped && stripe > start_stripe) - return; - - if (stripe == dc->disk.nr_stripes) { - stripe = 0; - wrapped = true; - } - } -} - -static u64 bch_writeback(struct cached_dev *dc) -{ - struct keybuf *buf = &dc->writeback_keys; - unsigned inode = bcache_dev_inum(&dc->disk); - struct bpos start = POS(inode, 0); - struct bpos end = POS(inode, KEY_OFFSET_MAX); - struct bpos start_pos; - u64 sectors_written = 0; - - buf->last_scanned = POS(inode, 0); - - while (bkey_cmp(buf->last_scanned, end) < 0 && - !kthread_should_stop()) { - down_write(&dc->writeback_lock); - - if (!atomic_read(&dc->has_dirty)) { - up_write(&dc->writeback_lock); 
- set_current_state(TASK_INTERRUPTIBLE); - - if (kthread_should_stop()) - return sectors_written; - - schedule(); - try_to_freeze(); - return sectors_written; - } - - if (bkey_cmp(buf->last_scanned, end) >= 0) - buf->last_scanned = POS(inode, 0); - - if (dc->partial_stripes_expensive) { - refill_full_stripes(dc); - if (array_freelist_empty(&buf->freelist)) - goto refill_done; - } - - start_pos = buf->last_scanned; - bch_refill_keybuf(dc->disk.c, buf, end, dirty_pred); - - if (bkey_cmp(buf->last_scanned, end) >= 0) { - /* - * If we get to the end start scanning again from the - * beginning, and only scan up to where we initially - * started scanning from: - */ - buf->last_scanned = start; - bch_refill_keybuf(dc->disk.c, buf, start_pos, - dirty_pred); - } - - if (RB_EMPTY_ROOT(&dc->writeback_keys.keys)) { - atomic_set(&dc->has_dirty, 0); - cached_dev_put(dc); - SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_CLEAN); - bch_write_bdev_super(dc, NULL); - } - -refill_done: - up_write(&dc->writeback_lock); - - bch_ratelimit_reset(&dc->writeback_pd.rate); - sectors_written += read_dirty(dc); - } - - return sectors_written; -} - -static int bch_writeback_thread(void *arg) -{ - struct cached_dev *dc = arg; - struct bch_fs *c = dc->disk.c; - struct io_clock *clock = &c->io_clock[WRITE]; - unsigned long last; - u64 sectors_written; - - set_freezable(); - - while (!kthread_should_stop()) { - if (kthread_wait_freezable(dc->writeback_running || - test_bit(BCACHE_DEV_DETACHING, - &dc->disk.flags))) - break; - - last = atomic_long_read(&clock->now); - - sectors_written = bch_writeback(dc); - - if (sectors_written < c->capacity >> 4) - bch_kthread_io_clock_wait(clock, - last + (c->capacity >> 5)); - } - - return 0; -} - -/** - * bch_keylist_recalc_oldest_gens - update oldest_gen pointers from writeback keys - * - * This prevents us from wrapping around gens for a bucket only referenced from - * writeback keybufs. We don't actually care that the data in those buckets is - * marked live, only that we don't wrap the gens. 
- */ -void bch_writeback_recalc_oldest_gens(struct bch_fs *c) -{ - struct radix_tree_iter iter; - void **slot; - - rcu_read_lock(); - - radix_tree_for_each_slot(slot, &c->devices, &iter, 0) { - struct bcache_device *d; - struct cached_dev *dc; - - d = radix_tree_deref_slot(slot); - - if (!CACHED_DEV(&d->inode.v)) - continue; - dc = container_of(d, struct cached_dev, disk); - - bch_keybuf_recalc_oldest_gens(c, &dc->writeback_keys); - } - - rcu_read_unlock(); -} - -/* Init */ - -void bch_sectors_dirty_init(struct cached_dev *dc, struct bch_fs *c) -{ - struct bcache_device *d = &dc->disk; - struct btree_iter iter; - struct bkey_s_c k; - - /* - * We have to do this before the disk is added to the radix tree or we - * race with moving GC - */ - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(bcache_dev_inum(d), 0), k) { - if (k.k->p.inode > bcache_dev_inum(d)) - break; - - if (bkey_extent_is_data(k.k) && - !bkey_extent_is_cached(k.k)) - __bcache_dev_sectors_dirty_add(d, - bkey_start_offset(k.k), - k.k->size); - - bch_btree_iter_cond_resched(&iter); - } - bch_btree_iter_unlock(&iter); - - dc->writeback_pd.last_actual = bcache_dev_sectors_dirty(d); -} - -void bch_cached_dev_writeback_stop(struct cached_dev *dc) -{ - cancel_delayed_work_sync(&dc->writeback_pd_update); - if (!IS_ERR_OR_NULL(dc->writeback_thread)) { - kthread_stop(dc->writeback_thread); - dc->writeback_thread = NULL; - } -} - -void bch_cached_dev_writeback_free(struct cached_dev *dc) -{ - struct bcache_device *d = &dc->disk; - - mempool_exit(&dc->writeback_page_pool); - mempool_exit(&dc->writeback_io_pool); - kvfree(d->full_dirty_stripes); - kvfree(d->stripe_sectors_dirty); -} - -int bch_cached_dev_writeback_init(struct cached_dev *dc) -{ - struct bcache_device *d = &dc->disk; - sector_t sectors; - size_t n; - - sectors = get_capacity(dc->disk.disk); - - if (!d->stripe_size) { -#ifdef CONFIG_BCACHEFS_DEBUG - d->stripe_size = 1 << 0; -#else - d->stripe_size = 1 << 31; -#endif - } - - pr_debug("stripe size: %d sectors", d->stripe_size); - d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); - - if (!d->nr_stripes || - d->nr_stripes > INT_MAX || - d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) { - pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", - (unsigned)d->nr_stripes); - return -ENOMEM; - } - - n = d->nr_stripes * sizeof(atomic_t); - d->stripe_sectors_dirty = n < PAGE_SIZE << 6 - ? kzalloc(n, GFP_KERNEL) - : vzalloc(n); - if (!d->stripe_sectors_dirty) { - pr_err("cannot allocate stripe_sectors_dirty"); - return -ENOMEM; - } - - n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long); - d->full_dirty_stripes = n < PAGE_SIZE << 6 - ? 
kzalloc(n, GFP_KERNEL) - : vzalloc(n); - if (!d->full_dirty_stripes) { - pr_err("cannot allocate full_dirty_stripes"); - return -ENOMEM; - } - - if (mempool_init_kmalloc_pool(&dc->writeback_io_pool, 4, - sizeof(struct dirty_io) + - sizeof(struct bio_vec) * - DIRTY_IO_MEMPOOL_BVECS) || - mempool_init_page_pool(&dc->writeback_page_pool, - (64 << 10) / PAGE_SIZE, 0)) - return -ENOMEM; - - init_rwsem(&dc->writeback_lock); - bch_keybuf_init(&dc->writeback_keys); - - dc->writeback_metadata = true; - dc->writeback_running = true; - dc->writeback_percent = 10; - dc->writeback_pd_update_seconds = 5; - - bch_pd_controller_init(&dc->writeback_pd); - INIT_DELAYED_WORK(&dc->writeback_pd_update, update_writeback_rate); - - return 0; -} - -int bch_cached_dev_writeback_start(struct cached_dev *dc) -{ - dc->writeback_thread = kthread_create(bch_writeback_thread, dc, - "bcache_writeback"); - if (IS_ERR(dc->writeback_thread)) - return PTR_ERR(dc->writeback_thread); - - schedule_delayed_work(&dc->writeback_pd_update, - dc->writeback_pd_update_seconds * HZ); - - bch_writeback_queue(dc); - - return 0; -} diff --git a/fs/bcachefs/writeback.h b/fs/bcachefs/writeback.h deleted file mode 100644 index 82ce306e9ac9..000000000000 --- a/fs/bcachefs/writeback.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef _BCACHE_WRITEBACK_H -#define _BCACHE_WRITEBACK_H - -#include "blockdev.h" -#include "buckets.h" - -#define CUTOFF_WRITEBACK 60 -#define CUTOFF_WRITEBACK_SYNC 30 - -static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) -{ - uint64_t i, ret = 0; - - for (i = 0; i < d->nr_stripes; i++) - ret += atomic_read(d->stripe_sectors_dirty + i); - - return ret; -} - -static inline unsigned offset_to_stripe(struct bcache_device *d, - uint64_t offset) -{ - do_div(offset, d->stripe_size); - return offset; -} - -static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, - uint64_t offset, - unsigned nr_sectors) -{ - unsigned stripe = offset_to_stripe(&dc->disk, offset); - - while (1) { - if (atomic_read(dc->disk.stripe_sectors_dirty + stripe)) - return true; - - if (nr_sectors <= dc->disk.stripe_size) - return false; - - nr_sectors -= dc->disk.stripe_size; - stripe++; - } -} - -static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, - unsigned cache_mode, bool would_skip) -{ - struct bch_fs *c = dc->disk.c; - u64 available = sectors_available(c); - - if (cache_mode != CACHE_MODE_WRITEBACK || - test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || - available * 100 < c->capacity * CUTOFF_WRITEBACK_SYNC) - return false; - - if (dc->partial_stripes_expensive && - bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector, - bio_sectors(bio))) - return true; - - if (would_skip) - return false; - - return bio->bi_opf & REQ_SYNC || - available * 100 < c->capacity * CUTOFF_WRITEBACK; -} - -static inline void bch_writeback_queue(struct cached_dev *dc) -{ - if (!IS_ERR_OR_NULL(dc->writeback_thread)) - wake_up_process(dc->writeback_thread); -} - -static inline void bch_writeback_add(struct cached_dev *dc) -{ - if (!atomic_read(&dc->has_dirty) && - !atomic_xchg(&dc->has_dirty, 1)) { - atomic_inc(&dc->count); - - if (BDEV_STATE(dc->disk_sb.sb) != BDEV_STATE_DIRTY) { - SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_DIRTY); - /* XXX: should do this synchronously */ - bch_write_bdev_super(dc, NULL); - } - - bch_writeback_queue(dc); - } -} - -#ifndef NO_BCACHE_WRITEBACK - -void bcache_dev_sectors_dirty_add(struct bch_fs *, unsigned, u64, int); - -void bch_writeback_recalc_oldest_gens(struct bch_fs *); -void 
bch_sectors_dirty_init(struct cached_dev *, struct bch_fs *c); - -void bch_cached_dev_writeback_stop(struct cached_dev *); -void bch_cached_dev_writeback_free(struct cached_dev *); -int bch_cached_dev_writeback_init(struct cached_dev *); -int bch_cached_dev_writeback_start(struct cached_dev *); - -#else - -static inline void bcache_dev_sectors_dirty_add(struct bch_fs *c, - unsigned i, u64 o, int n) {} -static inline void bch_writeback_recalc_oldest_gens(struct bch_fs *c) {} -static inline void bch_sectors_dirty_init(struct cached_dev *dc, - struct bch_fs *c) {} -static inline void bch_cached_dev_writeback_stop(struct cached_dev *dc) {} -static inline void bch_cached_dev_writeback_free(struct cached_dev *dc) {} -static inline int bch_cached_dev_writeback_init(struct cached_dev *dc) -{ - return 0; -} -static inline int bch_cached_dev_writeback_start(struct cached_dev *dc) -{ - return 0; -} - -#endif - -#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 6c17b0ff0b8b..2865d10a2882 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1942,7 +1942,7 @@ struct task_struct { #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif -#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) || defined(CONFIG_BCACHE_FS) || defined(CONFIG_BCACHE_FS_MODULE) +#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) unsigned int sequential_io; unsigned int sequential_io_avg; #endif diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 24e6478015f2..5b55dd0862c8 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -6,38 +6,6 @@ #include <linux/tracepoint.h> -DECLARE_EVENT_CLASS(bcache_request, - TP_PROTO(struct bcache_device *d, struct bio *bio), - TP_ARGS(d, bio), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(unsigned int, orig_major ) - __field(unsigned int, orig_minor ) - __field(sector_t, sector ) - __field(sector_t, orig_sector ) - __field(unsigned int, nr_sector ) - __array(char, rwbs, 6 ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->orig_major = d->disk->major; - __entry->orig_minor = d->disk->first_minor; - __entry->sector = bio->bi_iter.bi_sector; - __entry->orig_sector = bio->bi_iter.bi_sector - 16; - __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); - ), - - TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->orig_major, __entry->orig_minor, - (unsigned long long)__entry->orig_sector) -); - DECLARE_EVENT_CLASS(bpos, TP_PROTO(struct bpos p), TP_ARGS(p), @@ -75,16 +43,36 @@ DECLARE_EVENT_CLASS(bkey, __entry->offset, __entry->size) ); -/* request.c */ +DECLARE_EVENT_CLASS(bch_dev, + TP_PROTO(struct bch_dev *ca), + TP_ARGS(ca), -DEFINE_EVENT(bcache_request, bcache_request_start, - TP_PROTO(struct bcache_device *d, struct bio *bio), - TP_ARGS(d, bio) + TP_STRUCT__entry( + __array(char, uuid, 16 ) + __field(unsigned, tier ) + ), + + TP_fast_assign( + memcpy(__entry->uuid, ca->uuid.b, 16); + __entry->tier = ca->mi.tier; + ), + + TP_printk("%pU tier %u", __entry->uuid, __entry->tier) ); -DEFINE_EVENT(bcache_request, bcache_request_end, - TP_PROTO(struct bcache_device *d, struct bio *bio), - TP_ARGS(d, bio) +DECLARE_EVENT_CLASS(bch_fs, + TP_PROTO(struct bch_fs *c), + TP_ARGS(c), + + TP_STRUCT__entry( + __array(char, uuid, 16 ) + ), + + TP_fast_assign( + 
memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + ), + + TP_printk("%pU", __entry->uuid) ); DECLARE_EVENT_CLASS(bcache_bio, @@ -111,26 +99,25 @@ DECLARE_EVENT_CLASS(bcache_bio, (unsigned long long)__entry->sector, __entry->nr_sector) ); -DEFINE_EVENT(bcache_bio, bcache_bypass_sequential, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); +DECLARE_EVENT_CLASS(page_alloc_fail, + TP_PROTO(struct bch_fs *c, u64 size), + TP_ARGS(c, size), -DEFINE_EVENT(bcache_bio, bcache_bypass_congested, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); + TP_STRUCT__entry( + __array(char, uuid, 16 ) + __field(u64, size ) + ), -DEFINE_EVENT(bcache_bio, bcache_promote, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); + TP_fast_assign( + memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->size = size; + ), -DEFINE_EVENT(bkey, bcache_promote_collision, - TP_PROTO(const struct bkey *k), - TP_ARGS(k) + TP_printk("%pU size %llu", __entry->uuid, __entry->size) ); +/* io.c: */ + DEFINE_EVENT(bcache_bio, bcache_read_split, TP_PROTO(struct bio *bio), TP_ARGS(bio) @@ -141,65 +128,14 @@ DEFINE_EVENT(bcache_bio, bcache_read_bounce, TP_ARGS(bio) ); -TRACE_EVENT(bcache_read, - TP_PROTO(struct bio *bio, bool hit, bool bypass), - TP_ARGS(bio, hit, bypass), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(sector_t, sector ) - __field(unsigned int, nr_sector ) - __array(char, rwbs, 6 ) - __field(bool, cache_hit ) - __field(bool, bypass ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); - __entry->cache_hit = hit; - __entry->bypass = bypass; - ), - - TP_printk("%d,%d %s %llu + %u hit %u bypass %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->cache_hit, __entry->bypass) +DEFINE_EVENT(bcache_bio, bcache_read_retry, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); -TRACE_EVENT(bcache_write, - TP_PROTO(struct bch_fs *c, u64 inode, struct bio *bio, - bool writeback, bool bypass), - TP_ARGS(c, inode, bio, writeback, bypass), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(u64, inode ) - __field(sector_t, sector ) - __field(unsigned int, nr_sector ) - __array(char, rwbs, 6 ) - __field(bool, writeback ) - __field(bool, bypass ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->inode = inode; - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); - __entry->writeback = writeback; - __entry->bypass = bypass; - ), - - TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u", - __entry->uuid, __entry->inode, - __entry->rwbs, (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->writeback, __entry->bypass) +DEFINE_EVENT(bcache_bio, bcache_promote, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); TRACE_EVENT(bcache_write_throttle, @@ -231,113 +167,14 @@ TRACE_EVENT(bcache_write_throttle, __entry->nr_sector, __entry->delay) ); -DEFINE_EVENT(bcache_bio, bcache_read_retry, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); - -DECLARE_EVENT_CLASS(page_alloc_fail, - TP_PROTO(struct bch_fs *c, u64 size), - TP_ARGS(c, size), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(u64, size ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->size = size; - ), - - 
TP_printk("%pU size %llu", __entry->uuid, __entry->size) -); - /* Journal */ -DECLARE_EVENT_CLASS(cache_set, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - ), - - TP_printk("%pU", __entry->uuid) -); - -DEFINE_EVENT(bkey, bcache_journal_replay_key, - TP_PROTO(const struct bkey *k), - TP_ARGS(k) -); - -TRACE_EVENT(bcache_journal_next_bucket, - TP_PROTO(struct bch_dev *ca, unsigned cur_idx, unsigned last_idx), - TP_ARGS(ca, cur_idx, last_idx), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(unsigned, cur_idx ) - __field(unsigned, last_idx ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, ca->uuid.b, 16); - __entry->cur_idx = cur_idx; - __entry->last_idx = last_idx; - ), - - TP_printk("%pU cur %u last %u", __entry->uuid, - __entry->cur_idx, __entry->last_idx) -); - -TRACE_EVENT(bcache_journal_write_oldest, - TP_PROTO(struct bch_fs *c, u64 seq), - TP_ARGS(c, seq), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(u64, seq ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->seq = seq; - ), - - TP_printk("%pU seq %llu", __entry->uuid, __entry->seq) -); - -TRACE_EVENT(bcache_journal_write_oldest_done, - TP_PROTO(struct bch_fs *c, u64 seq, unsigned written), - TP_ARGS(c, seq, written), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(u64, seq ) - __field(unsigned, written ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->seq = seq; - __entry->written = written; - ), - - TP_printk("%pU seq %llu written %u", __entry->uuid, __entry->seq, - __entry->written) -); - -DEFINE_EVENT(cache_set, bcache_journal_full, +DEFINE_EVENT(bch_fs, bcache_journal_full, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_journal_entry_full, +DEFINE_EVENT(bch_fs, bcache_journal_entry_full, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); @@ -347,56 +184,7 @@ DEFINE_EVENT(bcache_bio, bcache_journal_write, TP_ARGS(bio) ); -/* Device state changes */ - -DEFINE_EVENT(cache_set, fs_read_only, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) -); - -DEFINE_EVENT(cache_set, fs_read_only_done, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) -); - -DECLARE_EVENT_CLASS(cache, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(unsigned, tier ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, ca->uuid.b, 16); - __entry->tier = ca->mi.tier; - ), - - TP_printk("%pU tier %u", __entry->uuid, __entry->tier) -); - -DEFINE_EVENT(cache, bcache_cache_read_only, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca) -); - -DEFINE_EVENT(cache, bcache_cache_read_only_done, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca) -); - -DEFINE_EVENT(cache, bcache_cache_read_write, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca) -); - -DEFINE_EVENT(cache, bcache_cache_read_write_done, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca) -); - -/* Searching */ +/* bset.c: */ DEFINE_EVENT(bpos, bkey_pack_pos_fail, TP_PROTO(struct bpos p), @@ -462,23 +250,6 @@ DEFINE_EVENT(btree_node, bcache_btree_node_alloc, TP_ARGS(c, b) ); -TRACE_EVENT(bcache_btree_node_alloc_fail, - TP_PROTO(struct bch_fs *c, enum btree_id id), - TP_ARGS(c, id), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(enum btree_id, id ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->id = id; - ), - - TP_printk("%pU id %u", __entry->uuid, __entry->id) -); - DEFINE_EVENT(btree_node, 
bcache_btree_node_free, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) @@ -557,11 +328,31 @@ DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize, TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_mca_cannibalize_unlock, +DEFINE_EVENT(bch_fs, bcache_mca_cannibalize_unlock, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); +TRACE_EVENT(bcache_btree_reserve_get_fail, + TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl), + TP_ARGS(c, required, cl), + + TP_STRUCT__entry( + __array(char, uuid, 16 ) + __field(size_t, required ) + __field(struct closure *, cl ) + ), + + TP_fast_assign( + memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->required = required; + __entry->cl = cl; + ), + + TP_printk("%pU required %zu by %p", __entry->uuid, + __entry->required, __entry->cl) +); + TRACE_EVENT(bcache_btree_insert_key, TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k), TP_ARGS(c, b, k), @@ -723,75 +514,41 @@ DEFINE_EVENT(btree_node, bcache_btree_gc_rewrite_node_fail, TP_ARGS(c, b) ); -DEFINE_EVENT(cache_set, bcache_gc_start, +DEFINE_EVENT(bch_fs, bcache_gc_start, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_gc_end, +DEFINE_EVENT(bch_fs, bcache_gc_end, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_gc_coalesce_start, +DEFINE_EVENT(bch_fs, bcache_gc_coalesce_start, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_gc_coalesce_end, +DEFINE_EVENT(bch_fs, bcache_gc_coalesce_end, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(cache, bcache_sectors_saturated, +DEFINE_EVENT(bch_dev, bcache_sectors_saturated, TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); -DEFINE_EVENT(cache_set, bcache_gc_sectors_saturated, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) -); - -DEFINE_EVENT(cache_set, bcache_gc_cannot_inc_gens, +DEFINE_EVENT(bch_fs, bcache_gc_sectors_saturated, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_gc_periodic, +DEFINE_EVENT(bch_fs, bcache_gc_cannot_inc_gens, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -TRACE_EVENT(bcache_mark_bucket, - TP_PROTO(struct bch_dev *ca, const struct bkey *k, - const struct bch_extent_ptr *ptr, - int sectors, bool dirty), - TP_ARGS(ca, k, ptr, sectors, dirty), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(u32, inode ) - __field(u64, offset ) - __field(u32, sectors ) - __field(u64, bucket ) - __field(bool, dirty ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, ca->uuid.b, 16); - __entry->inode = k->p.inode; - __entry->offset = k->p.offset; - __entry->sectors = sectors; - __entry->bucket = PTR_BUCKET_NR(ca, ptr); - __entry->dirty = dirty; - ), - - TP_printk("%pU %u:%llu sectors %i bucket %llu dirty %i", - __entry->uuid, __entry->inode, __entry->offset, - __entry->sectors, __entry->bucket, __entry->dirty) -); - /* Allocator */ TRACE_EVENT(bcache_alloc_batch, @@ -814,32 +571,12 @@ TRACE_EVENT(bcache_alloc_batch, __entry->uuid, __entry->free, __entry->total) ); -TRACE_EVENT(bcache_btree_reserve_get_fail, - TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl), - TP_ARGS(c, required, cl), - - TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(size_t, required ) - __field(struct closure *, cl ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->required = required; - __entry->cl = cl; - ), - - TP_printk("%pU required %zu by %p", __entry->uuid, - __entry->required, __entry->cl) -); - -DEFINE_EVENT(cache, bcache_prio_write_start, +DEFINE_EVENT(bch_dev, bcache_prio_write_start, 
TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); -DEFINE_EVENT(cache, bcache_prio_write_end, +DEFINE_EVENT(bch_dev, bcache_prio_write_end, TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); @@ -865,12 +602,12 @@ TRACE_EVENT(bcache_invalidate, MINOR(__entry->dev), __entry->offset) ); -DEFINE_EVENT(cache_set, bcache_rescale_prios, +DEFINE_EVENT(bch_fs, bcache_rescale_prios, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DECLARE_EVENT_CLASS(cache_bucket_alloc, +DECLARE_EVENT_CLASS(bucket_alloc, TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), TP_ARGS(ca, reserve), @@ -887,12 +624,12 @@ DECLARE_EVENT_CLASS(cache_bucket_alloc, TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve) ); -DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc, +DEFINE_EVENT(bucket_alloc, bcache_bucket_alloc, TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), TP_ARGS(ca, reserve) ); -DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc_fail, +DEFINE_EVENT(bucket_alloc, bcache_bucket_alloc_fail, TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), TP_ARGS(ca, reserve) ); @@ -946,37 +683,6 @@ DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc_fail, TP_ARGS(c, cl) ); -/* Keylists */ - -TRACE_EVENT(bcache_keyscan, - TP_PROTO(unsigned nr_found, - unsigned start_inode, u64 start_offset, - unsigned end_inode, u64 end_offset), - TP_ARGS(nr_found, - start_inode, start_offset, - end_inode, end_offset), - - TP_STRUCT__entry( - __field(__u32, nr_found ) - __field(__u32, start_inode ) - __field(__u64, start_offset ) - __field(__u32, end_inode ) - __field(__u64, end_offset ) - ), - - TP_fast_assign( - __entry->nr_found = nr_found; - __entry->start_inode = start_inode; - __entry->start_offset = start_offset; - __entry->end_inode = end_inode; - __entry->end_offset = end_offset; - ), - - TP_printk("found %u keys from %u:%llu to %u:%llu", __entry->nr_found, - __entry->start_inode, __entry->start_offset, - __entry->end_inode, __entry->end_offset) -); - /* Moving IO */ DECLARE_EVENT_CLASS(moving_io, @@ -1014,11 +720,6 @@ DEFINE_EVENT(moving_io, bcache_move_write, TP_ARGS(k) ); -DEFINE_EVENT(moving_io, bcache_move_write_done, - TP_PROTO(struct bkey *k), - TP_ARGS(k) -); - DEFINE_EVENT(moving_io, bcache_copy_collision, TP_PROTO(struct bkey *k), TP_ARGS(k) @@ -1031,7 +732,7 @@ DEFINE_EVENT(page_alloc_fail, bcache_moving_gc_alloc_fail, TP_ARGS(c, size) ); -DEFINE_EVENT(cache, bcache_moving_gc_start, +DEFINE_EVENT(bch_dev, bcache_moving_gc_start, TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); @@ -1060,16 +761,6 @@ TRACE_EVENT(bcache_moving_gc_end, __entry->buckets_moved) ); -DEFINE_EVENT(cache, bcache_moving_gc_reserve_empty, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca) -); - -DEFINE_EVENT(cache, bcache_moving_gc_no_work, - TP_PROTO(struct bch_dev *ca), - TP_ARGS(ca) -); - DEFINE_EVENT(bkey, bcache_gc_copy, TP_PROTO(const struct bkey *k), TP_ARGS(k) @@ -1077,22 +768,12 @@ DEFINE_EVENT(bkey, bcache_gc_copy, /* Tiering */ -DEFINE_EVENT(cache_set, bcache_tiering_refill_start, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) -); - -DEFINE_EVENT(cache_set, bcache_tiering_refill_end, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) -); - DEFINE_EVENT(page_alloc_fail, bcache_tiering_alloc_fail, TP_PROTO(struct bch_fs *c, u64 size), TP_ARGS(c, size) ); -DEFINE_EVENT(cache_set, bcache_tiering_start, +DEFINE_EVENT(bch_fs, bcache_tiering_start, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); @@ -1123,49 +804,6 @@ DEFINE_EVENT(bkey, bcache_tiering_copy, TP_ARGS(k) ); -/* Background writeback */ - -DEFINE_EVENT(bkey, bcache_writeback, - TP_PROTO(const 
struct bkey *k), - TP_ARGS(k) -); - -DEFINE_EVENT(bkey, bcache_writeback_collision, - TP_PROTO(const struct bkey *k), - TP_ARGS(k) -); - -TRACE_EVENT(bcache_writeback_error, - TP_PROTO(struct bkey *k, bool write, int error), - TP_ARGS(k, write, error), - - TP_STRUCT__entry( - __field(u32, size ) - __field(u32, inode ) - __field(u64, offset ) - __field(bool, write ) - __field(int, error ) - ), - - TP_fast_assign( - __entry->inode = k->p.inode; - __entry->offset = k->p.offset; - __entry->size = k->size; - __entry->write = write; - __entry->error = error; - ), - - TP_printk("%u:%llu len %u %s error %d", __entry->inode, - __entry->offset, __entry->size, - __entry->write ? "write" : "read", - __entry->error) -); - -DEFINE_EVENT(page_alloc_fail, bcache_writeback_alloc_fail, - TP_PROTO(struct bch_fs *c, u64 size), - TP_ARGS(c, size) -); - #endif /* _TRACE_BCACHE_H */ /* This part must be outside protection */