diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2017-12-27 16:41:59 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-22 00:44:18 -0400 |
commit | ed5c08b773c6f52b861f53a7c06b4f02d07be05b (patch) | |
tree | 7a75a642bb8ede0be7c442c3ba010f8d57795bb7 | |
parent | 389e3f5919dac7178afe2c7cdbd34184337bb807 (diff) |
bcachefs: device resize
-rw-r--r-- | fs/bcachefs/bcachefs_ioctl.h | 9 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 165 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/chardev.c | 22 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 80 | ||||
-rw-r--r-- | fs/bcachefs/super.h | 1 |
7 files changed, 219 insertions, 63 deletions
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index a6f024f1d0f4..aa2a20504a9f 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -2,6 +2,7 @@ #define _BCACHEFS_IOCTL_H #include <linux/uuid.h> +#include <asm/ioctl.h> #include "bcachefs_format.h" #define BCH_FORCE_IF_DATA_LOST (1 << 0) @@ -50,6 +51,7 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_USAGE _IOWR(0xbc, 11, struct bch_ioctl_usage) #define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super) #define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx) +#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 13, struct bch_ioctl_disk_resize) struct bch_ioctl_query_uuid { uuid_le uuid; @@ -139,4 +141,11 @@ struct bch_ioctl_disk_get_idx { __u64 dev; }; +struct bch_ioctl_disk_resize { + __u32 flags; + __u32 pad; + __u64 dev; + __u64 nbuckets; +}; + #endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 58497d12414a..2dbe7d379126 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -67,6 +67,7 @@ #include "btree_gc.h" #include "buckets.h" #include "error.h" +#include "movinggc.h" #include <linux/preempt.h> #include <trace/events/bcachefs.h> @@ -824,37 +825,157 @@ int bch2_disk_reservation_get(struct bch_fs *c, return bch2_disk_reservation_add(c, res, sectors, flags); } -void bch2_dev_buckets_free(struct bch_dev *ca) +/* Startup/shutdown: */ + +static void buckets_free_rcu(struct rcu_head *rcu) { - free_percpu(ca->usage_percpu); - kvpfree(ca->buckets_dirty, BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); - kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8)); - kvpfree(ca->buckets, ca->mi.nbuckets * sizeof(struct bucket)); + struct bucket_array *buckets = + container_of(rcu, struct bucket_array, rcu); + + kvpfree(buckets, + sizeof(struct bucket_array) + + buckets->nbuckets * sizeof(struct bucket)); } -int bch2_dev_buckets_alloc(struct bch_dev *ca) +int 
bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { - struct bucket_array *buckets; - - buckets = kvpmalloc(sizeof(struct bucket_array) + - ca->mi.nbuckets * - sizeof(struct bucket), - GFP_KERNEL|__GFP_ZERO); - if (!buckets) - return -ENOMEM; + struct bucket_array *buckets = NULL, *old_buckets; + unsigned long *buckets_dirty = NULL; + u8 *oldest_gens = NULL; + alloc_fifo free[RESERVE_NR]; + alloc_fifo free_inc; + alloc_heap alloc_heap; + copygc_heap copygc_heap; + + size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, + ca->mi.bucket_size / c->opts.btree_node_size); + /* XXX: these should be tunable */ + size_t reserve_none = max_t(size_t, 4, ca->mi.nbuckets >> 9); + size_t copygc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7); + size_t free_inc_reserve = copygc_reserve / 2; + bool resize = ca->buckets != NULL, + start_copygc = ca->copygc_thread != NULL; + int ret = -ENOMEM; + unsigned i; - buckets->first_bucket = ca->mi.first_bucket; - buckets->nbuckets = ca->mi.nbuckets; - rcu_assign_pointer(ca->buckets, buckets); + memset(&free, 0, sizeof(free)); + memset(&free_inc, 0, sizeof(free_inc)); + memset(&alloc_heap, 0, sizeof(alloc_heap)); + memset(&copygc_heap, 0, sizeof(copygc_heap)); - if (!(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets * - sizeof(u8), + if (!(buckets = kvpmalloc(sizeof(struct bucket_array) + + nbuckets * sizeof(struct bucket), GFP_KERNEL|__GFP_ZERO)) || - !(ca->buckets_dirty = kvpmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * + !(oldest_gens = kvpmalloc(nbuckets * sizeof(u8), + GFP_KERNEL|__GFP_ZERO)) || + !(buckets_dirty = kvpmalloc(BITS_TO_LONGS(nbuckets) * sizeof(unsigned long), GFP_KERNEL|__GFP_ZERO)) || - !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage))) + !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || + !init_fifo(&free[RESERVE_MOVINGGC], + copygc_reserve, GFP_KERNEL) || + !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) || + !init_fifo(&free_inc, free_inc_reserve, GFP_KERNEL) || + 
!init_heap(&alloc_heap, free_inc_reserve, GFP_KERNEL) || + !init_heap(&copygc_heap, copygc_reserve, GFP_KERNEL)) + goto err; + + buckets->first_bucket = ca->mi.first_bucket; + buckets->nbuckets = nbuckets; + + bch2_copygc_stop(ca); + + down_write(&c->gc_lock); + down_write(&ca->bucket_lock); + lg_global_lock(&c->usage_lock); + + old_buckets = bucket_array(ca); + + if (resize) { + size_t n = min(buckets->nbuckets, old_buckets->nbuckets); + + memcpy(buckets->b, + old_buckets->b, + n * sizeof(struct bucket)); + memcpy(oldest_gens, + ca->oldest_gens, + n * sizeof(u8)); + memcpy(buckets_dirty, + ca->buckets_dirty, + BITS_TO_LONGS(n) * sizeof(unsigned long)); + } + + rcu_assign_pointer(ca->buckets, buckets); + buckets = old_buckets; + + swap(ca->oldest_gens, oldest_gens); + swap(ca->buckets_dirty, buckets_dirty); + + lg_global_unlock(&c->usage_lock); + + spin_lock(&c->freelist_lock); + for (i = 0; i < RESERVE_NR; i++) { + fifo_move(&free[i], &ca->free[i]); + swap(ca->free[i], free[i]); + } + fifo_move(&free_inc, &ca->free_inc); + swap(ca->free_inc, free_inc); + spin_unlock(&c->freelist_lock); + + /* with gc lock held, alloc_heap can't be in use: */ + swap(ca->alloc_heap, alloc_heap); + + /* and we shut down copygc: */ + swap(ca->copygc_heap, copygc_heap); + + nbuckets = ca->mi.nbuckets; + + up_write(&ca->bucket_lock); + up_write(&c->gc_lock); + + if (start_copygc && + bch2_copygc_start(c, ca)) + bch_err(ca, "error restarting copygc thread"); + + ret = 0; +err: + free_heap(&copygc_heap); + free_heap(&alloc_heap); + free_fifo(&free_inc); + for (i = 0; i < RESERVE_NR; i++) + free_fifo(&free[i]); + kvpfree(buckets_dirty, + BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); + kvpfree(oldest_gens, + nbuckets * sizeof(u8)); + if (buckets) + call_rcu(&old_buckets->rcu, buckets_free_rcu); + + return ret; +} + +void bch2_dev_buckets_free(struct bch_dev *ca) +{ + unsigned i; + + free_heap(&ca->copygc_heap); + free_heap(&ca->alloc_heap); + free_fifo(&ca->free_inc); + for (i = 0; i < 
RESERVE_NR; i++) + free_fifo(&ca->free[i]); + kvpfree(ca->buckets_dirty, + BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); + kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8)); + kvpfree(ca->buckets, sizeof(struct bucket_array) + + ca->mi.nbuckets * sizeof(struct bucket)); + + free_percpu(ca->usage_percpu); +} + +int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) +{ + if (!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage))) return -ENOMEM; - return 0; + return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; } diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index f067d1b01ad4..782431293c0d 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -247,7 +247,8 @@ int bch2_disk_reservation_get(struct bch_fs *, struct disk_reservation *, unsigned, int); +int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); void bch2_dev_buckets_free(struct bch_dev *); -int bch2_dev_buckets_alloc(struct bch_dev *); +int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); #endif /* _BUCKETS_H */ diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index eaf4a3863605..7cd8439a0227 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -42,7 +42,7 @@ struct bucket { struct bucket_array { struct rcu_head rcu; - size_t first_bucket; + u16 first_bucket; size_t nbuckets; struct bucket b[]; }; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index bc011cb27521..1618ffe70e23 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -419,6 +419,26 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, return -ENOENT; } +static long bch2_ioctl_disk_resize(struct bch_fs *c, + struct bch_ioctl_disk_resize arg) +{ + struct bch_dev *ca; + int ret; + + if ((arg.flags & ~BCH_BY_INDEX) || + arg.pad) + return -EINVAL; + + ca = bch2_device_lookup(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + ret = bch2_dev_resize(c, ca, arg.nbuckets); + + 
percpu_ref_put(&ca->ref); + return ret; +} + #define BCH_IOCTL(_name, _argtype) \ do { \ _argtype i; \ @@ -464,6 +484,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) BCH_IOCTL(read_super, struct bch_ioctl_read_super); case BCH_IOCTL_DISK_GET_IDX: BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); + case BCH_IOCTL_DISK_RESIZE: + BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); default: return -ENOTTY; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 2fea8787d9df..c928307f31d6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -952,8 +952,6 @@ static void bch2_dev_release(struct kobject *kobj) static void bch2_dev_free(struct bch_dev *ca) { - unsigned i; - cancel_work_sync(&ca->io_error_work); if (ca->kobj.state_in_sysfs && @@ -970,12 +968,6 @@ static void bch2_dev_free(struct bch_dev *ca) free_percpu(ca->io_done); bioset_exit(&ca->replica_set); bch2_dev_buckets_free(ca); - free_heap(&ca->copygc_heap); - free_heap(&ca->alloc_heap); - free_fifo(&ca->free_inc); - - for (i = 0; i < RESERVE_NR; i++) - free_fifo(&ca->free[i]); percpu_ref_exit(&ca->io_ref); percpu_ref_exit(&ca->ref); @@ -1054,9 +1046,6 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca) static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) { struct bch_member *member; - size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve; - size_t heap_size; - unsigned i, btree_node_reserve_buckets; struct bch_dev *ca; if (bch2_fs_init_fault("dev_alloc")) @@ -1091,43 +1080,16 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) ca->uuid = member->uuid; scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); - /* XXX: tune these */ - movinggc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7); - reserve_none = max_t(size_t, 4, ca->mi.nbuckets >> 9); - /* - * free_inc must be smaller than the copygc reserve: if it was bigger, - * one copygc iteration might not make enough buckets available to fill - * up 
free_inc and allow the allocator to make forward progress - */ - free_inc_reserve = movinggc_reserve / 2; - heap_size = movinggc_reserve * 8; - - btree_node_reserve_buckets = - DIV_ROUND_UP(BTREE_NODE_RESERVE, - ca->mi.bucket_size / c->opts.btree_node_size); - if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete, 0, GFP_KERNEL) || percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || - bch2_dev_buckets_alloc(ca) || - !init_fifo(&ca->free[RESERVE_BTREE], btree_node_reserve_buckets, - GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_MOVINGGC], - movinggc_reserve, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) || - !init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) || - !init_heap(&ca->alloc_heap, free_inc_reserve, GFP_KERNEL) || - !init_heap(&ca->copygc_heap,heap_size, GFP_KERNEL) || + bch2_dev_buckets_alloc(c, ca) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio), 0) || !(ca->io_done = alloc_percpu(*ca->io_done))) goto err; - total_reserve = ca->free_inc.size; - for (i = 0; i < RESERVE_NR; i++) - total_reserve += ca->free[i].size; - ca->fs = c; rcu_assign_pointer(c->devs[ca->dev_idx], ca); @@ -1659,6 +1621,46 @@ err: return ret; } +int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) +{ + struct bch_member *mi; + int ret = 0; + + mutex_lock(&c->state_lock); + + if (nbuckets < ca->mi.nbuckets) { + bch_err(ca, "Cannot shrink yet"); + ret = -EINVAL; + goto err; + } + + if (bch2_dev_is_online(ca) && + get_capacity(ca->disk_sb.bdev->bd_disk) < + ca->mi.bucket_size * nbuckets) { + bch_err(ca, "New size larger than device"); + ret = -EINVAL; + goto err; + } + + ret = bch2_dev_buckets_resize(c, ca, nbuckets); + if (ret) { + bch_err(ca, "Resize error: %i", ret); + goto err; + } + + mutex_lock(&c->sb_lock); + mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx]; + mi->nbuckets = cpu_to_le64(nbuckets); + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + 
bch2_recalc_capacity(c); +err: + mutex_unlock(&c->state_lock); + return ret; +} + /* Filesystem open: */ const char *bch2_fs_open(char * const *devices, unsigned nr_devices, diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 7ebe5981bf45..3189da6a958f 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -189,6 +189,7 @@ int bch2_dev_add(struct bch_fs *, const char *); int bch2_dev_online(struct bch_fs *, const char *); int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); int bch2_dev_evacuate(struct bch_fs *, struct bch_dev *); +int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); bool bch2_fs_emergency_read_only(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); |