summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2017-12-27 16:41:59 -0500
committer Kent Overstreet <kent.overstreet@gmail.com> 2018-05-22 00:44:18 -0400
commit ed5c08b773c6f52b861f53a7c06b4f02d07be05b (patch)
tree 7a75a642bb8ede0be7c442c3ba010f8d57795bb7
parent 389e3f5919dac7178afe2c7cdbd34184337bb807 (diff)
bcachefs: device resize
-rw-r--r-- fs/bcachefs/bcachefs_ioctl.h 9
-rw-r--r-- fs/bcachefs/buckets.c 165
-rw-r--r-- fs/bcachefs/buckets.h 3
-rw-r--r-- fs/bcachefs/buckets_types.h 2
-rw-r--r-- fs/bcachefs/chardev.c 22
-rw-r--r-- fs/bcachefs/super.c 80
-rw-r--r-- fs/bcachefs/super.h 1
7 files changed, 219 insertions, 63 deletions
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
index a6f024f1d0f4..aa2a20504a9f 100644
--- a/fs/bcachefs/bcachefs_ioctl.h
+++ b/fs/bcachefs/bcachefs_ioctl.h
@@ -2,6 +2,7 @@
#define _BCACHEFS_IOCTL_H
#include <linux/uuid.h>
+#include <asm/ioctl.h>
#include "bcachefs_format.h"
#define BCH_FORCE_IF_DATA_LOST (1 << 0)
@@ -50,6 +51,7 @@ struct bch_ioctl_incremental {
#define BCH_IOCTL_USAGE _IOWR(0xbc, 11, struct bch_ioctl_usage)
#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super)
#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx)
+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize)
struct bch_ioctl_query_uuid {
uuid_le uuid;
@@ -139,4 +141,11 @@ struct bch_ioctl_disk_get_idx {
__u64 dev;
};
+struct bch_ioctl_disk_resize {
+ __u32 flags; /* only BCH_BY_INDEX is accepted by the ioctl handler */
+ __u32 pad; /* must be zero; the handler returns -EINVAL otherwise */
+ __u64 dev; /* passed to bch2_device_lookup() together with flags */
+ __u64 nbuckets; /* requested new bucket count for the device */
+};
+
#endif /* _BCACHEFS_IOCTL_H */
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 58497d12414a..2dbe7d379126 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -67,6 +67,7 @@
#include "btree_gc.h"
#include "buckets.h"
#include "error.h"
+#include "movinggc.h"
#include <linux/preempt.h>
#include <trace/events/bcachefs.h>
@@ -824,37 +825,157 @@ int bch2_disk_reservation_get(struct bch_fs *c,
return bch2_disk_reservation_add(c, res, sectors, flags);
}
-void bch2_dev_buckets_free(struct bch_dev *ca)
+/* Startup/shutdown: */
+
+/*
+ * RCU callback: recover the struct bucket_array that embeds @rcu and free
+ * it. The size handed to kvpfree() is derived from the nbuckets field
+ * stored in the array itself, so that field must be valid by the time the
+ * callback runs.
+ */
+static void buckets_free_rcu(struct rcu_head *rcu)
{
- free_percpu(ca->usage_percpu);
- kvpfree(ca->buckets_dirty, BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
- kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
- kvpfree(ca->buckets, ca->mi.nbuckets * sizeof(struct bucket));
+ struct bucket_array *buckets =
+ container_of(rcu, struct bucket_array, rcu);
+
+ kvpfree(buckets,
+ sizeof(struct bucket_array) +
+ buckets->nbuckets * sizeof(struct bucket));
}
-int bch2_dev_buckets_alloc(struct bch_dev *ca)
+/*
+ * Allocate bucket metadata (bucket array, oldest_gens, buckets_dirty) sized
+ * for @nbuckets together with fresh freelists and heaps, copy over the
+ * existing contents if the device already has a bucket array, and install
+ * the new structures on @ca.
+ *
+ * Also used for the initial allocation, when ca->buckets is still NULL.
+ *
+ * Returns 0 on success, or -ENOMEM if any allocation fails; nothing is
+ * installed on @ca in that case.
+ */
+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
- struct bucket_array *buckets;
-
- buckets = kvpmalloc(sizeof(struct bucket_array) +
- ca->mi.nbuckets *
- sizeof(struct bucket),
- GFP_KERNEL|__GFP_ZERO);
- if (!buckets)
- return -ENOMEM;
+ struct bucket_array *buckets = NULL, *old_buckets;
+ unsigned long *buckets_dirty = NULL;
+ u8 *oldest_gens = NULL;
+ alloc_fifo free[RESERVE_NR];
+ alloc_fifo free_inc;
+ alloc_heap alloc_heap;
+ copygc_heap copygc_heap;
+
+ size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
+ ca->mi.bucket_size / c->opts.btree_node_size);
+ /* XXX: these should be tunable */
+ size_t reserve_none = max_t(size_t, 4, ca->mi.nbuckets >> 9);
+ size_t copygc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7);
+ size_t free_inc_reserve = copygc_reserve / 2;
+ bool resize = ca->buckets != NULL,
+ start_copygc = ca->copygc_thread != NULL;
+ int ret = -ENOMEM;
+ unsigned i;
- buckets->first_bucket = ca->mi.first_bucket;
- buckets->nbuckets = ca->mi.nbuckets;
- rcu_assign_pointer(ca->buckets, buckets);
+ memset(&free, 0, sizeof(free));
+ memset(&free_inc, 0, sizeof(free_inc));
+ memset(&alloc_heap, 0, sizeof(alloc_heap));
+ memset(&copygc_heap, 0, sizeof(copygc_heap));
- if (!(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets *
- sizeof(u8),
+ if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
+ nbuckets * sizeof(struct bucket),
GFP_KERNEL|__GFP_ZERO)) ||
- !(ca->buckets_dirty = kvpmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
+ !(oldest_gens = kvpmalloc(nbuckets * sizeof(u8),
+ GFP_KERNEL|__GFP_ZERO)) ||
+ !(buckets_dirty = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
- !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)))
+ !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
+ !init_fifo(&free[RESERVE_MOVINGGC],
+ copygc_reserve, GFP_KERNEL) ||
+ !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
+ !init_fifo(&free_inc, free_inc_reserve, GFP_KERNEL) ||
+ !init_heap(&alloc_heap, free_inc_reserve, GFP_KERNEL) ||
+ !init_heap(&copygc_heap, copygc_reserve, GFP_KERNEL))
+ goto err;
+
+ buckets->first_bucket = ca->mi.first_bucket;
+ buckets->nbuckets = nbuckets;
+
+ bch2_copygc_stop(ca);
+
+ down_write(&c->gc_lock);
+ down_write(&ca->bucket_lock);
+ lg_global_lock(&c->usage_lock);
+
+ old_buckets = bucket_array(ca);
+
+ if (resize) {
+ size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
+
+ memcpy(buckets->b,
+ old_buckets->b,
+ n * sizeof(struct bucket));
+ memcpy(oldest_gens,
+ ca->oldest_gens,
+ n * sizeof(u8));
+ memcpy(buckets_dirty,
+ ca->buckets_dirty,
+ BITS_TO_LONGS(n) * sizeof(unsigned long));
+ }
+
+ /*
+ * From here on the locals hold the OLD structures, so that the common
+ * cleanup code below frees whatever is no longer installed on @ca:
+ */
+ rcu_assign_pointer(ca->buckets, buckets);
+ buckets = old_buckets;
+
+ swap(ca->oldest_gens, oldest_gens);
+ swap(ca->buckets_dirty, buckets_dirty);
+
+ lg_global_unlock(&c->usage_lock);
+
+ spin_lock(&c->freelist_lock);
+ for (i = 0; i < RESERVE_NR; i++) {
+ fifo_move(&free[i], &ca->free[i]);
+ swap(ca->free[i], free[i]);
+ }
+ fifo_move(&free_inc, &ca->free_inc);
+ swap(ca->free_inc, free_inc);
+ spin_unlock(&c->freelist_lock);
+
+ /* with gc lock held, alloc_heap can't be in use: */
+ swap(ca->alloc_heap, alloc_heap);
+
+ /* and we shut down copygc: */
+ swap(ca->copygc_heap, copygc_heap);
+
+ /* size of the old arrays that are freed below: */
+ nbuckets = ca->mi.nbuckets;
+
+ up_write(&ca->bucket_lock);
+ up_write(&c->gc_lock);
+
+ if (start_copygc &&
+ bch2_copygc_start(c, ca))
+ bch_err(ca, "error restarting copygc thread");
+
+ ret = 0;
+err:
+ free_heap(&copygc_heap);
+ free_heap(&alloc_heap);
+ free_fifo(&free_inc);
+ for (i = 0; i < RESERVE_NR; i++)
+ free_fifo(&free[i]);
+ kvpfree(buckets_dirty,
+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+ kvpfree(oldest_gens,
+ nbuckets * sizeof(u8));
+ if (buckets && ret) {
+ /*
+ * Error path: this is the new array, which was never published
+ * (no RCU readers) and whose nbuckets field is still zero, so
+ * free it directly with the size it was allocated with.
+ */
+ kvpfree(buckets,
+ sizeof(struct bucket_array) +
+ nbuckets * sizeof(struct bucket));
+ } else if (buckets) {
+ /* success: this is the replaced array, readers may still use it */
+ call_rcu(&buckets->rcu, buckets_free_rcu);
+ }
+
+ return ret;
+}
+
+/*
+ * Tear down the per-device allocator state of @ca: the copygc and alloc
+ * heaps, the free_inc fifo and every reserve fifo, the buckets_dirty bitmap,
+ * the oldest_gens array, the bucket array itself and finally the per-cpu
+ * usage counters. All array sizes are derived from ca->mi.nbuckets.
+ */
+void bch2_dev_buckets_free(struct bch_dev *ca)
+{
+	unsigned reserve;
+
+	free_heap(&ca->copygc_heap);
+	free_heap(&ca->alloc_heap);
+	free_fifo(&ca->free_inc);
+
+	reserve = 0;
+	while (reserve < RESERVE_NR) {
+		free_fifo(&ca->free[reserve]);
+		reserve++;
+	}
+
+	kvpfree(ca->buckets_dirty,
+		sizeof(unsigned long) * BITS_TO_LONGS(ca->mi.nbuckets));
+	kvpfree(ca->oldest_gens, sizeof(u8) * ca->mi.nbuckets);
+	kvpfree(ca->buckets,
+		ca->mi.nbuckets * sizeof(struct bucket) +
+		sizeof(struct bucket_array));
+
+	free_percpu(ca->usage_percpu);
+}
+
+/*
+ * Initial allocation for @ca: set up the per-cpu usage counters, then let
+ * bch2_dev_buckets_resize() build the bucket arrays, freelists and heaps at
+ * the device's current size (ca->mi.nbuckets).
+ *
+ * Returns 0 on success, -ENOMEM if the per-cpu allocation fails, or the
+ * error returned by bch2_dev_buckets_resize().
+ */
+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+ if (!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)))
return -ENOMEM;
- return 0;
+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index f067d1b01ad4..782431293c0d 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -247,7 +247,8 @@ int bch2_disk_reservation_get(struct bch_fs *,
struct disk_reservation *,
unsigned, int);
+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64);
void bch2_dev_buckets_free(struct bch_dev *);
-int bch2_dev_buckets_alloc(struct bch_dev *);
+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *);
#endif /* _BUCKETS_H */
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index eaf4a3863605..7cd8439a0227 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -42,7 +42,7 @@ struct bucket {
struct bucket_array {
struct rcu_head rcu;
- size_t first_bucket;
+ u16 first_bucket; /* filled in from ca->mi.first_bucket */
size_t nbuckets;
struct bucket b[];
};
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index bc011cb27521..1618ffe70e23 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -419,6 +419,26 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
return -ENOENT;
}
+/*
+ * BCH_IOCTL_DISK_RESIZE handler: validate the argument, look up the target
+ * device and ask bch2_dev_resize() to change it to arg.nbuckets buckets.
+ *
+ * Returns -EINVAL for unknown flags or non-zero padding, the lookup error
+ * if the device cannot be found, otherwise the result of bch2_dev_resize().
+ */
+static long bch2_ioctl_disk_resize(struct bch_fs *c,
+				   struct bch_ioctl_disk_resize arg)
+{
+	struct bch_dev *dev;
+	int err;
+
+	/* BCH_BY_INDEX is the only flag understood here */
+	if (arg.flags & ~BCH_BY_INDEX)
+		return -EINVAL;
+	if (arg.pad)
+		return -EINVAL;
+
+	dev = bch2_device_lookup(c, arg.dev, arg.flags);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	err = bch2_dev_resize(c, dev, arg.nbuckets);
+
+	/* presumably pairs with a reference taken by bch2_device_lookup() */
+	percpu_ref_put(&dev->ref);
+	return err;
+}
+
#define BCH_IOCTL(_name, _argtype) \
do { \
_argtype i; \
@@ -464,6 +484,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
BCH_IOCTL(read_super, struct bch_ioctl_read_super);
case BCH_IOCTL_DISK_GET_IDX:
BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
+ case BCH_IOCTL_DISK_RESIZE:
+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
default:
return -ENOTTY;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 2fea8787d9df..c928307f31d6 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -952,8 +952,6 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
- unsigned i;
-
cancel_work_sync(&ca->io_error_work);
if (ca->kobj.state_in_sysfs &&
@@ -970,12 +968,6 @@ static void bch2_dev_free(struct bch_dev *ca)
free_percpu(ca->io_done);
bioset_exit(&ca->replica_set);
bch2_dev_buckets_free(ca);
- free_heap(&ca->copygc_heap);
- free_heap(&ca->alloc_heap);
- free_fifo(&ca->free_inc);
-
- for (i = 0; i < RESERVE_NR; i++)
- free_fifo(&ca->free[i]);
percpu_ref_exit(&ca->io_ref);
percpu_ref_exit(&ca->ref);
@@ -1054,9 +1046,6 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
{
struct bch_member *member;
- size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve;
- size_t heap_size;
- unsigned i, btree_node_reserve_buckets;
struct bch_dev *ca;
if (bch2_fs_init_fault("dev_alloc"))
@@ -1091,43 +1080,16 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
ca->uuid = member->uuid;
scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
- /* XXX: tune these */
- movinggc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7);
- reserve_none = max_t(size_t, 4, ca->mi.nbuckets >> 9);
- /*
- * free_inc must be smaller than the copygc reserve: if it was bigger,
- * one copygc iteration might not make enough buckets available to fill
- * up free_inc and allow the allocator to make forward progress
- */
- free_inc_reserve = movinggc_reserve / 2;
- heap_size = movinggc_reserve * 8;
-
- btree_node_reserve_buckets =
- DIV_ROUND_UP(BTREE_NODE_RESERVE,
- ca->mi.bucket_size / c->opts.btree_node_size);
-
if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
0, GFP_KERNEL) ||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
- bch2_dev_buckets_alloc(ca) ||
- !init_fifo(&ca->free[RESERVE_BTREE], btree_node_reserve_buckets,
- GFP_KERNEL) ||
- !init_fifo(&ca->free[RESERVE_MOVINGGC],
- movinggc_reserve, GFP_KERNEL) ||
- !init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
- !init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) ||
- !init_heap(&ca->alloc_heap, free_inc_reserve, GFP_KERNEL) ||
- !init_heap(&ca->copygc_heap,heap_size, GFP_KERNEL) ||
+ bch2_dev_buckets_alloc(c, ca) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio), 0) ||
!(ca->io_done = alloc_percpu(*ca->io_done)))
goto err;
- total_reserve = ca->free_inc.size;
- for (i = 0; i < RESERVE_NR; i++)
- total_reserve += ca->free[i].size;
-
ca->fs = c;
rcu_assign_pointer(c->devs[ca->dev_idx], ca);
@@ -1659,6 +1621,46 @@ err:
return ret;
}
+/*
+ * Change the number of buckets of device @ca to @nbuckets.
+ *
+ * Runs entirely under c->state_lock. Shrinking is rejected with -EINVAL.
+ * If the device is online, a new size that exceeds the capacity of the
+ * underlying block device is rejected with -EINVAL as well. On success the
+ * in-memory bucket structures are resized, the new bucket count is stored
+ * in the device's superblock member entry, the superblock is written and
+ * the filesystem capacity is recalculated.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+ struct bch_member *mi;
+ int ret = 0;
+
+ mutex_lock(&c->state_lock);
+
+ if (nbuckets < ca->mi.nbuckets) {
+ bch_err(ca, "Cannot shrink yet");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * NOTE(review): nbuckets reaches this function straight from the ioctl
+ * argument; the product below could wrap for very large values and
+ * slip past this check, and an offline device gets no upper bound at
+ * all - confirm whether an explicit overflow check is needed.
+ */
+ if (bch2_dev_is_online(ca) &&
+ get_capacity(ca->disk_sb.bdev->bd_disk) <
+ ca->mi.bucket_size * nbuckets) {
+ bch_err(ca, "New size larger than device");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = bch2_dev_buckets_resize(c, ca, nbuckets);
+ if (ret) {
+ bch_err(ca, "Resize error: %i", ret);
+ goto err;
+ }
+
+ /* record the new size in the superblock, under sb_lock: */
+ mutex_lock(&c->sb_lock);
+ mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+ mi->nbuckets = cpu_to_le64(nbuckets);
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ bch2_recalc_capacity(c);
+err:
+ mutex_unlock(&c->state_lock);
+ return ret;
+}
+
/* Filesystem open: */
const char *bch2_fs_open(char * const *devices, unsigned nr_devices,
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index 7ebe5981bf45..3189da6a958f 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -189,6 +189,7 @@ int bch2_dev_add(struct bch_fs *, const char *);
int bch2_dev_online(struct bch_fs *, const char *);
int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_evacuate(struct bch_fs *, struct bch_dev *);
+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);