Diffstat (limited to 'libbcache/super.c')
-rw-r--r--	libbcache/super.c	882
1 file changed, 482 insertions(+), 400 deletions(-)
diff --git a/libbcache/super.c b/libbcache/super.c
index d2863e62..bb4a7dc3 100644
--- a/libbcache/super.c
+++ b/libbcache/super.c
@@ -62,28 +62,77 @@ static const uuid_le invalid_uuid = {
};
static struct kset *bcache_kset;
-struct mutex bch_register_lock;
-LIST_HEAD(bch_fs_list);
+static LIST_HEAD(bch_fs_list);
+static DEFINE_MUTEX(bch_fs_list_lock);
static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
struct workqueue_struct *bcache_io_wq;
struct crypto_shash *bch_sha256;
-static void bch_dev_free(struct cache *);
-static int bch_dev_online(struct cache *);
+static void bch_dev_free(struct bch_dev *);
+static int bch_dev_alloc(struct bch_fs *, unsigned);
+static int bch_dev_sysfs_online(struct bch_dev *);
+static void __bch_dev_read_only(struct bch_fs *, struct bch_dev *);
-static int bch_congested_fn(void *data, int bdi_bits)
+struct bch_fs *bch_bdev_to_fs(struct block_device *bdev)
+{
+ struct bch_fs *c;
+ struct bch_dev *ca;
+ unsigned i;
+
+ mutex_lock(&bch_fs_list_lock);
+ rcu_read_lock();
+
+ list_for_each_entry(c, &bch_fs_list, list)
+ for_each_member_device_rcu(ca, c, i)
+ if (ca->disk_sb.bdev == bdev) {
+ closure_get(&c->cl);
+ goto found;
+ }
+ c = NULL;
+found:
+ rcu_read_unlock();
+ mutex_unlock(&bch_fs_list_lock);
+
+ return c;
+}
+
+static struct bch_fs *__bch_uuid_to_fs(uuid_le uuid)
+{
+ struct bch_fs *c;
+
+ lockdep_assert_held(&bch_fs_list_lock);
+
+ list_for_each_entry(c, &bch_fs_list, list)
+ if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le)))
+ return c;
+
+ return NULL;
+}
+
+struct bch_fs *bch_uuid_to_fs(uuid_le uuid)
+{
+ struct bch_fs *c;
+
+ mutex_lock(&bch_fs_list_lock);
+ c = __bch_uuid_to_fs(uuid);
+ if (c)
+ closure_get(&c->cl);
+ mutex_unlock(&bch_fs_list_lock);
+
+ return c;
+}
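
Both lookup helpers follow the same discipline: find the filesystem under
bch_fs_list_lock and take a reference (closure_get()) before the lock drops,
so the caller can never observe a bch_fs that is mid-teardown.
bch_bdev_to_fs() additionally holds rcu_read_lock() while walking each
filesystem's member array, since the device pointers are RCU-protected.
A minimal userspace sketch of the lookup-then-ref pattern, with hypothetical
stand-in types for struct bch_fs and its closure:

    #include <pthread.h>
    #include <string.h>

    /* Hypothetical stand-ins for struct bch_fs and its closure ref. */
    struct fs {
        struct fs   *next;
        char        uuid[16];
        int         refcount;   /* protected by fs_list_lock */
    };

    static struct fs *fs_list;
    static pthread_mutex_t fs_list_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Return a referenced fs, or NULL: the ref is taken before the lock drops. */
    static struct fs *uuid_to_fs(const char uuid[16])
    {
        struct fs *c;

        pthread_mutex_lock(&fs_list_lock);
        for (c = fs_list; c; c = c->next)
            if (!memcmp(c->uuid, uuid, 16)) {
                c->refcount++;  /* closure_get() analogue */
                break;
            }
        pthread_mutex_unlock(&fs_list_lock);

        return c;
    }
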
+
+int bch_congested(struct bch_fs *c, int bdi_bits)
{
struct backing_dev_info *bdi;
- struct cache_set *c = data;
- struct cache *ca;
+ struct bch_dev *ca;
unsigned i;
int ret = 0;
- rcu_read_lock();
if (bdi_bits & (1 << WB_sync_congested)) {
/* Reads - check all devices: */
- for_each_cache_rcu(ca, c, i) {
+ for_each_readable_member(ca, c, i) {
bdi = blk_get_backing_dev_info(ca->disk_sb.bdev);
if (bdi_congested(bdi, bdi_bits)) {
@@ -94,9 +143,10 @@ static int bch_congested_fn(void *data, int bdi_bits)
} else {
/* Writes prefer fastest tier: */
struct bch_tier *tier = READ_ONCE(c->fastest_tier);
- struct cache_group *grp = tier ? &tier->devs : &c->cache_all;
+ struct dev_group *grp = tier ? &tier->devs : &c->all_devs;
- group_for_each_cache_rcu(ca, grp, i) {
+ rcu_read_lock();
+ group_for_each_dev(ca, grp, i) {
bdi = blk_get_backing_dev_info(ca->disk_sb.bdev);
if (bdi_congested(bdi, bdi_bits)) {
@@ -104,12 +154,19 @@ static int bch_congested_fn(void *data, int bdi_bits)
break;
}
}
+ rcu_read_unlock();
}
- rcu_read_unlock();
return ret;
}
+static int bch_congested_fn(void *data, int bdi_bits)
+{
+ struct bch_fs *c = data;
+
+ return bch_congested(c, bdi_bits);
+}
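
The congested check becomes a typed public function, with bch_congested_fn()
kept only as a thin adapter for the backing_dev_info callback, which passes an
untyped void *. Note also that rcu_read_lock() is narrowed to just the
device-group walk; the readable-member loop appears to rely on the iterator's
own reference counting instead. Reduced to its shape (illustrative names, not
the kernel API):

    struct fs { int congested_bits; };

    /* Typed core, callable directly by the rest of the code. */
    static int fs_congested(struct fs *c, int bits)
    {
        return c->congested_bits & bits;
    }

    /* void * adapter matching the callback signature. */
    static int fs_congested_fn(void *data, int bits)
    {
        return fs_congested(data, bits);
    }
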
+
/* Filesystem RO/RW: */
/*
@@ -127,21 +184,21 @@ static int bch_congested_fn(void *data, int bdi_bits)
* - allocator depends on the journal (when it rewrites prios and gens)
*/
-static void __bch_fs_read_only(struct cache_set *c)
+static void __bch_fs_read_only(struct bch_fs *c)
{
- struct cache *ca;
+ struct bch_dev *ca;
unsigned i;
bch_tiering_stop(c);
- for_each_cache(ca, c, i)
+ for_each_member_device(ca, c, i)
bch_moving_gc_stop(ca);
bch_gc_thread_stop(c);
bch_btree_flush(c);
- for_each_cache(ca, c, i)
+ for_each_member_device(ca, c, i)
bch_dev_allocator_stop(ca);
bch_fs_journal_stop(&c->journal);
@@ -149,13 +206,13 @@ static void __bch_fs_read_only(struct cache_set *c)
static void bch_writes_disabled(struct percpu_ref *writes)
{
- struct cache_set *c = container_of(writes, struct cache_set, writes);
+ struct bch_fs *c = container_of(writes, struct bch_fs, writes);
set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
wake_up(&bch_read_only_wait);
}
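
The release callback receives only the percpu_ref and recovers the owning
bch_fs with container_of(). That idiom is just pointer arithmetic from a
member back to its enclosing struct; a simplified expansion (the kernel's
version adds type checking):

    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *) ((char *) (ptr) - offsetof(type, member)))

    struct fs {
        int flags;
        int writes;     /* stand-in for the percpu_ref member */
    };

    static struct fs *writes_to_fs(int *writes_ref)
    {
        return container_of(writes_ref, struct fs, writes);
    }
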
-void bch_fs_read_only(struct cache_set *c)
+void bch_fs_read_only(struct bch_fs *c)
{
mutex_lock(&c->state_lock);
if (c->state != BCH_FS_STARTING &&
@@ -222,18 +279,18 @@ out:
static void bch_fs_read_only_work(struct work_struct *work)
{
- struct cache_set *c =
- container_of(work, struct cache_set, read_only_work);
+ struct bch_fs *c =
+ container_of(work, struct bch_fs, read_only_work);
bch_fs_read_only(c);
}
-static void bch_fs_read_only_async(struct cache_set *c)
+static void bch_fs_read_only_async(struct bch_fs *c)
{
queue_work(system_long_wq, &c->read_only_work);
}
-bool bch_fs_emergency_read_only(struct cache_set *c)
+bool bch_fs_emergency_read_only(struct bch_fs *c)
{
bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
@@ -244,9 +301,9 @@ bool bch_fs_emergency_read_only(struct cache_set *c)
return ret;
}
-const char *bch_fs_read_write(struct cache_set *c)
+const char *bch_fs_read_write(struct bch_fs *c)
{
- struct cache *ca;
+ struct bch_dev *ca;
const char *err = NULL;
unsigned i;
@@ -256,10 +313,9 @@ const char *bch_fs_read_write(struct cache_set *c)
goto out;
err = "error starting allocator thread";
- for_each_cache(ca, c, i)
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_dev_allocator_start(ca)) {
- percpu_ref_put(&ca->ref);
+ for_each_rw_member(ca, c, i)
+ if (bch_dev_allocator_start(ca)) {
+ percpu_ref_put(&ca->io_ref);
goto err;
}
@@ -268,10 +324,9 @@ const char *bch_fs_read_write(struct cache_set *c)
goto err;
err = "error starting moving GC thread";
- for_each_cache(ca, c, i)
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_moving_gc_start(ca)) {
- percpu_ref_put(&ca->ref);
+ for_each_rw_member(ca, c, i)
+ if (bch_moving_gc_start(ca)) {
+ percpu_ref_put(&ca->io_ref);
goto err;
}
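
Both hunks above replace an open-coded state check with for_each_rw_member(),
whose convention is that each yielded device carries a held io_ref; breaking
out of the loop early leaves the caller owning the current reference, hence
the percpu_ref_put(&ca->io_ref) before goto err. A hedged sketch of how such
an iterator can be built (simplified, hypothetical types):

    struct dev { int rw; int io_ref; };
    struct fs  { struct dev *devs[8]; unsigned nr; };

    static struct dev *next_rw_dev(struct fs *c, unsigned *i, struct dev *prev)
    {
        if (prev)
            prev->io_ref--;         /* put the previous iteration's ref */

        for (; *i < c->nr; (*i)++) {
            struct dev *ca = c->devs[*i];

            if (ca && ca->rw) {
                ca->io_ref++;       /* reference handed to the caller */
                (*i)++;
                return ca;
            }
        }
        return NULL;
    }

    #define for_each_rw_member(ca, c, i)                        \
        for ((i) = 0, (ca) = NULL;                              \
             ((ca) = next_rw_dev((c), &(i), (ca))) != NULL; )

On normal termination the helper has already dropped every reference it
handed out; only an early exit leaves one outstanding.
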
@@ -296,7 +351,7 @@ err:
/* Filesystem startup/shutdown: */
-static void bch_fs_free(struct cache_set *c)
+static void bch_fs_free(struct bch_fs *c)
{
bch_fs_encryption_exit(c);
bch_fs_btree_exit(c);
@@ -306,8 +361,8 @@ static void bch_fs_free(struct cache_set *c)
bch_fs_compress_exit(c);
bch_fs_blockdev_exit(c);
bdi_destroy(&c->bdi);
- lg_lock_free(&c->bucket_stats_lock);
- free_percpu(c->bucket_stats_percpu);
+ lg_lock_free(&c->usage_lock);
+ free_percpu(c->usage_percpu);
mempool_exit(&c->btree_bounce_pool);
mempool_exit(&c->bio_bounce_pages);
bioset_exit(&c->bio_write);
@@ -324,13 +379,12 @@ static void bch_fs_free(struct cache_set *c)
if (c->wq)
destroy_workqueue(c->wq);
- kfree_rcu(rcu_dereference_protected(c->members, 1), rcu); /* shutting down */
free_pages((unsigned long) c->disk_sb, c->disk_sb_order);
kfree(c);
module_put(THIS_MODULE);
}
-static void bch_fs_exit(struct cache_set *c)
+static void bch_fs_exit(struct bch_fs *c)
{
unsigned i;
@@ -341,29 +395,31 @@ static void bch_fs_exit(struct cache_set *c)
cancel_work_sync(&c->read_retry_work);
for (i = 0; i < c->sb.nr_devices; i++)
- if (c->cache[i])
- bch_dev_free(c->cache[i]);
+ if (c->devs[i])
+ bch_dev_free(c->devs[i]);
closure_debug_destroy(&c->cl);
kobject_put(&c->kobj);
}
-static void bch_fs_offline(struct cache_set *c)
+static void bch_fs_offline(struct bch_fs *c)
{
- struct cache *ca;
+ struct bch_dev *ca;
unsigned i;
- mutex_lock(&bch_register_lock);
+ mutex_lock(&bch_fs_list_lock);
list_del(&c->list);
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&bch_fs_list_lock);
+
+ for_each_member_device(ca, c, i)
+ if (ca->kobj.state_in_sysfs &&
+ ca->disk_sb.bdev)
+ sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
+ "bcache");
if (c->kobj.state_in_sysfs)
kobject_del(&c->kobj);
- for_each_cache(ca, c, i)
- if (ca->kobj.state_in_sysfs)
- kobject_del(&ca->kobj);
-
bch_fs_debug_exit(c);
bch_fs_chardev_exit(c);
@@ -382,18 +438,18 @@ static void bch_fs_offline(struct cache_set *c)
*/
void bch_fs_release(struct kobject *kobj)
{
- struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
bch_notify_fs_stopped(c);
bch_fs_free(c);
}
/*
- * All activity on the cache_set should have stopped now - close devices:
+ * All activity on the filesystem should have stopped now - close devices:
*/
static void __bch_fs_stop3(struct closure *cl)
{
- struct cache_set *c = container_of(cl, struct cache_set, cl);
+ struct bch_fs *c = container_of(cl, struct bch_fs, cl);
bch_fs_exit(c);
}
@@ -404,7 +460,7 @@ static void __bch_fs_stop3(struct closure *cl)
*/
static void __bch_fs_stop2(struct closure *cl)
{
- struct cache_set *c = container_of(cl, struct cache_set, caching);
+ struct bch_fs *c = container_of(cl, struct bch_fs, caching);
bch_fs_offline(c);
@@ -418,14 +474,14 @@ static void __bch_fs_stop2(struct closure *cl)
*/
static void __bch_fs_stop1(struct closure *cl)
{
- struct cache_set *c = container_of(cl, struct cache_set, caching);
+ struct bch_fs *c = container_of(cl, struct bch_fs, caching);
bch_blockdevs_stop(c);
continue_at(cl, __bch_fs_stop2, system_wq);
}
-void bch_fs_stop_async(struct cache_set *c)
+void bch_fs_stop_async(struct bch_fs *c)
{
mutex_lock(&c->state_lock);
if (c->state != BCH_FS_STOPPING) {
@@ -435,7 +491,7 @@ void bch_fs_stop_async(struct cache_set *c)
mutex_unlock(&c->state_lock);
}
-void bch_fs_stop(struct cache_set *c)
+void bch_fs_stop(struct bch_fs *c)
{
mutex_lock(&c->state_lock);
BUG_ON(c->state == BCH_FS_STOPPING);
@@ -453,53 +509,25 @@ void bch_fs_stop(struct cache_set *c)
closure_sync(&c->cl);
bch_fs_exit(c);
- kobject_put(&c->kobj);
}
/* Stop, detaching from backing devices: */
-void bch_fs_detach(struct cache_set *c)
+void bch_fs_detach(struct bch_fs *c)
{
if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags))
bch_fs_stop_async(c);
}
-static unsigned bch_fs_nr_devices(struct cache_set *c)
-{
- struct bch_sb_field_members *mi;
- unsigned i, nr = 0;
-
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
-
- for (i = 0; i < c->disk_sb->nr_devices; i++)
- if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)))
- nr++;
-
- mutex_unlock(&c->sb_lock);
-
- return nr;
-}
-
-static unsigned bch_fs_nr_online_devices(struct cache_set *c)
-{
- unsigned i, nr = 0;
-
- for (i = 0; i < c->sb.nr_devices; i++)
- if (c->cache[i])
- nr++;
-
- return nr;
-}
-
#define alloc_bucket_pages(gfp, ca) \
((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
-static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
+static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
{
- struct cache_set *c;
- unsigned iter_size, journal_entry_bytes;
+ struct bch_sb_field_members *mi;
+ struct bch_fs *c;
+ unsigned i, iter_size, journal_entry_bytes;
- c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
+ c = kzalloc(sizeof(struct bch_fs), GFP_KERNEL);
if (!c)
return NULL;
@@ -572,7 +600,7 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_lock(&c->sb_lock);
- if (bch_sb_to_cache_set(c, sb)) {
+ if (bch_sb_to_fs(c, sb)) {
mutex_unlock(&c->sb_lock);
goto err;
}
@@ -616,8 +644,8 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->sb.btree_node_size,
BCH_ENCODED_EXTENT_MAX) /
PAGE_SECTORS, 0) ||
- !(c->bucket_stats_percpu = alloc_percpu(struct bch_fs_usage)) ||
- lg_lock_init(&c->bucket_stats_lock) ||
+ !(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
+ lg_lock_init(&c->usage_lock) ||
mempool_init_page_pool(&c->btree_bounce_pool, 1,
ilog2(btree_pages(c))) ||
bdi_setup_and_register(&c->bdi, "bcache") ||
@@ -635,6 +663,12 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->bdi.congested_fn = bch_congested_fn;
c->bdi.congested_data = c;
+ mi = bch_sb_get_members(c->disk_sb);
+ for (i = 0; i < c->sb.nr_devices; i++)
+ if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)) &&
+ bch_dev_alloc(c, i))
+ goto err;
+
/*
* Now that all allocations have succeeded, init various refcounty
* things that let us shutdown:
@@ -660,31 +694,19 @@ err:
return NULL;
}
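
With bch_dev_alloc() now driven from bch_fs_alloc(), a struct bch_dev exists
for every member slot whose UUID is non-zero, whether or not that device is
currently present; bch_dev_online() later attaches an actual block device to
the pre-allocated slot. The "slot in use" test is an all-zero-bytes check on
the UUID, i.e. a bch_is_zero() analogue:

    #include <stdbool.h>
    #include <stddef.h>

    static bool is_zero(const void *p, size_t n)
    {
        const unsigned char *b = p;

        while (n--)
            if (*b++)
                return false;
        return true;
    }
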
-static struct cache_set *bch_fs_lookup(uuid_le uuid)
-{
- struct cache_set *c;
-
- lockdep_assert_held(&bch_register_lock);
-
- list_for_each_entry(c, &bch_fs_list, list)
- if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le)))
- return c;
-
- return NULL;
-}
-
-static const char *__bch_fs_online(struct cache_set *c)
+static const char *__bch_fs_online(struct bch_fs *c)
{
- struct cache *ca;
+ struct bch_dev *ca;
+ const char *err = NULL;
unsigned i;
int ret;
- lockdep_assert_held(&bch_register_lock);
+ lockdep_assert_held(&bch_fs_list_lock);
if (!list_empty(&c->list))
return NULL;
- if (bch_fs_lookup(c->sb.uuid))
+ if (__bch_uuid_to_fs(c->sb.uuid))
return "filesystem UUID already open";
ret = bch_fs_chardev_init(c);
@@ -700,44 +722,42 @@ static const char *__bch_fs_online(struct cache_set *c)
bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj))
return "error creating sysfs objects";
- for_each_cache(ca, c, i)
- if (bch_dev_online(ca)) {
- percpu_ref_put(&ca->ref);
- return "error creating sysfs objects";
- }
-
mutex_lock(&c->state_lock);
- if (bch_blockdev_volumes_start(c)) {
- mutex_unlock(&c->state_lock);
- return "can't bring up blockdev volumes";
- }
+ err = "error creating sysfs objects";
+ __for_each_member_device(ca, c, i)
+ if (bch_dev_sysfs_online(ca))
+ goto err;
- bch_attach_backing_devs(c);
+ err = "can't bring up blockdev volumes";
+ if (bch_blockdev_volumes_start(c))
+ goto err;
- mutex_unlock(&c->state_lock);
+ bch_attach_backing_devs(c);
list_add(&c->list, &bch_fs_list);
-
- return 0;
+ err = NULL;
+err:
+ mutex_unlock(&c->state_lock);
+ return err;
}
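
__bch_fs_online() is restructured around a single exit: every failure sets
err and funnels through one label that drops state_lock, instead of unlocking
at each return site as the old blockdev-volumes path did. The pattern, in
miniature (a pthread lock standing in for the kernel mutex):

    #include <pthread.h>

    static const char *online(pthread_mutex_t *lock, int fail1, int fail2)
    {
        const char *err;

        pthread_mutex_lock(lock);

        err = "first step failed";
        if (fail1)
            goto out;

        err = "second step failed";
        if (fail2)
            goto out;

        err = NULL;     /* success also exits through the unlock */
    out:
        pthread_mutex_unlock(lock);
        return err;
    }
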
-static const char *bch_fs_online(struct cache_set *c)
+static const char *bch_fs_online(struct bch_fs *c)
{
const char *err;
- mutex_lock(&bch_register_lock);
+ mutex_lock(&bch_fs_list_lock);
err = __bch_fs_online(c);
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&bch_fs_list_lock);
return err;
}
-static const char *__bch_fs_start(struct cache_set *c)
+static const char *__bch_fs_start(struct bch_fs *c)
{
const char *err = "cannot allocate memory";
struct bch_sb_field_members *mi;
- struct cache *ca;
+ struct bch_dev *ca;
unsigned i, id;
time64_t now;
LIST_HEAD(journal);
@@ -746,39 +766,30 @@ static const char *__bch_fs_start(struct cache_set *c)
BUG_ON(c->state != BCH_FS_STARTING);
- /*
- * Make sure that each cache object's mi is up to date before
- * we start testing it.
- */
- for_each_cache(ca, c, i)
- bch_sb_from_cache_set(c, ca);
+ mutex_lock(&c->sb_lock);
+ for_each_online_member(ca, c, i)
+ bch_sb_from_fs(c, ca);
+ mutex_unlock(&c->sb_lock);
if (BCH_SB_INITIALIZED(c->disk_sb)) {
ret = bch_journal_read(c, &journal);
if (ret)
goto err;
- pr_debug("btree_journal_read() done");
-
j = &list_entry(journal.prev, struct journal_replay, list)->j;
+ c->prio_clock[READ].hand = le16_to_cpu(j->read_clock);
+ c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock);
+
err = "error reading priorities";
- for_each_cache(ca, c, i) {
+ for_each_readable_member(ca, c, i) {
ret = bch_prio_read(ca);
if (ret) {
- percpu_ref_put(&ca->ref);
+ percpu_ref_put(&ca->io_ref);
goto err;
}
}
- c->prio_clock[READ].hand = le16_to_cpu(j->read_clock);
- c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock);
-
- for_each_cache(ca, c, i) {
- bch_recalc_min_prio(ca, READ);
- bch_recalc_min_prio(ca, WRITE);
- }
-
for (id = 0; id < BTREE_ID_NR; id++) {
unsigned level;
struct bkey_i *k;
@@ -816,10 +827,9 @@ static const char *__bch_fs_start(struct cache_set *c)
bch_journal_start(c);
err = "error starting allocator thread";
- for_each_cache(ca, c, i)
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_dev_allocator_start(ca)) {
- percpu_ref_put(&ca->ref);
+ for_each_rw_member(ca, c, i)
+ if (bch_dev_allocator_start(ca)) {
+ percpu_ref_put(&ca->io_ref);
goto err;
}
@@ -853,18 +863,10 @@ static const char *__bch_fs_start(struct cache_set *c)
bch_initial_gc(c, NULL);
- err = "error starting allocator thread";
- for_each_cache(ca, c, i)
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_dev_allocator_start(ca)) {
- percpu_ref_put(&ca->ref);
- goto err;
- }
-
err = "unable to allocate journal buckets";
- for_each_cache(ca, c, i)
+ for_each_rw_member(ca, c, i)
if (bch_dev_journal_alloc(ca)) {
- percpu_ref_put(&ca->ref);
+ percpu_ref_put(&ca->io_ref);
goto err;
}
@@ -875,6 +877,13 @@ static const char *__bch_fs_start(struct cache_set *c)
bch_journal_start(c);
bch_journal_set_replay_done(&c->journal);
+ err = "error starting allocator thread";
+ for_each_rw_member(ca, c, i)
+ if (bch_dev_allocator_start(ca)) {
+ percpu_ref_put(&ca->io_ref);
+ goto err;
+ }
+
err = "cannot allocate new btree root";
for (id = 0; id < BTREE_ID_NR; id++)
if (bch_btree_root_alloc(c, id, &cl)) {
@@ -918,10 +927,8 @@ recovery_done:
mi = bch_sb_get_members(c->disk_sb);
now = ktime_get_seconds();
- rcu_read_lock();
- for_each_cache_rcu(ca, c, i)
+ for_each_member_device(ca, c, i)
mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
- rcu_read_unlock();
SET_BCH_SB_INITIALIZED(c->disk_sb, true);
SET_BCH_SB_CLEAN(c->disk_sb, false);
@@ -966,12 +973,12 @@ err:
goto out;
}
-const char *bch_fs_start(struct cache_set *c)
+const char *bch_fs_start(struct bch_fs *c)
{
return __bch_fs_start(c) ?: bch_fs_online(c);
}
-static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c)
+static const char *bch_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
{
struct bch_sb_field_members *sb_mi;
@@ -984,33 +991,28 @@ static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c)
if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) <
BCH_SB_BTREE_NODE_SIZE(c->disk_sb))
- return "new cache bucket_size is too small";
+ return "new cache bucket size is too small";
return NULL;
}
-static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c)
+static const char *bch_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb)
{
- struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb);
- struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb);
- uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid;
- const char *err;
+ struct bch_sb *newest =
+ le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb;
+ struct bch_sb_field_members *mi = bch_sb_get_members(newest);
- err = bch_dev_may_add(sb, c);
- if (err)
- return err;
+ if (uuid_le_cmp(fs->uuid, sb->uuid))
+ return "device not a member of filesystem";
+
+ if (sb->dev_idx >= newest->nr_devices)
+ return "device has invalid dev_idx";
- if (bch_is_zero(&dev_uuid, sizeof(dev_uuid)))
+ if (bch_is_zero(mi->members[sb->dev_idx].uuid.b, sizeof(uuid_le)))
return "device has been removed";
- /*
- * When attaching an existing device, the cache set superblock must
- * already contain member_info with a matching UUID
- */
- if (sb->dev_idx >= c->disk_sb->nr_devices ||
- memcmp(&mi->members[sb->dev_idx].uuid,
- &dev_uuid, sizeof(uuid_le)))
- return "cache sb does not match set";
+ if (fs->block_size != sb->block_size)
+ return "mismatched block size";
return NULL;
}
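
bch_dev_in_fs() now validates one superblock against another, trusting
whichever copy has the higher seq as the authoritative member list, then
checks UUID membership, dev_idx range, removal, and block size. Picking the
newest copy is a byte-order-aware comparison (a sketch; le64toh() from
<endian.h> standing in for le64_to_cpu()):

    #include <stdint.h>
    #include <endian.h>

    struct sb {
        uint64_t seq;   /* little-endian on disk */
    };

    static struct sb *newest_sb(struct sb *fs, struct sb *dev)
    {
        return le64toh(fs->seq) > le64toh(dev->seq) ? fs : dev;
    }
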
@@ -1019,37 +1021,34 @@ static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c)
void bch_dev_release(struct kobject *kobj)
{
- struct cache *ca = container_of(kobj, struct cache, kobj);
+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
kfree(ca);
}
-static void bch_dev_free(struct cache *ca)
+static void bch_dev_free(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
unsigned i;
cancel_work_sync(&ca->io_error_work);
- if (c && c->kobj.state_in_sysfs) {
- char buf[12];
-
- sprintf(buf, "cache%u", ca->dev_idx);
- sysfs_remove_link(&c->kobj, buf);
- }
+ if (ca->kobj.state_in_sysfs &&
+ ca->disk_sb.bdev)
+ sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
+ "bcache");
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
bch_free_super(&ca->disk_sb);
bch_dev_journal_exit(ca);
+
free_percpu(ca->sectors_written);
bioset_exit(&ca->replica_set);
- free_percpu(ca->bucket_stats_percpu);
+ free_percpu(ca->usage_percpu);
free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
kfree(ca->prio_buckets);
kfree(ca->bio_prio);
- kfree(ca->journal.bio);
vfree(ca->buckets);
vfree(ca->oldest_gens);
free_heap(&ca->heap);
@@ -1058,133 +1057,135 @@ static void bch_dev_free(struct cache *ca)
for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]);
+ percpu_ref_exit(&ca->io_ref);
percpu_ref_exit(&ca->ref);
kobject_put(&ca->kobj);
-
- if (c)
- kobject_put(&c->kobj);
}
-static void bch_dev_free_work(struct work_struct *work)
+static void bch_dev_io_ref_release(struct percpu_ref *ref)
{
- struct cache *ca = container_of(work, struct cache, free_work);
+ struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
- bch_dev_free(ca);
+ complete(&ca->offline_complete);
}
-static void bch_dev_percpu_ref_release(struct percpu_ref *ref)
+static void bch_dev_offline(struct bch_dev *ca)
{
- struct cache *ca = container_of(ref, struct cache, ref);
+ struct bch_fs *c = ca->fs;
+
+ lockdep_assert_held(&c->state_lock);
+
+ __bch_dev_read_only(ca->fs, ca);
+
+ reinit_completion(&ca->offline_complete);
+ percpu_ref_kill(&ca->io_ref);
+ wait_for_completion(&ca->offline_complete);
+
+ if (ca->kobj.state_in_sysfs) {
+ struct kobject *block =
+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
+
+ sysfs_remove_link(block, "bcache");
+ sysfs_remove_link(&ca->kobj, "block");
+ }
- schedule_work(&ca->free_work);
+ bch_free_super(&ca->disk_sb);
+ bch_dev_journal_exit(ca);
}
-static void bch_dev_free_rcu(struct rcu_head *rcu)
+static void bch_dev_ref_release(struct percpu_ref *ref)
{
- struct cache *ca = container_of(rcu, struct cache, free_rcu);
+ struct bch_dev *ca = container_of(ref, struct bch_dev, ref);
- /*
- * This decrements the ref count to ca, and once the ref count
- * is 0 (outstanding bios to the ca also incremented it and
- * decrement it on completion/error), bch_dev_percpu_ref_release
- * is called, and that eventually results in bch_dev_free_work
- * being called, which in turn results in bch_dev_release being
- * called.
- *
- * In particular, these functions won't be called until there are no
- * bios outstanding (the per-cpu ref counts are all 0), so it
- * is safe to remove the actual sysfs device at that point,
- * and that can indicate success to the user.
- */
-
- percpu_ref_kill(&ca->ref);
+ complete(&ca->stop_complete);
}
-static void bch_dev_stop(struct cache *ca)
+static void bch_dev_stop(struct bch_dev *ca)
{
- struct cache_set *c = ca->set;
+ struct bch_fs *c = ca->fs;
lockdep_assert_held(&c->state_lock);
- BUG_ON(rcu_access_pointer(c->cache[ca->dev_idx]) != ca);
- rcu_assign_pointer(c->cache[ca->dev_idx], NULL);
+ BUG_ON(rcu_access_pointer(c->devs[ca->dev_idx]) != ca);
+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
- call_rcu(&ca->free_rcu, bch_dev_free_rcu);
+ synchronize_rcu();
+
+ reinit_completion(&ca->stop_complete);
+ percpu_ref_kill(&ca->ref);
+ wait_for_completion(&ca->stop_complete);
}
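
bch_dev_offline() and bch_dev_stop() replace the old schedule_work()/call_rcu()
teardown with percpu_ref's synchronous kill-and-wait discipline: reinit a
completion, kill the ref so no new holders can appear, and sleep until the
release callback completes it when the last holder drops. A userspace analogue
of that lifecycle, using a biased count and a condition variable (hypothetical
names):

    #include <pthread.h>

    struct ref {
        int             count;          /* holders + one "alive" bias */
        int             dead;
        pthread_mutex_t lock;
        pthread_cond_t  released;
    };

    static void ref_put(struct ref *r)
    {
        pthread_mutex_lock(&r->lock);
        if (--r->count == 0)
            pthread_cond_broadcast(&r->released);
        pthread_mutex_unlock(&r->lock);
    }

    /* percpu_ref_kill() + wait_for_completion() analogue. */
    static void ref_kill_and_wait(struct ref *r)
    {
        pthread_mutex_lock(&r->lock);
        r->dead = 1;
        --r->count;                     /* drop the "alive" bias */
        while (r->count)
            pthread_cond_wait(&r->released, &r->lock);
        pthread_mutex_unlock(&r->lock);
    }

The split into io_ref and ref is what lets a device go offline (io_ref killed,
superblock and journal released) while the struct bch_dev itself stays
allocated until ref drains as well.
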
-static int bch_dev_online(struct cache *ca)
+static int bch_dev_sysfs_online(struct bch_dev *ca)
{
- char buf[12];
+ struct bch_fs *c = ca->fs;
+ int ret;
+
+ if (!c->kobj.state_in_sysfs)
+ return 0;
+
+ if (!ca->kobj.state_in_sysfs) {
+ ret = kobject_add(&ca->kobj, &ca->fs->kobj,
+ "dev-%u", ca->dev_idx);
+ if (ret)
+ return ret;
+ }
- sprintf(buf, "cache%u", ca->dev_idx);
+ if (ca->disk_sb.bdev) {
+ struct kobject *block =
+ &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
- if (kobject_add(&ca->kobj,
- &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
- "bcache") ||
- sysfs_create_link(&ca->kobj, &ca->set->kobj, "set") ||
- sysfs_create_link(&ca->set->kobj, &ca->kobj, buf))
- return -1;
+ ret = sysfs_create_link(block, &ca->kobj, "bcache");
+ if (ret)
+ return ret;
+ ret = sysfs_create_link(&ca->kobj, block, "block");
+ if (ret)
+ return ret;
+ }
return 0;
}
-static const char *bch_dev_alloc(struct bcache_superblock *sb,
- struct cache_set *c,
- struct cache **ret)
+static int bch_dev_alloc(struct bch_fs *c, unsigned dev_idx)
{
struct bch_member *member;
size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve;
size_t heap_size;
unsigned i;
- const char *err = "cannot allocate memory";
- struct cache *ca;
-
- if (c->sb.nr_devices == 1)
- bdevname(sb->bdev, c->name);
+ struct bch_dev *ca;
if (bch_fs_init_fault("dev_alloc"))
- return err;
+ return -ENOMEM;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
- return err;
-
- if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release,
- 0, GFP_KERNEL)) {
- kfree(ca);
- return err;
- }
+ return -ENOMEM;
kobject_init(&ca->kobj, &bch_dev_ktype);
+ init_completion(&ca->stop_complete);
+ init_completion(&ca->offline_complete);
spin_lock_init(&ca->self.lock);
ca->self.nr = 1;
rcu_assign_pointer(ca->self.d[0].dev, ca);
- ca->dev_idx = sb->sb->dev_idx;
+ ca->dev_idx = dev_idx;
- INIT_WORK(&ca->free_work, bch_dev_free_work);
spin_lock_init(&ca->freelist_lock);
spin_lock_init(&ca->prio_buckets_lock);
mutex_init(&ca->heap_lock);
bch_dev_moving_gc_init(ca);
- ca->disk_sb = *sb;
- if (sb->mode & FMODE_EXCL)
- ca->disk_sb.bdev->bd_holder = ca;
- memset(sb, 0, sizeof(*sb));
-
INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work);
- err = "dynamic fault";
if (bch_fs_init_fault("dev_alloc"))
goto err;
- member = bch_sb_get_members(ca->disk_sb.sb)->members +
- ca->disk_sb.sb->dev_idx;
+ member = bch_sb_get_members(c->disk_sb)->members + dev_idx;
- ca->mi = cache_mi_to_cpu_mi(member);
+ ca->mi = bch_mi_to_cpu(member);
ca->uuid = member->uuid;
ca->bucket_bits = ilog2(ca->mi.bucket_size);
+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
/* XXX: tune these */
movinggc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7);
@@ -1197,7 +1198,11 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
free_inc_reserve = movinggc_reserve / 2;
heap_size = movinggc_reserve * 8;
- if (!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
+ if (percpu_ref_init(&ca->ref, bch_dev_ref_release,
+ 0, GFP_KERNEL) ||
+ percpu_ref_init(&ca->io_ref, bch_dev_io_ref_release,
+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+ !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_MOVINGGC],
movinggc_reserve, GFP_KERNEL) ||
@@ -1208,15 +1213,14 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
ca->mi.nbuckets)) ||
!(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->mi.nbuckets)) ||
- !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
+ !(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) *
2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
- !(ca->bucket_stats_percpu = alloc_percpu(struct bch_dev_usage)) ||
+ !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)) ||
!(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio)) ||
- !(ca->sectors_written = alloc_percpu(*ca->sectors_written)) ||
- bch_dev_journal_init(ca))
+ !(ca->sectors_written = alloc_percpu(*ca->sectors_written)))
goto err;
ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
@@ -1224,94 +1228,130 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
total_reserve = ca->free_inc.size;
for (i = 0; i < RESERVE_NR; i++)
total_reserve += ca->free[i].size;
- pr_debug("%zu buckets reserved", total_reserve);
ca->copygc_write_point.group = &ca->self;
ca->tiering_write_point.group = &ca->self;
+ ca->fs = c;
+ rcu_assign_pointer(c->devs[ca->dev_idx], ca);
+
+ if (bch_dev_sysfs_online(ca))
+ pr_warn("error creating sysfs objects");
+
+ return 0;
+err:
+ bch_dev_free(ca);
+ return -ENOMEM;
+}
+
+static int bch_dev_online(struct bch_fs *c, struct bcache_superblock *sb)
+{
+ struct bch_dev *ca;
+ int ret;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ if (le64_to_cpu(sb->sb->seq) >
+ le64_to_cpu(c->disk_sb->seq))
+ bch_sb_to_fs(c, sb->sb);
+
+ BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
+ !c->devs[sb->sb->dev_idx]);
+
+ ca = c->devs[sb->sb->dev_idx];
+ if (ca->disk_sb.bdev) {
+ bch_err(c, "already have device online in slot %u",
+ sb->sb->dev_idx);
+ return -EINVAL;
+ }
+
+ ret = bch_dev_journal_init(ca, sb->sb);
+ if (ret)
+ return ret;
+
/*
* Increase journal write timeout if flushes to this device are
* expensive:
*/
- if (!blk_queue_nonrot(bdev_get_queue(ca->disk_sb.bdev)) &&
+ if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) &&
journal_flushes_device(ca))
c->journal.write_delay_ms =
max(c->journal.write_delay_ms, 1000U);
- kobject_get(&c->kobj);
- ca->set = c;
-
- kobject_get(&ca->kobj);
- rcu_assign_pointer(c->cache[ca->dev_idx], ca);
-
- mutex_lock(&c->sb_lock);
-
- if (le64_to_cpu(ca->disk_sb.sb->seq) > le64_to_cpu(c->disk_sb->seq))
- bch_sb_to_cache_set(c, ca->disk_sb.sb);
+ /* Commit: */
+ ca->disk_sb = *sb;
+ if (sb->mode & FMODE_EXCL)
+ ca->disk_sb.bdev->bd_holder = ca;
+ memset(sb, 0, sizeof(*sb));
- mutex_unlock(&c->sb_lock);
+ if (c->sb.nr_devices == 1)
+ bdevname(ca->disk_sb.bdev, c->name);
+ bdevname(ca->disk_sb.bdev, ca->name);
- err = "error creating kobject";
- if (c->kobj.state_in_sysfs &&
- bch_dev_online(ca))
+ if (bch_dev_sysfs_online(ca))
pr_warn("error creating sysfs objects");
- if (ret)
- *ret = ca;
- else
- kobject_put(&ca->kobj);
- return NULL;
-err:
- bch_dev_free(ca);
- return err;
+ lg_local_lock(&c->usage_lock);
+ if (!gc_will_visit(c, gc_phase(GC_PHASE_SB_METADATA)))
+ bch_mark_dev_metadata(ca->fs, ca);
+ lg_local_unlock(&c->usage_lock);
+
+ percpu_ref_reinit(&ca->io_ref);
+ return 0;
}
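
bch_dev_online() is ordered so that everything fallible (the seq check,
bch_dev_journal_init()) happens before the /* Commit: */ point; only then is
the superblock handle moved into the bch_dev and the caller's copy zeroed,
transferring ownership, before io_ref is revived with percpu_ref_reinit().
The move itself is a plain struct copy plus memset:

    #include <string.h>

    struct super_handle {
        void    *bdev;
        int     mode;
    };

    struct dev {
        struct super_handle disk_sb;
    };

    /* Take ownership of *sb; the caller's copy is emptied so it won't be freed. */
    static void commit_super(struct dev *ca, struct super_handle *sb)
    {
        ca->disk_sb = *sb;
        memset(sb, 0, sizeof(*sb));
    }
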
/* Device management: */
-static void __bch_dev_read_only(struct cache_set *c, struct cache *ca)
+bool bch_fs_may_start(struct bch_fs *c, int flags)
{
- bch_moving_gc_stop(ca);
-
- /*
- * This stops new data writes (e.g. to existing open data
- * buckets) and then waits for all existing writes to
- * complete.
- */
- bch_dev_allocator_stop(ca);
-
- bch_dev_group_remove(&c->journal.devs, ca);
-}
+ struct bch_sb_field_members *mi;
+ unsigned meta_missing = 0;
+ unsigned data_missing = 0;
+ bool degraded = false;
+ unsigned i;
-static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
-{
- lockdep_assert_held(&c->state_lock);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
- return NULL;
+ for (i = 0; i < c->disk_sb->nr_devices; i++)
+ if (!c->devs[i] &&
+ !bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) {
+ degraded = true;
+ if (BCH_MEMBER_HAS_METADATA(&mi->members[i]))
+ meta_missing++;
+ if (BCH_MEMBER_HAS_DATA(&mi->members[i]))
+ data_missing++;
+ }
+ mutex_unlock(&c->sb_lock);
- trace_bcache_cache_read_write(ca);
+ if (degraded &&
+ !(flags & BCH_FORCE_IF_DEGRADED))
+ return false;
- if (bch_dev_allocator_start(ca))
- return "error starting allocator thread";
+ if (meta_missing &&
+ !(flags & BCH_FORCE_IF_METADATA_DEGRADED))
+ return false;
- if (bch_moving_gc_start(ca))
- return "error starting moving GC thread";
+ if (meta_missing >= BCH_SB_META_REPLICAS_HAVE(c->disk_sb) &&
+ !(flags & BCH_FORCE_IF_METADATA_LOST))
+ return false;
- if (bch_tiering_start(c))
- return "error starting tiering thread";
+ if (data_missing && !(flags & BCH_FORCE_IF_DATA_DEGRADED))
+ return false;
- bch_notify_dev_read_write(ca);
- trace_bcache_cache_read_write_done(ca);
+ if (data_missing >= BCH_SB_DATA_REPLICAS_HAVE(c->disk_sb) &&
+ !(flags & BCH_FORCE_IF_DATA_LOST))
+ return false;
- return NULL;
+ return true;
}
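
bch_fs_may_start() replaces the old "all devices online" count check with a
policy over missing members: any missing device requires BCH_FORCE_IF_DEGRADED,
and losing at least as many devices as there are metadata/data replicas
escalates to the _LOST flags. The decision logic, lifted out as a pure function
(flag names per the patch; replica counts as plain integers here):

    #include <stdbool.h>

    #define FORCE_IF_DEGRADED               (1 << 0)
    #define FORCE_IF_METADATA_DEGRADED      (1 << 1)
    #define FORCE_IF_METADATA_LOST          (1 << 2)
    #define FORCE_IF_DATA_DEGRADED          (1 << 3)
    #define FORCE_IF_DATA_LOST              (1 << 4)

    static bool may_start(unsigned meta_missing, unsigned data_missing,
                          unsigned meta_replicas, unsigned data_replicas,
                          bool degraded, int flags)
    {
        if (degraded && !(flags & FORCE_IF_DEGRADED))
            return false;
        if (meta_missing && !(flags & FORCE_IF_METADATA_DEGRADED))
            return false;
        if (meta_missing >= meta_replicas && !(flags & FORCE_IF_METADATA_LOST))
            return false;
        if (data_missing && !(flags & FORCE_IF_DATA_DEGRADED))
            return false;
        if (data_missing >= data_replicas && !(flags & FORCE_IF_DATA_LOST))
            return false;
        return true;
    }
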
-bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca,
+bool bch_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
lockdep_assert_held(&c->state_lock);
- if (new_state == BCH_MEMBER_STATE_ACTIVE)
+ if (new_state == BCH_MEMBER_STATE_RW)
return true;
if (ca->mi.has_data &&
@@ -1335,11 +1375,47 @@ bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca,
return true;
}
-int __bch_dev_set_state(struct cache_set *c, struct cache *ca,
+static void __bch_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
+{
+ bch_moving_gc_stop(ca);
+
+ /*
+ * This stops new data writes (e.g. to existing open data
+ * buckets) and then waits for all existing writes to
+ * complete.
+ */
+ bch_dev_allocator_stop(ca);
+
+ bch_dev_group_remove(&c->journal.devs, ca);
+}
+
+static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW);
+
+ trace_bcache_cache_read_write(ca);
+
+ if (bch_dev_allocator_start(ca))
+ return "error starting allocator thread";
+
+ if (bch_moving_gc_start(ca))
+ return "error starting moving GC thread";
+
+ if (bch_tiering_start(c))
+ return "error starting tiering thread";
+
+ bch_notify_dev_read_write(ca);
+ trace_bcache_cache_read_write_done(ca);
+
+ return NULL;
+}
+
+int __bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
struct bch_sb_field_members *mi;
- char buf[BDEVNAME_SIZE];
if (ca->mi.state == new_state)
return 0;
@@ -1347,16 +1423,14 @@ int __bch_dev_set_state(struct cache_set *c, struct cache *ca,
if (!bch_dev_state_allowed(c, ca, new_state, flags))
return -EINVAL;
- if (new_state == BCH_MEMBER_STATE_ACTIVE) {
+ if (new_state == BCH_MEMBER_STATE_RW) {
if (__bch_dev_read_write(c, ca))
return -ENOMEM;
} else {
__bch_dev_read_only(c, ca);
}
- bch_notice(c, "%s %s",
- bdevname(ca->disk_sb.bdev, buf),
- bch_dev_state[new_state]);
+ bch_notice(ca, "%s", bch_dev_state[new_state]);
mutex_lock(&c->sb_lock);
mi = bch_sb_get_members(c->disk_sb);
@@ -1367,7 +1441,7 @@ int __bch_dev_set_state(struct cache_set *c, struct cache *ca,
return 0;
}
-int bch_dev_set_state(struct cache_set *c, struct cache *ca,
+int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
int ret;
@@ -1380,7 +1454,7 @@ int bch_dev_set_state(struct cache_set *c, struct cache *ca,
}
#if 0
-int bch_dev_migrate_from(struct cache_set *c, struct cache *ca)
+int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca)
{
/* First, go RO before we try to migrate data off: */
ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags);
@@ -1414,23 +1488,20 @@ int bch_dev_migrate_from(struct cache_set *c, struct cache *ca)
/* Device add/removal: */
-static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
+static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
struct bch_sb_field_members *mi;
- char name[BDEVNAME_SIZE];
unsigned dev_idx = ca->dev_idx;
int ret;
- bdevname(ca->disk_sb.bdev, name);
-
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
- bch_err(ca->set, "Cannot remove RW device");
+ if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+ bch_err(ca, "Cannot remove RW device");
bch_notify_dev_remove_failed(ca);
return -EINVAL;
}
if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
- bch_err(ca->set, "Cannot remove %s without losing data", name);
+ bch_err(ca, "Cannot remove without losing data");
bch_notify_dev_remove_failed(ca);
return -EINVAL;
}
@@ -1442,7 +1513,12 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
*/
ret = bch_flag_data_bad(ca);
if (ret) {
- bch_err(c, "Remove of %s failed", name);
+ bch_err(ca, "Remove failed");
+ return ret;
+ }
+
+ if (ca->mi.has_data || ca->mi.has_metadata) {
+ bch_err(ca, "Can't remove, still has data");
return ret;
}
@@ -1458,13 +1534,9 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
bch_journal_meta(&c->journal);
+ bch_dev_offline(ca);
bch_dev_stop(ca);
-
- /*
- * RCU barrier between dropping between c->cache and dropping from
- * member info:
- */
- synchronize_rcu();
+ bch_dev_free(ca);
/*
* Free this device's slot in the bch_member array - all pointers to
@@ -1481,28 +1553,29 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
return 0;
}
-int bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
+int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
int ret;
mutex_lock(&c->state_lock);
+ percpu_ref_put(&ca->ref);
ret = __bch_dev_remove(c, ca, flags);
mutex_unlock(&c->state_lock);
return ret;
}
-int bch_dev_add(struct cache_set *c, const char *path)
+int bch_dev_add(struct bch_fs *c, const char *path)
{
struct bcache_superblock sb;
const char *err;
- struct cache *ca;
+ struct bch_dev *ca = NULL;
struct bch_sb_field_members *mi, *dev_mi;
struct bch_member saved_mi;
unsigned dev_idx, nr_devices, u64s;
int ret = -EINVAL;
- err = bch_read_super(&sb, c->opts, path);
+ err = bch_read_super(&sb, bch_opts_empty(), path);
if (err)
return -EINVAL;
@@ -1525,14 +1598,9 @@ int bch_dev_add(struct cache_set *c, const char *path)
saved_mi = dev_mi->members[sb.sb->dev_idx];
saved_mi.last_mount = cpu_to_le64(ktime_get_seconds());
- down_read(&c->gc_lock);
-
if (dynamic_fault("bcache:add:no_slot"))
goto no_slot;
- if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
- goto no_slot;
-
mi = bch_sb_get_members(c->disk_sb);
for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
if (dev_idx >= c->sb.nr_devices ||
@@ -1540,15 +1608,11 @@ int bch_dev_add(struct cache_set *c, const char *path)
sizeof(uuid_le)))
goto have_slot;
no_slot:
- up_read(&c->gc_lock);
-
err = "no slots available in superblock";
ret = -ENOSPC;
goto err_unlock;
have_slot:
- up_read(&c->gc_lock);
-
nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
u64s = (sizeof(struct bch_sb_field_members) +
sizeof(struct bch_member) * nr_devices) / sizeof(u64);
@@ -1565,46 +1629,47 @@ have_slot:
memcpy(dev_mi, mi, u64s * sizeof(u64));
dev_mi->members[dev_idx] = saved_mi;
+ sb.sb->uuid = c->disk_sb->uuid;
sb.sb->dev_idx = dev_idx;
sb.sb->nr_devices = nr_devices;
- if (bch_fs_mi_update(c, dev_mi->members, nr_devices)) {
- err = "cannot allocate memory";
- ret = -ENOMEM;
- goto err_unlock;
- }
-
/* commit new member info */
memcpy(mi, dev_mi, u64s * sizeof(u64));
c->disk_sb->nr_devices = nr_devices;
c->sb.nr_devices = nr_devices;
- err = bch_dev_alloc(&sb, c, &ca);
- if (err)
+ if (bch_dev_alloc(c, dev_idx)) {
+ err = "cannot allocate memory";
+ ret = -ENOMEM;
goto err_unlock;
+ }
- bch_write_super(c);
+ if (bch_dev_online(c, &sb)) {
+ err = "bch_dev_online() error";
+ ret = -ENOMEM;
+ goto err_unlock;
+ }
- err = "journal alloc failed";
- if (bch_dev_journal_alloc(ca))
- goto err_put;
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
- bch_notify_dev_added(ca);
+ ca = c->devs[dev_idx];
+ if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+ err = "journal alloc failed";
+ if (bch_dev_journal_alloc(ca))
+ goto err;
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
err = __bch_dev_read_write(c, ca);
if (err)
- goto err_put;
+ goto err;
}
- kobject_put(&ca->kobj);
- mutex_unlock(&c->sb_lock);
+ bch_notify_dev_added(ca);
mutex_unlock(&c->state_lock);
return 0;
-err_put:
- bch_dev_stop(ca);
err_unlock:
mutex_unlock(&c->sb_lock);
+err:
mutex_unlock(&c->state_lock);
bch_free_super(&sb);
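
bch_dev_add()'s unwind now uses two labels that encode lock state: err_unlock
is for failures while sb_lock is still held and falls through into err, which
handles the late failures after sb_lock has already been released and drops
only state_lock. The shape of that paired-label unwind:

    #include <pthread.h>

    static int add_dev(pthread_mutex_t *state, pthread_mutex_t *sb,
                       int fail_early, int fail_late)
    {
        int ret = -1;

        pthread_mutex_lock(state);
        pthread_mutex_lock(sb);

        if (fail_early)
            goto err_unlock;        /* both locks still held */

        pthread_mutex_unlock(sb);

        if (fail_late)
            goto err;               /* only the state lock held */

        pthread_mutex_unlock(state);
        return 0;

    err_unlock:
        pthread_mutex_unlock(sb);   /* fall through */
    err:
        pthread_mutex_unlock(state);
        return ret;
    }
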
@@ -1615,12 +1680,12 @@ err_unlock:
/* Filesystem open: */
const char *bch_fs_open(char * const *devices, unsigned nr_devices,
- struct bch_opts opts, struct cache_set **ret)
+ struct bch_opts opts, struct bch_fs **ret)
{
const char *err;
- struct cache_set *c = NULL;
+ struct bch_fs *c = NULL;
struct bcache_superblock *sb;
- unsigned i;
+ unsigned i, best_sb = 0;
if (!nr_devices)
return "need at least one device";
@@ -1647,19 +1712,33 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices,
goto err;
}
- err = "cannot allocate memory";
- c = bch_fs_alloc(sb[0].sb, opts);
- if (!c)
- goto err;
+ for (i = 1; i < nr_devices; i++)
+ if (le64_to_cpu(sb[i].sb->seq) >
+ le64_to_cpu(sb[best_sb].sb->seq))
+ best_sb = i;
for (i = 0; i < nr_devices; i++) {
- err = bch_dev_alloc(&sb[i], c, NULL);
+ err = bch_dev_in_fs(sb[best_sb].sb, sb[i].sb);
if (err)
goto err;
}
+ err = "cannot allocate memory";
+ c = bch_fs_alloc(sb[best_sb].sb, opts);
+ if (!c)
+ goto err;
+
+ err = "bch_dev_online() error";
+ mutex_lock(&c->sb_lock);
+ for (i = 0; i < nr_devices; i++)
+ if (bch_dev_online(c, &sb[i])) {
+ mutex_unlock(&c->sb_lock);
+ goto err;
+ }
+ mutex_unlock(&c->sb_lock);
+
err = "insufficient devices";
- if (bch_fs_nr_online_devices(c) != bch_fs_nr_devices(c))
+ if (!bch_fs_may_start(c, 0))
goto err;
if (!c->opts.nostart) {
@@ -1697,19 +1776,19 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb,
struct bch_opts opts)
{
const char *err;
- struct cache_set *c;
- bool allocated_cache_set = false;
+ struct bch_fs *c;
+ bool allocated_fs = false;
err = bch_validate_cache_super(sb);
if (err)
return err;
- mutex_lock(&bch_register_lock);
- c = bch_fs_lookup(sb->sb->uuid);
+ mutex_lock(&bch_fs_list_lock);
+ c = __bch_uuid_to_fs(sb->sb->uuid);
if (c) {
closure_get(&c->cl);
- err = bch_dev_in_fs(sb->sb, c);
+ err = bch_dev_in_fs(c->disk_sb, sb->sb);
if (err)
goto err;
} else {
@@ -1718,15 +1797,19 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb,
if (!c)
goto err;
- allocated_cache_set = true;
+ allocated_fs = true;
}
- err = bch_dev_alloc(sb, c, NULL);
- if (err)
+ err = "bch_dev_online() error";
+
+ mutex_lock(&c->sb_lock);
+ if (bch_dev_online(c, sb)) {
+ mutex_unlock(&c->sb_lock);
goto err;
+ }
+ mutex_unlock(&c->sb_lock);
- if (bch_fs_nr_online_devices(c) == bch_fs_nr_devices(c) &&
- !c->opts.nostart) {
+ if (!c->opts.nostart && bch_fs_may_start(c, 0)) {
err = __bch_fs_start(c);
if (err)
goto err;
@@ -1737,13 +1820,13 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb,
goto err;
closure_put(&c->cl);
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&bch_fs_list_lock);
return NULL;
err:
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&bch_fs_list_lock);
- if (allocated_cache_set)
+ if (allocated_fs)
bch_fs_stop(c);
else if (c)
closure_put(&c->cl);
@@ -1762,9 +1845,9 @@ const char *bch_fs_open_incremental(const char *path)
return err;
if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) {
- mutex_lock(&bch_register_lock);
+ mutex_lock(&bch_fs_list_lock);
err = bch_backing_dev_register(&sb);
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&bch_fs_list_lock);
} else {
err = __bch_fs_open_incremental(&sb, opts);
}
@@ -1821,9 +1904,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
if (code == SYS_DOWN ||
code == SYS_HALT ||
code == SYS_POWER_OFF) {
- struct cache_set *c;
+ struct bch_fs *c;
- mutex_lock(&bch_register_lock);
+ mutex_lock(&bch_fs_list_lock);
if (!list_empty(&bch_fs_list))
pr_info("Setting all devices read only:");
@@ -1834,7 +1917,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
list_for_each_entry(c, &bch_fs_list, list)
bch_fs_read_only(c);
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&bch_fs_list_lock);
}
return NOTIFY_DONE;
@@ -1878,7 +1961,6 @@ static int __init bcache_init(void)
NULL
};
- mutex_init(&bch_register_lock);
register_reboot_notifier(&reboot);
closure_debug_init();
bkey_pack_test();