Diffstat (limited to 'libbcache/super.c')
-rw-r--r-- | libbcache/super.c | 882
1 file changed, 482 insertions(+), 400 deletions(-)
diff --git a/libbcache/super.c b/libbcache/super.c index d2863e62..bb4a7dc3 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -62,28 +62,77 @@ static const uuid_le invalid_uuid = { }; static struct kset *bcache_kset; -struct mutex bch_register_lock; -LIST_HEAD(bch_fs_list); +static LIST_HEAD(bch_fs_list); +static DEFINE_MUTEX(bch_fs_list_lock); static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); struct workqueue_struct *bcache_io_wq; struct crypto_shash *bch_sha256; -static void bch_dev_free(struct cache *); -static int bch_dev_online(struct cache *); +static void bch_dev_free(struct bch_dev *); +static int bch_dev_alloc(struct bch_fs *, unsigned); +static int bch_dev_sysfs_online(struct bch_dev *); +static void __bch_dev_read_only(struct bch_fs *, struct bch_dev *); -static int bch_congested_fn(void *data, int bdi_bits) +struct bch_fs *bch_bdev_to_fs(struct block_device *bdev) +{ + struct bch_fs *c; + struct bch_dev *ca; + unsigned i; + + mutex_lock(&bch_fs_list_lock); + rcu_read_lock(); + + list_for_each_entry(c, &bch_fs_list, list) + for_each_member_device_rcu(ca, c, i) + if (ca->disk_sb.bdev == bdev) { + closure_get(&c->cl); + goto found; + } + c = NULL; +found: + rcu_read_unlock(); + mutex_unlock(&bch_fs_list_lock); + + return c; +} + +static struct bch_fs *__bch_uuid_to_fs(uuid_le uuid) +{ + struct bch_fs *c; + + lockdep_assert_held(&bch_fs_list_lock); + + list_for_each_entry(c, &bch_fs_list, list) + if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) + return c; + + return NULL; +} + +struct bch_fs *bch_uuid_to_fs(uuid_le uuid) +{ + struct bch_fs *c; + + mutex_lock(&bch_fs_list_lock); + c = __bch_uuid_to_fs(uuid); + if (c) + closure_get(&c->cl); + mutex_unlock(&bch_fs_list_lock); + + return c; +} + +int bch_congested(struct bch_fs *c, int bdi_bits) { struct backing_dev_info *bdi; - struct cache_set *c = data; - struct cache *ca; + struct bch_dev *ca; unsigned i; int ret = 0; - rcu_read_lock(); if (bdi_bits & (1 << WB_sync_congested)) { /* Reads - check all devices: */ - for_each_cache_rcu(ca, c, i) { + for_each_readable_member(ca, c, i) { bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); if (bdi_congested(bdi, bdi_bits)) { @@ -94,9 +143,10 @@ static int bch_congested_fn(void *data, int bdi_bits) } else { /* Writes prefer fastest tier: */ struct bch_tier *tier = READ_ONCE(c->fastest_tier); - struct cache_group *grp = tier ? &tier->devs : &c->cache_all; + struct dev_group *grp = tier ? 
&tier->devs : &c->all_devs; - group_for_each_cache_rcu(ca, grp, i) { + rcu_read_lock(); + group_for_each_dev(ca, grp, i) { bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); if (bdi_congested(bdi, bdi_bits)) { @@ -104,12 +154,19 @@ static int bch_congested_fn(void *data, int bdi_bits) break; } } + rcu_read_unlock(); } - rcu_read_unlock(); return ret; } +static int bch_congested_fn(void *data, int bdi_bits) +{ + struct bch_fs *c = data; + + return bch_congested(c, bdi_bits); +} + /* Filesystem RO/RW: */ /* @@ -127,21 +184,21 @@ static int bch_congested_fn(void *data, int bdi_bits) * - allocator depends on the journal (when it rewrites prios and gens) */ -static void __bch_fs_read_only(struct cache_set *c) +static void __bch_fs_read_only(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; unsigned i; bch_tiering_stop(c); - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) bch_moving_gc_stop(ca); bch_gc_thread_stop(c); bch_btree_flush(c); - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) bch_dev_allocator_stop(ca); bch_fs_journal_stop(&c->journal); @@ -149,13 +206,13 @@ static void __bch_fs_read_only(struct cache_set *c) static void bch_writes_disabled(struct percpu_ref *writes) { - struct cache_set *c = container_of(writes, struct cache_set, writes); + struct bch_fs *c = container_of(writes, struct bch_fs, writes); set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); wake_up(&bch_read_only_wait); } -void bch_fs_read_only(struct cache_set *c) +void bch_fs_read_only(struct bch_fs *c) { mutex_lock(&c->state_lock); if (c->state != BCH_FS_STARTING && @@ -222,18 +279,18 @@ out: static void bch_fs_read_only_work(struct work_struct *work) { - struct cache_set *c = - container_of(work, struct cache_set, read_only_work); + struct bch_fs *c = + container_of(work, struct bch_fs, read_only_work); bch_fs_read_only(c); } -static void bch_fs_read_only_async(struct cache_set *c) +static void bch_fs_read_only_async(struct bch_fs *c) { queue_work(system_long_wq, &c->read_only_work); } -bool bch_fs_emergency_read_only(struct cache_set *c) +bool bch_fs_emergency_read_only(struct bch_fs *c) { bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); @@ -244,9 +301,9 @@ bool bch_fs_emergency_read_only(struct cache_set *c) return ret; } -const char *bch_fs_read_write(struct cache_set *c) +const char *bch_fs_read_write(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; const char *err = NULL; unsigned i; @@ -256,10 +313,9 @@ const char *bch_fs_read_write(struct cache_set *c) goto out; err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -268,10 +324,9 @@ const char *bch_fs_read_write(struct cache_set *c) goto err; err = "error starting moving GC thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_moving_gc_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_moving_gc_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -296,7 +351,7 @@ err: /* Filesystem startup/shutdown: */ -static void bch_fs_free(struct cache_set *c) +static void bch_fs_free(struct bch_fs *c) { bch_fs_encryption_exit(c); bch_fs_btree_exit(c); @@ -306,8 +361,8 @@ static void bch_fs_free(struct cache_set *c) bch_fs_compress_exit(c); bch_fs_blockdev_exit(c); bdi_destroy(&c->bdi); - 
lg_lock_free(&c->bucket_stats_lock); - free_percpu(c->bucket_stats_percpu); + lg_lock_free(&c->usage_lock); + free_percpu(c->usage_percpu); mempool_exit(&c->btree_bounce_pool); mempool_exit(&c->bio_bounce_pages); bioset_exit(&c->bio_write); @@ -324,13 +379,12 @@ static void bch_fs_free(struct cache_set *c) if (c->wq) destroy_workqueue(c->wq); - kfree_rcu(rcu_dereference_protected(c->members, 1), rcu); /* shutting down */ free_pages((unsigned long) c->disk_sb, c->disk_sb_order); kfree(c); module_put(THIS_MODULE); } -static void bch_fs_exit(struct cache_set *c) +static void bch_fs_exit(struct bch_fs *c) { unsigned i; @@ -341,29 +395,31 @@ static void bch_fs_exit(struct cache_set *c) cancel_work_sync(&c->read_retry_work); for (i = 0; i < c->sb.nr_devices; i++) - if (c->cache[i]) - bch_dev_free(c->cache[i]); + if (c->devs[i]) + bch_dev_free(c->devs[i]); closure_debug_destroy(&c->cl); kobject_put(&c->kobj); } -static void bch_fs_offline(struct cache_set *c) +static void bch_fs_offline(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; unsigned i; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); list_del(&c->list); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); + + for_each_member_device(ca, c, i) + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev) + sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, + "bcache"); if (c->kobj.state_in_sysfs) kobject_del(&c->kobj); - for_each_cache(ca, c, i) - if (ca->kobj.state_in_sysfs) - kobject_del(&ca->kobj); - bch_fs_debug_exit(c); bch_fs_chardev_exit(c); @@ -382,18 +438,18 @@ static void bch_fs_offline(struct cache_set *c) */ void bch_fs_release(struct kobject *kobj) { - struct cache_set *c = container_of(kobj, struct cache_set, kobj); + struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); bch_notify_fs_stopped(c); bch_fs_free(c); } /* - * All activity on the cache_set should have stopped now - close devices: + * All activity on the filesystem should have stopped now - close devices: */ static void __bch_fs_stop3(struct closure *cl) { - struct cache_set *c = container_of(cl, struct cache_set, cl); + struct bch_fs *c = container_of(cl, struct bch_fs, cl); bch_fs_exit(c); } @@ -404,7 +460,7 @@ static void __bch_fs_stop3(struct closure *cl) */ static void __bch_fs_stop2(struct closure *cl) { - struct cache_set *c = container_of(cl, struct cache_set, caching); + struct bch_fs *c = container_of(cl, struct bch_fs, caching); bch_fs_offline(c); @@ -418,14 +474,14 @@ static void __bch_fs_stop2(struct closure *cl) */ static void __bch_fs_stop1(struct closure *cl) { - struct cache_set *c = container_of(cl, struct cache_set, caching); + struct bch_fs *c = container_of(cl, struct bch_fs, caching); bch_blockdevs_stop(c); continue_at(cl, __bch_fs_stop2, system_wq); } -void bch_fs_stop_async(struct cache_set *c) +void bch_fs_stop_async(struct bch_fs *c) { mutex_lock(&c->state_lock); if (c->state != BCH_FS_STOPPING) { @@ -435,7 +491,7 @@ void bch_fs_stop_async(struct cache_set *c) mutex_unlock(&c->state_lock); } -void bch_fs_stop(struct cache_set *c) +void bch_fs_stop(struct bch_fs *c) { mutex_lock(&c->state_lock); BUG_ON(c->state == BCH_FS_STOPPING); @@ -453,53 +509,25 @@ void bch_fs_stop(struct cache_set *c) closure_sync(&c->cl); bch_fs_exit(c); - kobject_put(&c->kobj); } /* Stop, detaching from backing devices: */ -void bch_fs_detach(struct cache_set *c) +void bch_fs_detach(struct bch_fs *c) { if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags)) bch_fs_stop_async(c); } -static unsigned 
bch_fs_nr_devices(struct cache_set *c) -{ - struct bch_sb_field_members *mi; - unsigned i, nr = 0; - - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - - for (i = 0; i < c->disk_sb->nr_devices; i++) - if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) - nr++; - - mutex_unlock(&c->sb_lock); - - return nr; -} - -static unsigned bch_fs_nr_online_devices(struct cache_set *c) -{ - unsigned i, nr = 0; - - for (i = 0; i < c->sb.nr_devices; i++) - if (c->cache[i]) - nr++; - - return nr; -} - #define alloc_bucket_pages(gfp, ca) \ ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca)))) -static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { - struct cache_set *c; - unsigned iter_size, journal_entry_bytes; + struct bch_sb_field_members *mi; + struct bch_fs *c; + unsigned i, iter_size, journal_entry_bytes; - c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + c = kzalloc(sizeof(struct bch_fs), GFP_KERNEL); if (!c) return NULL; @@ -572,7 +600,7 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_lock(&c->sb_lock); - if (bch_sb_to_cache_set(c, sb)) { + if (bch_sb_to_fs(c, sb)) { mutex_unlock(&c->sb_lock); goto err; } @@ -616,8 +644,8 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->sb.btree_node_size, BCH_ENCODED_EXTENT_MAX) / PAGE_SECTORS, 0) || - !(c->bucket_stats_percpu = alloc_percpu(struct bch_fs_usage)) || - lg_lock_init(&c->bucket_stats_lock) || + !(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) || + lg_lock_init(&c->usage_lock) || mempool_init_page_pool(&c->btree_bounce_pool, 1, ilog2(btree_pages(c))) || bdi_setup_and_register(&c->bdi, "bcache") || @@ -635,6 +663,12 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->bdi.congested_fn = bch_congested_fn; c->bdi.congested_data = c; + mi = bch_sb_get_members(c->disk_sb); + for (i = 0; i < c->sb.nr_devices; i++) + if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)) && + bch_dev_alloc(c, i)) + goto err; + /* * Now that all allocations have succeeded, init various refcounty * things that let us shutdown: @@ -660,31 +694,19 @@ err: return NULL; } -static struct cache_set *bch_fs_lookup(uuid_le uuid) -{ - struct cache_set *c; - - lockdep_assert_held(&bch_register_lock); - - list_for_each_entry(c, &bch_fs_list, list) - if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) - return c; - - return NULL; -} - -static const char *__bch_fs_online(struct cache_set *c) +static const char *__bch_fs_online(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; + const char *err = NULL; unsigned i; int ret; - lockdep_assert_held(&bch_register_lock); + lockdep_assert_held(&bch_fs_list_lock); if (!list_empty(&c->list)) return NULL; - if (bch_fs_lookup(c->sb.uuid)) + if (__bch_uuid_to_fs(c->sb.uuid)) return "filesystem UUID already open"; ret = bch_fs_chardev_init(c); @@ -700,44 +722,42 @@ static const char *__bch_fs_online(struct cache_set *c) bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) return "error creating sysfs objects"; - for_each_cache(ca, c, i) - if (bch_dev_online(ca)) { - percpu_ref_put(&ca->ref); - return "error creating sysfs objects"; - } - mutex_lock(&c->state_lock); - if (bch_blockdev_volumes_start(c)) { - mutex_unlock(&c->state_lock); - return "can't bring up blockdev volumes"; - } + err = "error creating sysfs objects"; + __for_each_member_device(ca, c, i) + if 
(bch_dev_sysfs_online(ca)) + goto err; - bch_attach_backing_devs(c); + err = "can't bring up blockdev volumes"; + if (bch_blockdev_volumes_start(c)) + goto err; - mutex_unlock(&c->state_lock); + bch_attach_backing_devs(c); list_add(&c->list, &bch_fs_list); - - return 0; + err = NULL; +err: + mutex_unlock(&c->state_lock); + return err; } -static const char *bch_fs_online(struct cache_set *c) +static const char *bch_fs_online(struct bch_fs *c) { const char *err; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); err = __bch_fs_online(c); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); return err; } -static const char *__bch_fs_start(struct cache_set *c) +static const char *__bch_fs_start(struct bch_fs *c) { const char *err = "cannot allocate memory"; struct bch_sb_field_members *mi; - struct cache *ca; + struct bch_dev *ca; unsigned i, id; time64_t now; LIST_HEAD(journal); @@ -746,39 +766,30 @@ static const char *__bch_fs_start(struct cache_set *c) BUG_ON(c->state != BCH_FS_STARTING); - /* - * Make sure that each cache object's mi is up to date before - * we start testing it. - */ - for_each_cache(ca, c, i) - bch_sb_from_cache_set(c, ca); + mutex_lock(&c->sb_lock); + for_each_online_member(ca, c, i) + bch_sb_from_fs(c, ca); + mutex_unlock(&c->sb_lock); if (BCH_SB_INITIALIZED(c->disk_sb)) { ret = bch_journal_read(c, &journal); if (ret) goto err; - pr_debug("btree_journal_read() done"); - j = &list_entry(journal.prev, struct journal_replay, list)->j; + c->prio_clock[READ].hand = le16_to_cpu(j->read_clock); + c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock); + err = "error reading priorities"; - for_each_cache(ca, c, i) { + for_each_readable_member(ca, c, i) { ret = bch_prio_read(ca); if (ret) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); goto err; } } - c->prio_clock[READ].hand = le16_to_cpu(j->read_clock); - c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock); - - for_each_cache(ca, c, i) { - bch_recalc_min_prio(ca, READ); - bch_recalc_min_prio(ca, WRITE); - } - for (id = 0; id < BTREE_ID_NR; id++) { unsigned level; struct bkey_i *k; @@ -816,10 +827,9 @@ static const char *__bch_fs_start(struct cache_set *c) bch_journal_start(c); err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -853,18 +863,10 @@ static const char *__bch_fs_start(struct cache_set *c) bch_initial_gc(c, NULL); - err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); - goto err; - } - err = "unable to allocate journal buckets"; - for_each_cache(ca, c, i) + for_each_rw_member(ca, c, i) if (bch_dev_journal_alloc(ca)) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); goto err; } @@ -875,6 +877,13 @@ static const char *__bch_fs_start(struct cache_set *c) bch_journal_start(c); bch_journal_set_replay_done(&c->journal); + err = "error starting allocator thread"; + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); + goto err; + } + err = "cannot allocate new btree root"; for (id = 0; id < BTREE_ID_NR; id++) if (bch_btree_root_alloc(c, id, &cl)) { @@ -918,10 +927,8 @@ recovery_done: mi = bch_sb_get_members(c->disk_sb); now = ktime_get_seconds(); - 
rcu_read_lock(); - for_each_cache_rcu(ca, c, i) + for_each_member_device(ca, c, i) mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); - rcu_read_unlock(); SET_BCH_SB_INITIALIZED(c->disk_sb, true); SET_BCH_SB_CLEAN(c->disk_sb, false); @@ -966,12 +973,12 @@ err: goto out; } -const char *bch_fs_start(struct cache_set *c) +const char *bch_fs_start(struct bch_fs *c) { return __bch_fs_start(c) ?: bch_fs_online(c); } -static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c) +static const char *bch_dev_may_add(struct bch_sb *sb, struct bch_fs *c) { struct bch_sb_field_members *sb_mi; @@ -984,33 +991,28 @@ static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c) if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < BCH_SB_BTREE_NODE_SIZE(c->disk_sb)) - return "new cache bucket_size is too small"; + return "new cache bucket size is too small"; return NULL; } -static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c) +static const char *bch_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) { - struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb); - struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb); - uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid; - const char *err; + struct bch_sb *newest = + le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; + struct bch_sb_field_members *mi = bch_sb_get_members(newest); - err = bch_dev_may_add(sb, c); - if (err) - return err; + if (uuid_le_cmp(fs->uuid, sb->uuid)) + return "device not a member of filesystem"; + + if (sb->dev_idx >= newest->nr_devices) + return "device has invalid dev_idx"; - if (bch_is_zero(&dev_uuid, sizeof(dev_uuid))) + if (bch_is_zero(mi->members[sb->dev_idx].uuid.b, sizeof(uuid_le))) return "device has been removed"; - /* - * When attaching an existing device, the cache set superblock must - * already contain member_info with a matching UUID - */ - if (sb->dev_idx >= c->disk_sb->nr_devices || - memcmp(&mi->members[sb->dev_idx].uuid, - &dev_uuid, sizeof(uuid_le))) - return "cache sb does not match set"; + if (fs->block_size != sb->block_size) + return "mismatched block size"; return NULL; } @@ -1019,37 +1021,34 @@ static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c) void bch_dev_release(struct kobject *kobj) { - struct cache *ca = container_of(kobj, struct cache, kobj); + struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); kfree(ca); } -static void bch_dev_free(struct cache *ca) +static void bch_dev_free(struct bch_dev *ca) { - struct cache_set *c = ca->set; unsigned i; cancel_work_sync(&ca->io_error_work); - if (c && c->kobj.state_in_sysfs) { - char buf[12]; - - sprintf(buf, "cache%u", ca->dev_idx); - sysfs_remove_link(&c->kobj, buf); - } + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev) + sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, + "bcache"); if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); bch_free_super(&ca->disk_sb); bch_dev_journal_exit(ca); + free_percpu(ca->sectors_written); bioset_exit(&ca->replica_set); - free_percpu(ca->bucket_stats_percpu); + free_percpu(ca->usage_percpu); free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); kfree(ca->bio_prio); - kfree(ca->journal.bio); vfree(ca->buckets); vfree(ca->oldest_gens); free_heap(&ca->heap); @@ -1058,133 +1057,135 @@ static void bch_dev_free(struct cache *ca) for (i = 0; i < RESERVE_NR; i++) free_fifo(&ca->free[i]); + percpu_ref_exit(&ca->io_ref); percpu_ref_exit(&ca->ref); 
kobject_put(&ca->kobj); - - if (c) - kobject_put(&c->kobj); } -static void bch_dev_free_work(struct work_struct *work) +static void bch_dev_io_ref_release(struct percpu_ref *ref) { - struct cache *ca = container_of(work, struct cache, free_work); + struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); - bch_dev_free(ca); + complete(&ca->offline_complete); } -static void bch_dev_percpu_ref_release(struct percpu_ref *ref) +static void bch_dev_offline(struct bch_dev *ca) { - struct cache *ca = container_of(ref, struct cache, ref); + struct bch_fs *c = ca->fs; + + lockdep_assert_held(&c->state_lock); + + __bch_dev_read_only(ca->fs, ca); + + reinit_completion(&ca->offline_complete); + percpu_ref_kill(&ca->io_ref); + wait_for_completion(&ca->offline_complete); + + if (ca->kobj.state_in_sysfs) { + struct kobject *block = + &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; + + sysfs_remove_link(block, "bcache"); + sysfs_remove_link(&ca->kobj, "block"); + } - schedule_work(&ca->free_work); + bch_free_super(&ca->disk_sb); + bch_dev_journal_exit(ca); } -static void bch_dev_free_rcu(struct rcu_head *rcu) +static void bch_dev_ref_release(struct percpu_ref *ref) { - struct cache *ca = container_of(rcu, struct cache, free_rcu); + struct bch_dev *ca = container_of(ref, struct bch_dev, ref); - /* - * This decrements the ref count to ca, and once the ref count - * is 0 (outstanding bios to the ca also incremented it and - * decrement it on completion/error), bch_dev_percpu_ref_release - * is called, and that eventually results in bch_dev_free_work - * being called, which in turn results in bch_dev_release being - * called. - * - * In particular, these functions won't be called until there are no - * bios outstanding (the per-cpu ref counts are all 0), so it - * is safe to remove the actual sysfs device at that point, - * and that can indicate success to the user. 
- */ - - percpu_ref_kill(&ca->ref); + complete(&ca->stop_complete); } -static void bch_dev_stop(struct cache *ca) +static void bch_dev_stop(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; lockdep_assert_held(&c->state_lock); - BUG_ON(rcu_access_pointer(c->cache[ca->dev_idx]) != ca); - rcu_assign_pointer(c->cache[ca->dev_idx], NULL); + BUG_ON(rcu_access_pointer(c->devs[ca->dev_idx]) != ca); + rcu_assign_pointer(c->devs[ca->dev_idx], NULL); - call_rcu(&ca->free_rcu, bch_dev_free_rcu); + synchronize_rcu(); + + reinit_completion(&ca->stop_complete); + percpu_ref_kill(&ca->ref); + wait_for_completion(&ca->stop_complete); } -static int bch_dev_online(struct cache *ca) +static int bch_dev_sysfs_online(struct bch_dev *ca) { - char buf[12]; + struct bch_fs *c = ca->fs; + int ret; + + if (!c->kobj.state_in_sysfs) + return 0; + + if (!ca->kobj.state_in_sysfs) { + ret = kobject_add(&ca->kobj, &ca->fs->kobj, + "dev-%u", ca->dev_idx); + if (ret) + return ret; + } - sprintf(buf, "cache%u", ca->dev_idx); + if (ca->disk_sb.bdev) { + struct kobject *block = + &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; - if (kobject_add(&ca->kobj, - &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, - "bcache") || - sysfs_create_link(&ca->kobj, &ca->set->kobj, "set") || - sysfs_create_link(&ca->set->kobj, &ca->kobj, buf)) - return -1; + ret = sysfs_create_link(block, &ca->kobj, "bcache"); + if (ret) + return ret; + ret = sysfs_create_link(&ca->kobj, block, "block"); + if (ret) + return ret; + } return 0; } -static const char *bch_dev_alloc(struct bcache_superblock *sb, - struct cache_set *c, - struct cache **ret) +static int bch_dev_alloc(struct bch_fs *c, unsigned dev_idx) { struct bch_member *member; size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve; size_t heap_size; unsigned i; - const char *err = "cannot allocate memory"; - struct cache *ca; - - if (c->sb.nr_devices == 1) - bdevname(sb->bdev, c->name); + struct bch_dev *ca; if (bch_fs_init_fault("dev_alloc")) - return err; + return -ENOMEM; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) - return err; - - if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release, - 0, GFP_KERNEL)) { - kfree(ca); - return err; - } + return -ENOMEM; kobject_init(&ca->kobj, &bch_dev_ktype); + init_completion(&ca->stop_complete); + init_completion(&ca->offline_complete); spin_lock_init(&ca->self.lock); ca->self.nr = 1; rcu_assign_pointer(ca->self.d[0].dev, ca); - ca->dev_idx = sb->sb->dev_idx; + ca->dev_idx = dev_idx; - INIT_WORK(&ca->free_work, bch_dev_free_work); spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); bch_dev_moving_gc_init(ca); - ca->disk_sb = *sb; - if (sb->mode & FMODE_EXCL) - ca->disk_sb.bdev->bd_holder = ca; - memset(sb, 0, sizeof(*sb)); - INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work); - err = "dynamic fault"; if (bch_fs_init_fault("dev_alloc")) goto err; - member = bch_sb_get_members(ca->disk_sb.sb)->members + - ca->disk_sb.sb->dev_idx; + member = bch_sb_get_members(c->disk_sb)->members + dev_idx; - ca->mi = cache_mi_to_cpu_mi(member); + ca->mi = bch_mi_to_cpu(member); ca->uuid = member->uuid; ca->bucket_bits = ilog2(ca->mi.bucket_size); + scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); /* XXX: tune these */ movinggc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7); @@ -1197,7 +1198,11 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, free_inc_reserve = movinggc_reserve / 2; heap_size = movinggc_reserve * 8; - if 
(!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + if (percpu_ref_init(&ca->ref, bch_dev_ref_release, + 0, GFP_KERNEL) || + percpu_ref_init(&ca->io_ref, bch_dev_io_ref_release, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || + !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], movinggc_reserve, GFP_KERNEL) || @@ -1208,15 +1213,14 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, ca->mi.nbuckets)) || !(ca->buckets = vzalloc(sizeof(struct bucket) * ca->mi.nbuckets)) || - !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * + !(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) * 2, GFP_KERNEL)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || - !(ca->bucket_stats_percpu = alloc_percpu(struct bch_dev_usage)) || + !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)) || !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio)) || - !(ca->sectors_written = alloc_percpu(*ca->sectors_written)) || - bch_dev_journal_init(ca)) + !(ca->sectors_written = alloc_percpu(*ca->sectors_written))) goto err; ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); @@ -1224,94 +1228,130 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, total_reserve = ca->free_inc.size; for (i = 0; i < RESERVE_NR; i++) total_reserve += ca->free[i].size; - pr_debug("%zu buckets reserved", total_reserve); ca->copygc_write_point.group = &ca->self; ca->tiering_write_point.group = &ca->self; + ca->fs = c; + rcu_assign_pointer(c->devs[ca->dev_idx], ca); + + if (bch_dev_sysfs_online(ca)) + pr_warn("error creating sysfs objects"); + + return 0; +err: + bch_dev_free(ca); + return -ENOMEM; +} + +static int bch_dev_online(struct bch_fs *c, struct bcache_superblock *sb) +{ + struct bch_dev *ca; + int ret; + + lockdep_assert_held(&c->sb_lock); + + if (le64_to_cpu(sb->sb->seq) > + le64_to_cpu(c->disk_sb->seq)) + bch_sb_to_fs(c, sb->sb); + + BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices || + !c->devs[sb->sb->dev_idx]); + + ca = c->devs[sb->sb->dev_idx]; + if (ca->disk_sb.bdev) { + bch_err(c, "already have device online in slot %u", + sb->sb->dev_idx); + return -EINVAL; + } + + ret = bch_dev_journal_init(ca, sb->sb); + if (ret) + return ret; + /* * Increase journal write timeout if flushes to this device are * expensive: */ - if (!blk_queue_nonrot(bdev_get_queue(ca->disk_sb.bdev)) && + if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) && journal_flushes_device(ca)) c->journal.write_delay_ms = max(c->journal.write_delay_ms, 1000U); - kobject_get(&c->kobj); - ca->set = c; - - kobject_get(&ca->kobj); - rcu_assign_pointer(c->cache[ca->dev_idx], ca); - - mutex_lock(&c->sb_lock); - - if (le64_to_cpu(ca->disk_sb.sb->seq) > le64_to_cpu(c->disk_sb->seq)) - bch_sb_to_cache_set(c, ca->disk_sb.sb); + /* Commit: */ + ca->disk_sb = *sb; + if (sb->mode & FMODE_EXCL) + ca->disk_sb.bdev->bd_holder = ca; + memset(sb, 0, sizeof(*sb)); - mutex_unlock(&c->sb_lock); + if (c->sb.nr_devices == 1) + bdevname(ca->disk_sb.bdev, c->name); + bdevname(ca->disk_sb.bdev, ca->name); - err = "error creating kobject"; - if (c->kobj.state_in_sysfs && - bch_dev_online(ca)) + if (bch_dev_sysfs_online(ca)) pr_warn("error creating sysfs objects"); - if (ret) - *ret = ca; - else - kobject_put(&ca->kobj); - return NULL; -err: - bch_dev_free(ca); - return err; + lg_local_lock(&c->usage_lock); + if 
(!gc_will_visit(c, gc_phase(GC_PHASE_SB_METADATA))) + bch_mark_dev_metadata(ca->fs, ca); + lg_local_unlock(&c->usage_lock); + + percpu_ref_reinit(&ca->io_ref); + return 0; } /* Device management: */ -static void __bch_dev_read_only(struct cache_set *c, struct cache *ca) +bool bch_fs_may_start(struct bch_fs *c, int flags) { - bch_moving_gc_stop(ca); - - /* - * This stops new data writes (e.g. to existing open data - * buckets) and then waits for all existing writes to - * complete. - */ - bch_dev_allocator_stop(ca); - - bch_dev_group_remove(&c->journal.devs, ca); -} + struct bch_sb_field_members *mi; + unsigned meta_missing = 0; + unsigned data_missing = 0; + bool degraded = false; + unsigned i; -static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca) -{ - lockdep_assert_held(&c->state_lock); + mutex_lock(&c->sb_lock); + mi = bch_sb_get_members(c->disk_sb); - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) - return NULL; + for (i = 0; i < c->disk_sb->nr_devices; i++) + if (!c->devs[i] && + !bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) { + degraded = true; + if (BCH_MEMBER_HAS_METADATA(&mi->members[i])) + meta_missing++; + if (BCH_MEMBER_HAS_DATA(&mi->members[i])) + data_missing++; + } + mutex_unlock(&c->sb_lock); - trace_bcache_cache_read_write(ca); + if (degraded && + !(flags & BCH_FORCE_IF_DEGRADED)) + return false; - if (bch_dev_allocator_start(ca)) - return "error starting allocator thread"; + if (meta_missing && + !(flags & BCH_FORCE_IF_METADATA_DEGRADED)) + return false; - if (bch_moving_gc_start(ca)) - return "error starting moving GC thread"; + if (meta_missing >= BCH_SB_META_REPLICAS_HAVE(c->disk_sb) && + !(flags & BCH_FORCE_IF_METADATA_LOST)) + return false; - if (bch_tiering_start(c)) - return "error starting tiering thread"; + if (data_missing && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) + return false; - bch_notify_dev_read_write(ca); - trace_bcache_cache_read_write_done(ca); + if (data_missing >= BCH_SB_DATA_REPLICAS_HAVE(c->disk_sb) && + !(flags & BCH_FORCE_IF_DATA_LOST)) + return false; - return NULL; + return true; } -bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca, +bool bch_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { lockdep_assert_held(&c->state_lock); - if (new_state == BCH_MEMBER_STATE_ACTIVE) + if (new_state == BCH_MEMBER_STATE_RW) return true; if (ca->mi.has_data && @@ -1335,11 +1375,47 @@ bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca, return true; } -int __bch_dev_set_state(struct cache_set *c, struct cache *ca, +static void __bch_dev_read_only(struct bch_fs *c, struct bch_dev *ca) +{ + bch_moving_gc_stop(ca); + + /* + * This stops new data writes (e.g. to existing open data + * buckets) and then waits for all existing writes to + * complete. 
+ */ + bch_dev_allocator_stop(ca); + + bch_dev_group_remove(&c->journal.devs, ca); +} + +static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca) +{ + lockdep_assert_held(&c->state_lock); + + BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); + + trace_bcache_cache_read_write(ca); + + if (bch_dev_allocator_start(ca)) + return "error starting allocator thread"; + + if (bch_moving_gc_start(ca)) + return "error starting moving GC thread"; + + if (bch_tiering_start(c)) + return "error starting tiering thread"; + + bch_notify_dev_read_write(ca); + trace_bcache_cache_read_write_done(ca); + + return NULL; +} + +int __bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { struct bch_sb_field_members *mi; - char buf[BDEVNAME_SIZE]; if (ca->mi.state == new_state) return 0; @@ -1347,16 +1423,14 @@ int __bch_dev_set_state(struct cache_set *c, struct cache *ca, if (!bch_dev_state_allowed(c, ca, new_state, flags)) return -EINVAL; - if (new_state == BCH_MEMBER_STATE_ACTIVE) { + if (new_state == BCH_MEMBER_STATE_RW) { if (__bch_dev_read_write(c, ca)) return -ENOMEM; } else { __bch_dev_read_only(c, ca); } - bch_notice(c, "%s %s", - bdevname(ca->disk_sb.bdev, buf), - bch_dev_state[new_state]); + bch_notice(ca, "%s", bch_dev_state[new_state]); mutex_lock(&c->sb_lock); mi = bch_sb_get_members(c->disk_sb); @@ -1367,7 +1441,7 @@ int __bch_dev_set_state(struct cache_set *c, struct cache *ca, return 0; } -int bch_dev_set_state(struct cache_set *c, struct cache *ca, +int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { int ret; @@ -1380,7 +1454,7 @@ int bch_dev_set_state(struct cache_set *c, struct cache *ca, } #if 0 -int bch_dev_migrate_from(struct cache_set *c, struct cache *ca) +int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca) { /* First, go RO before we try to migrate data off: */ ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags); @@ -1414,23 +1488,20 @@ int bch_dev_migrate_from(struct cache_set *c, struct cache *ca) /* Device add/removal: */ -static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) +static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { struct bch_sb_field_members *mi; - char name[BDEVNAME_SIZE]; unsigned dev_idx = ca->dev_idx; int ret; - bdevname(ca->disk_sb.bdev, name); - - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { - bch_err(ca->set, "Cannot remove RW device"); + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + bch_err(ca, "Cannot remove RW device"); bch_notify_dev_remove_failed(ca); return -EINVAL; } if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { - bch_err(ca->set, "Cannot remove %s without losing data", name); + bch_err(ca, "Cannot remove without losing data"); bch_notify_dev_remove_failed(ca); return -EINVAL; } @@ -1442,7 +1513,12 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) */ ret = bch_flag_data_bad(ca); if (ret) { - bch_err(c, "Remove of %s failed", name); + bch_err(ca, "Remove failed"); + return ret; + } + + if (ca->mi.has_data || ca->mi.has_metadata) { + bch_err(ca, "Can't remove, still has data"); return ret; } @@ -1458,13 +1534,9 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) bch_journal_meta(&c->journal); + bch_dev_offline(ca); bch_dev_stop(ca); - - /* - * RCU barrier between dropping between c->cache and dropping from - * member info: - */ - synchronize_rcu(); + bch_dev_free(ca); /* * Free this device's slot 
in the bch_member array - all pointers to @@ -1481,28 +1553,29 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) return 0; } -int bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) +int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { int ret; mutex_lock(&c->state_lock); + percpu_ref_put(&ca->ref); ret = __bch_dev_remove(c, ca, flags); mutex_unlock(&c->state_lock); return ret; } -int bch_dev_add(struct cache_set *c, const char *path) +int bch_dev_add(struct bch_fs *c, const char *path) { struct bcache_superblock sb; const char *err; - struct cache *ca; + struct bch_dev *ca = NULL; struct bch_sb_field_members *mi, *dev_mi; struct bch_member saved_mi; unsigned dev_idx, nr_devices, u64s; int ret = -EINVAL; - err = bch_read_super(&sb, c->opts, path); + err = bch_read_super(&sb, bch_opts_empty(), path); if (err) return -EINVAL; @@ -1525,14 +1598,9 @@ int bch_dev_add(struct cache_set *c, const char *path) saved_mi = dev_mi->members[sb.sb->dev_idx]; saved_mi.last_mount = cpu_to_le64(ktime_get_seconds()); - down_read(&c->gc_lock); - if (dynamic_fault("bcache:add:no_slot")) goto no_slot; - if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) - goto no_slot; - mi = bch_sb_get_members(c->disk_sb); for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) if (dev_idx >= c->sb.nr_devices || @@ -1540,15 +1608,11 @@ int bch_dev_add(struct cache_set *c, const char *path) sizeof(uuid_le))) goto have_slot; no_slot: - up_read(&c->gc_lock); - err = "no slots available in superblock"; ret = -ENOSPC; goto err_unlock; have_slot: - up_read(&c->gc_lock); - nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); u64s = (sizeof(struct bch_sb_field_members) + sizeof(struct bch_member) * nr_devices) / sizeof(u64); @@ -1565,46 +1629,47 @@ have_slot: memcpy(dev_mi, mi, u64s * sizeof(u64)); dev_mi->members[dev_idx] = saved_mi; + sb.sb->uuid = c->disk_sb->uuid; sb.sb->dev_idx = dev_idx; sb.sb->nr_devices = nr_devices; - if (bch_fs_mi_update(c, dev_mi->members, nr_devices)) { - err = "cannot allocate memory"; - ret = -ENOMEM; - goto err_unlock; - } - /* commit new member info */ memcpy(mi, dev_mi, u64s * sizeof(u64)); c->disk_sb->nr_devices = nr_devices; c->sb.nr_devices = nr_devices; - err = bch_dev_alloc(&sb, c, &ca); - if (err) + if (bch_dev_alloc(c, dev_idx)) { + err = "cannot allocate memory"; + ret = -ENOMEM; goto err_unlock; + } - bch_write_super(c); + if (bch_dev_online(c, &sb)) { + err = "bch_dev_online() error"; + ret = -ENOMEM; + goto err_unlock; + } - err = "journal alloc failed"; - if (bch_dev_journal_alloc(ca)) - goto err_put; + bch_write_super(c); + mutex_unlock(&c->sb_lock); - bch_notify_dev_added(ca); + ca = c->devs[dev_idx]; + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + err = "journal alloc failed"; + if (bch_dev_journal_alloc(ca)) + goto err; - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { err = __bch_dev_read_write(c, ca); if (err) - goto err_put; + goto err; } - kobject_put(&ca->kobj); - mutex_unlock(&c->sb_lock); + bch_notify_dev_added(ca); mutex_unlock(&c->state_lock); return 0; -err_put: - bch_dev_stop(ca); err_unlock: mutex_unlock(&c->sb_lock); +err: mutex_unlock(&c->state_lock); bch_free_super(&sb); @@ -1615,12 +1680,12 @@ err_unlock: /* Filesystem open: */ const char *bch_fs_open(char * const *devices, unsigned nr_devices, - struct bch_opts opts, struct cache_set **ret) + struct bch_opts opts, struct bch_fs **ret) { const char *err; - struct cache_set *c = NULL; + struct bch_fs *c = NULL; struct bcache_superblock *sb; - unsigned i; + 
unsigned i, best_sb = 0; if (!nr_devices) return "need at least one device"; @@ -1647,19 +1712,33 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices, goto err; } - err = "cannot allocate memory"; - c = bch_fs_alloc(sb[0].sb, opts); - if (!c) - goto err; + for (i = 1; i < nr_devices; i++) + if (le64_to_cpu(sb[i].sb->seq) > + le64_to_cpu(sb[best_sb].sb->seq)) + best_sb = i; for (i = 0; i < nr_devices; i++) { - err = bch_dev_alloc(&sb[i], c, NULL); + err = bch_dev_in_fs(sb[best_sb].sb, sb[i].sb); if (err) goto err; } + err = "cannot allocate memory"; + c = bch_fs_alloc(sb[best_sb].sb, opts); + if (!c) + goto err; + + err = "bch_dev_online() error"; + mutex_lock(&c->sb_lock); + for (i = 0; i < nr_devices; i++) + if (bch_dev_online(c, &sb[i])) { + mutex_unlock(&c->sb_lock); + goto err; + } + mutex_unlock(&c->sb_lock); + err = "insufficient devices"; - if (bch_fs_nr_online_devices(c) != bch_fs_nr_devices(c)) + if (!bch_fs_may_start(c, 0)) goto err; if (!c->opts.nostart) { @@ -1697,19 +1776,19 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, struct bch_opts opts) { const char *err; - struct cache_set *c; - bool allocated_cache_set = false; + struct bch_fs *c; + bool allocated_fs = false; err = bch_validate_cache_super(sb); if (err) return err; - mutex_lock(&bch_register_lock); - c = bch_fs_lookup(sb->sb->uuid); + mutex_lock(&bch_fs_list_lock); + c = __bch_uuid_to_fs(sb->sb->uuid); if (c) { closure_get(&c->cl); - err = bch_dev_in_fs(sb->sb, c); + err = bch_dev_in_fs(c->disk_sb, sb->sb); if (err) goto err; } else { @@ -1718,15 +1797,19 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, if (!c) goto err; - allocated_cache_set = true; + allocated_fs = true; } - err = bch_dev_alloc(sb, c, NULL); - if (err) + err = "bch_dev_online() error"; + + mutex_lock(&c->sb_lock); + if (bch_dev_online(c, sb)) { + mutex_unlock(&c->sb_lock); goto err; + } + mutex_unlock(&c->sb_lock); - if (bch_fs_nr_online_devices(c) == bch_fs_nr_devices(c) && - !c->opts.nostart) { + if (!c->opts.nostart && bch_fs_may_start(c, 0)) { err = __bch_fs_start(c); if (err) goto err; @@ -1737,13 +1820,13 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, goto err; closure_put(&c->cl); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); return NULL; err: - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); - if (allocated_cache_set) + if (allocated_fs) bch_fs_stop(c); else if (c) closure_put(&c->cl); @@ -1762,9 +1845,9 @@ const char *bch_fs_open_incremental(const char *path) return err; if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) { - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); err = bch_backing_dev_register(&sb); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); } else { err = __bch_fs_open_incremental(&sb, opts); } @@ -1821,9 +1904,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) if (code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF) { - struct cache_set *c; + struct bch_fs *c; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); if (!list_empty(&bch_fs_list)) pr_info("Setting all devices read only:"); @@ -1834,7 +1917,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) list_for_each_entry(c, &bch_fs_list, list) bch_fs_read_only(c); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); } return NOTIFY_DONE; @@ -1878,7 +1961,6 @@ static int __init 
bcache_init(void) NULL }; - mutex_init(&bch_register_lock); register_reboot_notifier(&reboot); closure_debug_init(); bkey_pack_test();