author	Kent Overstreet <kent.overstreet@gmail.com>	2017-03-09 11:40:22 -0900
committer	Kent Overstreet <kent.overstreet@gmail.com>	2017-03-17 19:49:22 -0800
commit	b6b65b426e43d9d1b0608030a070fcc26ee22c9c
tree	20c2607694b8926429288da1c40582f7d9edd6e1
parent	04a21c62bcb995f21e1aa80462e5e8aaa80928c5
bcachefs: Fix device add
Adding a device to an existing filesystem now works again - this fixes a number of bugs related to multiple devices (notably, when mounting a filesystem with multiple devices, we weren't actually checking that all the devices were members of the same filesystem!). Also some refactoring, working towards being able to run in degraded mode.
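
The missing mount-time check lands in bch_fs_open() (see the super.c hunks below): pick the superblock with the highest seq as the authoritative copy, then require every device to pass the new bch_dev_in_fs() check against it. Condensed from this patch (error handling elided):

    /* pick the newest superblock among the devices being mounted... */
    for (i = 1; i < nr_devices; i++)
            if (le64_to_cpu(sb[i].sb->seq) >
                le64_to_cpu(sb[best_sb].sb->seq))
                    best_sb = i;

    /* ...then verify every device is a member of the same filesystem */
    for (i = 0; i < nr_devices; i++) {
            err = bch_dev_in_fs(sb[best_sb].sb, sb[i].sb);
            if (err)
                    goto err;
    }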
 fs/bcachefs/btree_gc.c |  18
 fs/bcachefs/btree_gc.h |   1
 fs/bcachefs/buckets.c  |   2
 fs/bcachefs/journal.c  |  66
 fs/bcachefs/super-io.c |  45
 fs/bcachefs/super.c    | 303
 fs/bcachefs/sysfs.c    |  11
 7 files changed, 283 insertions(+), 163 deletions(-)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index bbd9b29fae86..c803cec5baa4 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -273,16 +273,11 @@ static void mark_metadata_sectors(struct cache *ca, u64 start, u64 end,
} while (b < end >> ca->bucket_bits);
}
-/*
- * Mark non btree metadata - prios, journal
- */
-static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca)
+void bch_dev_mark_superblocks(struct cache *ca)
{
struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
unsigned i;
- u64 b;
- /* Mark superblocks: */
for (i = 0; i < layout->nr_superblocks; i++) {
if (layout->sb_offset[i] == BCH_SB_SECTOR)
mark_metadata_sectors(ca, 0, BCH_SB_SECTOR,
@@ -294,6 +289,17 @@ static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca)
(1 << layout->sb_max_size_bits),
BUCKET_SB);
}
+}
+
+/*
+ * Mark non btree metadata - prios, journal
+ */
+static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca)
+{
+ unsigned i;
+ u64 b;
+
+ bch_dev_mark_superblocks(ca);
spin_lock(&c->journal.lock);
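
Pulling bch_dev_mark_superblocks() out of bch_mark_dev_metadata() is what lets device add mark a new device's superblock buckets before journal allocation goes looking for free buckets. The resulting call order in bch_dev_add(), condensed from the super.c hunks later in this patch (error handling elided):

    ca = __bch_dev_alloc(&sb);      /* set up the struct cache from the new sb */
    bch_dev_mark_superblocks(ca);   /* reserve superblock buckets up front */
    bch_dev_journal_alloc(ca);      /* picks journal buckets from what's left */
    err = __bch_dev_add(c, ca);     /* attach the device to the filesystem */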
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 0607187f6081..293af0c86e23 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -13,6 +13,7 @@ int bch_initial_gc(struct cache_set *, struct list_head *);
u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c);
u8 bch_btree_mark_key_initial(struct cache_set *, enum bkey_type,
struct bkey_s_c);
+void bch_dev_mark_superblocks(struct cache *);
/*
* For concurrent mark and sweep (with other index updates), we define a total
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 0dde6fb0c6eb..df37a8817b20 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -247,7 +247,7 @@ static bool bucket_became_unavailable(struct cache_set *c,
{
return is_available_bucket(old) &&
!is_available_bucket(new) &&
- c->gc_pos.phase == GC_PHASE_DONE;
+ c && c->gc_pos.phase == GC_PHASE_DONE;
}
static void bucket_stats_update(struct cache *ca,
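
The added c && guard fits the reordering above: superblock and journal buckets are now marked while the new device may not yet be attached to a cache_set, so c can be NULL here. A hedged reading of the one-liner:

    /* c == NULL before the device is attached; && short-circuits, so a
     * bucket can only "become unavailable" in a running filesystem that
     * has finished its initial GC pass. */
    return is_available_bucket(old) &&
           !is_available_bucket(new) &&
           c && c->gc_pos.phase == GC_PHASE_DONE;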
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 2992789850d4..9cd31c437833 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1527,8 +1527,13 @@ err:
return ret;
}
+#if 0
+/*
+ * Allocate more journal space at runtime - not currently making use of it, but
+ * the code works:
+ */
static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
- unsigned nr, bool write_super)
+ unsigned nr)
{
struct journal *j = &c->journal;
struct journal_device *ja = &ca->journal;
@@ -1615,8 +1620,7 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi));
- if (write_super)
- bch_write_super(c);
+ bch_write_super(c);
ret = 0;
err:
@@ -1628,9 +1632,15 @@ err:
return ret;
}
+#endif
int bch_dev_journal_alloc(struct cache *ca)
{
+ struct journal_device *ja = &ca->journal;
+ struct bch_sb_field_journal *journal_buckets;
+ unsigned i, nr;
+ u64 b, *p;
+
if (dynamic_fault("bcache:add:journal_alloc"))
return -ENOMEM;
@@ -1638,12 +1648,50 @@ int bch_dev_journal_alloc(struct cache *ca)
* clamp journal size to 1024 buckets or 512MB (in sectors), whichever
* is smaller:
*/
- return bch_set_nr_journal_buckets(ca->set, ca,
- clamp_t(unsigned, ca->mi.nbuckets >> 8,
- BCH_JOURNAL_BUCKETS_MIN,
- min(1 << 10,
- (1 << 20) / ca->mi.bucket_size)),
- false);
+ nr = clamp_t(unsigned, ca->mi.nbuckets >> 8,
+ BCH_JOURNAL_BUCKETS_MIN,
+ min(1 << 10,
+ (1 << 20) / ca->mi.bucket_size));
+
+ p = krealloc(ja->bucket_seq, nr * sizeof(u64),
+ GFP_KERNEL|__GFP_ZERO);
+ if (!p)
+ return -ENOMEM;
+
+ ja->bucket_seq = p;
+
+ p = krealloc(ja->buckets, nr * sizeof(u64),
+ GFP_KERNEL|__GFP_ZERO);
+ if (!p)
+ return -ENOMEM;
+
+ ja->buckets = p;
+
+ journal_buckets = bch_sb_resize_journal(&ca->disk_sb,
+ nr + sizeof(*journal_buckets) / sizeof(u64));
+ if (!journal_buckets)
+ return -ENOMEM;
+
+ for (i = 0, b = ca->mi.first_bucket;
+ i < nr && b < ca->mi.nbuckets; b++) {
+ if (!is_available_bucket(ca->buckets[b].mark))
+ continue;
+
+ bch_mark_metadata_bucket(ca, &ca->buckets[b],
+ BUCKET_JOURNAL, true);
+ ja->buckets[i] = b;
+ journal_buckets->buckets[i] = cpu_to_le64(b);
+ i++;
+ }
+
+ if (i < nr)
+ return -ENOSPC;
+
+ BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi));
+
+ ja->nr = nr;
+
+ return 0;
}
/* Journalling */
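
The sizing heuristic in bch_dev_journal_alloc() is unchanged, just computed inline now: roughly nbuckets/256, clamped between BCH_JOURNAL_BUCKETS_MIN and min(1024 buckets, 512MB worth of buckets). A worked example with made-up numbers (illustrative only, not from the patch):

    /* Hypothetical device: 1M buckets of 1024 sectors (512K) each.
     *   ca->mi.nbuckets >> 8          = 4096
     *   1 << 10                       = 1024  (bucket-count cap)
     *   (1 << 20) / ca->mi.bucket_size = 1024 (512M cap, in buckets)
     * nr = clamp(4096, BCH_JOURNAL_BUCKETS_MIN, min(1024, 1024)) = 1024,
     * i.e. a 512M journal. */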
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 3a53b7ea2761..75f861b70381 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -276,6 +276,31 @@ err:
return err;
}
+static const char *bch_sb_validate_members(struct bch_sb *sb)
+{
+ struct bch_sb_field_members *mi;
+ unsigned i;
+
+ mi = bch_sb_get_members(sb);
+ if (!mi)
+ return "Invalid superblock: member info area missing";
+
+ if ((void *) (mi->members + sb->nr_devices) >
+ vstruct_end(&mi->field))
+ return "Invalid superblock: bad member info";
+
+ for (i = 0; i < sb->nr_devices; i++) {
+ if (bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)))
+ continue;
+
+ if (le16_to_cpu(mi->members[i].bucket_size) <
+ BCH_SB_BTREE_NODE_SIZE(sb))
+ return "bucket size smaller than btree node size";
+ }
+
+ return NULL;
+}
+
const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
{
struct bch_sb *sb = disk_sb->sb;
@@ -378,15 +403,11 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
return "Invalid superblock: unknown optional field type";
}
- /* Validate member info: */
- sb_mi = bch_sb_get_members(sb);
- if (!sb_mi)
- return "Invalid superblock: member info area missing";
-
- if ((void *) (sb_mi->members + sb->nr_devices) >
- vstruct_end(&sb_mi->field))
- return "Invalid superblock: bad member info";
+ err = bch_sb_validate_members(sb);
+ if (err)
+ return err;
+ sb_mi = bch_sb_get_members(sb);
mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx);
if (mi.nbuckets > LONG_MAX)
@@ -808,9 +829,6 @@ void bch_write_super(struct cache_set *c)
lockdep_assert_held(&c->sb_lock);
- if (c->opts.nochanges)
- return;
-
closure_init_stack(cl);
le64_add_cpu(&c->disk_sb->seq, 1);
@@ -818,6 +836,9 @@ void bch_write_super(struct cache_set *c)
for_each_cache(ca, c, i)
bch_sb_from_cache_set(c, ca);
+ if (c->opts.nochanges)
+ goto out;
+
do {
wrote = false;
for_each_cache(ca, c, i)
@@ -827,7 +848,7 @@ void bch_write_super(struct cache_set *c)
closure_sync(cl);
super_idx++;
} while (wrote);
-
+out:
/* Make new options visible after they're persistent: */
bch_fs_mi_update(c, members->members, c->sb.nr_devices);
bch_sb_update(c);
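
Note the nochanges reordering in bch_write_super(): previously the function returned before bumping the sequence number, syncing the per-device superblocks, or making new options visible, so nothing set under nochanges ever took effect in memory. Now only the actual media write is skipped; condensed:

    le64_add_cpu(&c->disk_sb->seq, 1);      /* still runs under nochanges */
    for_each_cache(ca, c, i)
            bch_sb_from_cache_set(c, ca);   /* sync in-memory device sbs */

    if (c->opts.nochanges)
            goto out;                       /* skip only the disk writes */
    /* ... write loop ... */
    out:
    bch_fs_mi_update(c, members->members, c->sb.nr_devices);
    bch_sb_update(c);                       /* new options become visible */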
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 57b6a0a42491..9aa35bbe5a9c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -463,34 +463,6 @@ void bch_fs_detach(struct cache_set *c)
bch_fs_stop_async(c);
}
-static unsigned bch_fs_nr_devices(struct cache_set *c)
-{
- struct bch_sb_field_members *mi;
- unsigned i, nr = 0;
-
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
-
- for (i = 0; i < c->disk_sb->nr_devices; i++)
- if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)))
- nr++;
-
- mutex_unlock(&c->sb_lock);
-
- return nr;
-}
-
-static unsigned bch_fs_nr_online_devices(struct cache_set *c)
-{
- unsigned i, nr = 0;
-
- for (i = 0; i < c->sb.nr_devices; i++)
- if (c->cache[i])
- nr++;
-
- return nr;
-}
-
#define alloc_bucket_pages(gfp, ca) \
((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
@@ -746,12 +718,10 @@ static const char *__bch_fs_start(struct cache_set *c)
BUG_ON(c->state != BCH_FS_STARTING);
- /*
- * Make sure that each cache object's mi is up to date before
- * we start testing it.
- */
+ mutex_lock(&c->sb_lock);
for_each_cache(ca, c, i)
bch_sb_from_cache_set(c, ca);
+ mutex_unlock(&c->sb_lock);
if (BCH_SB_INITIALIZED(c->disk_sb)) {
ret = bch_journal_read(c, &journal);
@@ -853,14 +823,6 @@ static const char *__bch_fs_start(struct cache_set *c)
bch_initial_gc(c, NULL);
- err = "error starting allocator thread";
- for_each_cache(ca, c, i)
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_dev_allocator_start(ca)) {
- percpu_ref_put(&ca->ref);
- goto err;
- }
-
err = "unable to allocate journal buckets";
for_each_cache(ca, c, i)
if (bch_dev_journal_alloc(ca)) {
@@ -875,6 +837,14 @@ static const char *__bch_fs_start(struct cache_set *c)
bch_journal_start(c);
bch_journal_set_replay_done(&c->journal);
+ err = "error starting allocator thread";
+ for_each_cache(ca, c, i)
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
+ bch_dev_allocator_start(ca)) {
+ percpu_ref_put(&ca->ref);
+ goto err;
+ }
+
err = "cannot allocate new btree root";
for (id = 0; id < BTREE_ID_NR; id++)
if (bch_btree_root_alloc(c, id, &cl)) {
@@ -984,33 +954,28 @@ static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c)
if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) <
BCH_SB_BTREE_NODE_SIZE(c->disk_sb))
- return "new cache bucket_size is too small";
+ return "new cache bucket size is too small";
return NULL;
}
-static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c)
+static const char *bch_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb)
{
- struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb);
- struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb);
- uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid;
- const char *err;
+ struct bch_sb *newest =
+ le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb;
+ struct bch_sb_field_members *mi = bch_sb_get_members(newest);
- err = bch_dev_may_add(sb, c);
- if (err)
- return err;
+ if (uuid_le_cmp(fs->uuid, sb->uuid))
+ return "device not a member of filesystem";
- if (bch_is_zero(&dev_uuid, sizeof(dev_uuid)))
+ if (sb->dev_idx >= newest->nr_devices)
+ return "device has invalid dev_idx";
+
+ if (bch_is_zero(mi->members[sb->dev_idx].uuid.b, sizeof(uuid_le)))
return "device has been removed";
- /*
- * When attaching an existing device, the cache set superblock must
- * already contain member_info with a matching UUID
- */
- if (sb->dev_idx >= c->disk_sb->nr_devices ||
- memcmp(&mi->members[sb->dev_idx].uuid,
- &dev_uuid, sizeof(uuid_le)))
- return "cache sb does not match set";
+ if (fs->block_size != sb->block_size)
+ return "mismatched block size";
return NULL;
}
@@ -1128,31 +1093,25 @@ static int bch_dev_online(struct cache *ca)
return 0;
}
-static const char *bch_dev_alloc(struct bcache_superblock *sb,
- struct cache_set *c,
- struct cache **ret)
+static struct cache *__bch_dev_alloc(struct bcache_superblock *sb)
{
struct bch_member *member;
size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve;
size_t heap_size;
unsigned i;
- const char *err = "cannot allocate memory";
struct cache *ca;
- if (c->sb.nr_devices == 1)
- bdevname(sb->bdev, c->name);
-
if (bch_fs_init_fault("dev_alloc"))
- return err;
+ return NULL;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
- return err;
+ return NULL;
if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release,
0, GFP_KERNEL)) {
kfree(ca);
- return err;
+ return NULL;
}
kobject_init(&ca->kobj, &bch_dev_ktype);
@@ -1175,7 +1134,6 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work);
- err = "dynamic fault";
if (bch_fs_init_fault("dev_alloc"))
goto err;
@@ -1229,6 +1187,20 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
ca->copygc_write_point.group = &ca->self;
ca->tiering_write_point.group = &ca->self;
+ return ca;
+err:
+ bch_dev_free(ca);
+ return NULL;
+}
+
+static const char *__bch_dev_add(struct cache_set *c, struct cache *ca)
+{
+ if (c->cache[ca->dev_idx])
+ return "already have device online in this slot";
+
+ if (c->sb.nr_devices == 1)
+ bdevname(ca->disk_sb.bdev, c->name);
+
/*
* Increase journal write timeout if flushes to this device are
* expensive:
@@ -1244,66 +1216,87 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
kobject_get(&ca->kobj);
rcu_assign_pointer(c->cache[ca->dev_idx], ca);
- mutex_lock(&c->sb_lock);
-
- if (le64_to_cpu(ca->disk_sb.sb->seq) > le64_to_cpu(c->disk_sb->seq))
- bch_sb_to_cache_set(c, ca->disk_sb.sb);
-
- mutex_unlock(&c->sb_lock);
-
- err = "error creating kobject";
if (c->kobj.state_in_sysfs &&
bch_dev_online(ca))
pr_warn("error creating sysfs objects");
+ return NULL;
+}
+
+static const char *bch_dev_alloc(struct bcache_superblock *sb,
+ struct cache_set *c,
+ struct cache **ret)
+{
+ struct cache *ca;
+ const char *err;
+
+ ca = __bch_dev_alloc(sb);
+ if (!ca)
+ return "cannot allocate memory";
+
+ err = __bch_dev_add(c, ca);
+ if (err) {
+ bch_dev_free(ca);
+ return err;
+ }
+
+ mutex_lock(&c->sb_lock);
+ if (le64_to_cpu(ca->disk_sb.sb->seq) >
+ le64_to_cpu(c->disk_sb->seq))
+ bch_sb_to_cache_set(c, ca->disk_sb.sb);
+ mutex_unlock(&c->sb_lock);
+
if (ret)
*ret = ca;
else
kobject_put(&ca->kobj);
return NULL;
-err:
- bch_dev_free(ca);
- return err;
}
/* Device management: */
-static void __bch_dev_read_only(struct cache_set *c, struct cache *ca)
+bool bch_fs_may_start(struct cache_set *c, int flags)
{
- bch_moving_gc_stop(ca);
-
- /*
- * This stops new data writes (e.g. to existing open data
- * buckets) and then waits for all existing writes to
- * complete.
- */
- bch_dev_allocator_stop(ca);
-
- bch_dev_group_remove(&c->journal.devs, ca);
-}
+ struct bch_sb_field_members *mi;
+ unsigned meta_missing = 0;
+ unsigned data_missing = 0;
+ bool degraded = false;
+ unsigned i;
-static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
-{
- lockdep_assert_held(&c->state_lock);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
- return NULL;
+ for (i = 0; i < c->disk_sb->nr_devices; i++)
+ if (!c->cache[i] &&
+ !bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) {
+ degraded = true;
+ if (BCH_MEMBER_HAS_METADATA(&mi->members[i]))
+ meta_missing++;
+ if (BCH_MEMBER_HAS_DATA(&mi->members[i]))
+ data_missing++;
+ }
+ mutex_unlock(&c->sb_lock);
- trace_bcache_cache_read_write(ca);
+ if (degraded &&
+ !(flags & BCH_FORCE_IF_DEGRADED))
+ return false;
- if (bch_dev_allocator_start(ca))
- return "error starting allocator thread";
+ if (meta_missing &&
+ !(flags & BCH_FORCE_IF_METADATA_DEGRADED))
+ return false;
- if (bch_moving_gc_start(ca))
- return "error starting moving GC thread";
+ if (meta_missing >= BCH_SB_META_REPLICAS_HAVE(c->disk_sb) &&
+ !(flags & BCH_FORCE_IF_METADATA_LOST))
+ return false;
- if (bch_tiering_start(c))
- return "error starting tiering thread";
+ if (data_missing && !(flags & BCH_FORCE_IF_DATA_DEGRADED))
+ return false;
- bch_notify_dev_read_write(ca);
- trace_bcache_cache_read_write_done(ca);
+ if (data_missing >= BCH_SB_DATA_REPLICAS_HAVE(c->disk_sb) &&
+ !(flags & BCH_FORCE_IF_DATA_LOST))
+ return false;
- return NULL;
+ return true;
}
bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca,
@@ -1335,6 +1328,44 @@ bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca,
return true;
}
+static void __bch_dev_read_only(struct cache_set *c, struct cache *ca)
+{
+ bch_moving_gc_stop(ca);
+
+ /*
+ * This stops new data writes (e.g. to existing open data
+ * buckets) and then waits for all existing writes to
+ * complete.
+ */
+ bch_dev_allocator_stop(ca);
+
+ bch_dev_group_remove(&c->journal.devs, ca);
+}
+
+static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
+ return NULL;
+
+ trace_bcache_cache_read_write(ca);
+
+ if (bch_dev_allocator_start(ca))
+ return "error starting allocator thread";
+
+ if (bch_moving_gc_start(ca))
+ return "error starting moving GC thread";
+
+ if (bch_tiering_start(c))
+ return "error starting tiering thread";
+
+ bch_notify_dev_read_write(ca);
+ trace_bcache_cache_read_write_done(ca);
+
+ return NULL;
+}
+
int __bch_dev_set_state(struct cache_set *c, struct cache *ca,
enum bch_member_state new_state, int flags)
{
@@ -1496,13 +1527,13 @@ int bch_dev_add(struct cache_set *c, const char *path)
{
struct bcache_superblock sb;
const char *err;
- struct cache *ca;
+ struct cache *ca = NULL;
struct bch_sb_field_members *mi, *dev_mi;
struct bch_member saved_mi;
unsigned dev_idx, nr_devices, u64s;
int ret = -EINVAL;
- err = bch_read_super(&sb, c->opts, path);
+ err = bch_read_super(&sb, bch_opts_empty(), path);
if (err)
return -EINVAL;
@@ -1525,6 +1556,10 @@ int bch_dev_add(struct cache_set *c, const char *path)
saved_mi = dev_mi->members[sb.sb->dev_idx];
saved_mi.last_mount = cpu_to_le64(ktime_get_seconds());
+ /*
+ * XXX: ditch the GC stuff, just don't remove a device until nothing is
+ * using its dev_idx anymore
+ */
down_read(&c->gc_lock);
if (dynamic_fault("bcache:add:no_slot"))
@@ -1565,6 +1600,7 @@ have_slot:
memcpy(dev_mi, mi, u64s * sizeof(u64));
dev_mi->members[dev_idx] = saved_mi;
+ sb.sb->uuid = c->disk_sb->uuid;
sb.sb->dev_idx = dev_idx;
sb.sb->nr_devices = nr_devices;
@@ -1579,33 +1615,42 @@ have_slot:
c->disk_sb->nr_devices = nr_devices;
c->sb.nr_devices = nr_devices;
- err = bch_dev_alloc(&sb, c, &ca);
- if (err)
+ ca = __bch_dev_alloc(&sb);
+ if (!ca) {
+ err = "cannot allocate memory";
+ ret = -ENOMEM;
goto err_unlock;
+ }
- bch_write_super(c);
+ bch_dev_mark_superblocks(ca);
err = "journal alloc failed";
if (bch_dev_journal_alloc(ca))
- goto err_put;
+ goto err_unlock;
- bch_notify_dev_added(ca);
+ err = __bch_dev_add(c, ca);
+ BUG_ON(err);
+
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
err = __bch_dev_read_write(c, ca);
if (err)
- goto err_put;
+ goto err;
}
+ bch_notify_dev_added(ca);
+
kobject_put(&ca->kobj);
- mutex_unlock(&c->sb_lock);
mutex_unlock(&c->state_lock);
return 0;
-err_put:
- bch_dev_stop(ca);
err_unlock:
mutex_unlock(&c->sb_lock);
+err:
mutex_unlock(&c->state_lock);
+ if (ca)
+ bch_dev_stop(ca);
bch_free_super(&sb);
bch_err(c, "Unable to add device: %s", err);
@@ -1620,7 +1665,7 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices,
const char *err;
struct cache_set *c = NULL;
struct bcache_superblock *sb;
- unsigned i;
+ unsigned i, best_sb = 0;
if (!nr_devices)
return "need at least one device";
@@ -1647,8 +1692,19 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices,
goto err;
}
+ for (i = 1; i < nr_devices; i++)
+ if (le64_to_cpu(sb[i].sb->seq) >
+ le64_to_cpu(sb[best_sb].sb->seq))
+ best_sb = i;
+
+ for (i = 0; i < nr_devices; i++) {
+ err = bch_dev_in_fs(sb[best_sb].sb, sb[i].sb);
+ if (err)
+ goto err;
+ }
+
err = "cannot allocate memory";
- c = bch_fs_alloc(sb[0].sb, opts);
+ c = bch_fs_alloc(sb[best_sb].sb, opts);
if (!c)
goto err;
@@ -1659,7 +1715,7 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices,
}
err = "insufficient devices";
- if (bch_fs_nr_online_devices(c) != bch_fs_nr_devices(c))
+ if (!bch_fs_may_start(c, 0))
goto err;
if (!c->opts.nostart) {
@@ -1709,7 +1765,7 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb,
if (c) {
closure_get(&c->cl);
- err = bch_dev_in_fs(sb->sb, c);
+ err = bch_dev_in_fs(c->disk_sb, sb->sb);
if (err)
goto err;
} else {
@@ -1725,8 +1781,7 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb,
if (err)
goto err;
- if (bch_fs_nr_online_devices(c) == bch_fs_nr_devices(c) &&
- !c->opts.nostart) {
+ if (!c->opts.nostart && bch_fs_may_start(c, 0)) {
err = __bch_fs_start(c);
if (err)
goto err;
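
bch_fs_may_start() replaces the old requirement that every member device be online with an explicit degraded-mode policy, which is the refactoring towards degraded operation mentioned in the commit message. The decision, condensed:

    /* Degraded-start policy, condensed from bch_fs_may_start():
     *   any member device missing        -> need BCH_FORCE_IF_DEGRADED
     *   a missing device held metadata   -> need BCH_FORCE_IF_METADATA_DEGRADED
     *   missing >= metadata replicas     -> need BCH_FORCE_IF_METADATA_LOST
     *   a missing device held data       -> need BCH_FORCE_IF_DATA_DEGRADED
     *   missing >= data replicas         -> need BCH_FORCE_IF_DATA_LOST */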
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 56df0089467a..5717bffe5c8c 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -37,7 +37,6 @@ write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(blockdev_volume_create);
-write_attribute(add_device);
read_attribute(uuid);
read_attribute(minor);
@@ -839,15 +838,6 @@ STORE(bch_fs)
size = __bch_fs_store(kobj, attr, buf, size);
mutex_unlock(&c->state_lock);
- if (attr == &sysfs_add_device) {
- char *path = kstrdup(buf, GFP_KERNEL);
- int r = bch_dev_add(c, strim(path));
-
- kfree(path);
- if (r)
- return r;
- }
-
return size;
}
@@ -858,7 +848,6 @@ static struct attribute *bch_fs_files[] = {
&sysfs_journal_reclaim_delay_ms,
&sysfs_journal_entry_size_max,
&sysfs_blockdev_volume_create,
- &sysfs_add_device,
&sysfs_block_size,
&sysfs_block_size_bytes,