| author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-04-06 23:09:13 -0400 |
|---|---|---|
| committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-22 00:44:18 -0400 |
| commit | 2c90a2f3df10976d3a051487374fb12b8caadd4a (patch) | |
| tree | b5b5c35ea930f86bbdea8f162b4296943e977c65 | |
| parent | f794fedb9ed1672cf776e3aed84cd24da560bfb0 (diff) | |
bcachefs: Fix various device add bugs
| -rw-r--r-- | fs/bcachefs/alloc.c | 75 |
| -rw-r--r-- | fs/bcachefs/alloc.h | 2 |
| -rw-r--r-- | fs/bcachefs/bcachefs.h | 1 |
| -rw-r--r-- | fs/bcachefs/btree_gc.c | 9 |
| -rw-r--r-- | fs/bcachefs/buckets.c | 51 |
| -rw-r--r-- | fs/bcachefs/buckets.h | 1 |
| -rw-r--r-- | fs/bcachefs/journal.c | 138 |
| -rw-r--r-- | fs/bcachefs/journal.h | 2 |
| -rw-r--r-- | fs/bcachefs/super.c | 230 |

9 files changed, 279 insertions(+), 230 deletions(-)
```diff
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index bb33cad9ee7f..13b7a3e50cd0 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -1111,8 +1111,8 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
 	return ob;
 }
 
-/* _only_ for allocating the journal and btree roots on a brand new fs: */
-int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
+/* _only_ for allocating the journal on a new device: */
+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
 	ssize_t b;
@@ -1121,14 +1121,8 @@ int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
 	buckets = bucket_array(ca);
 
 	for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
-		if (is_available_bucket(buckets->b[b].mark)) {
-			bch2_mark_alloc_bucket(c, ca, b, true,
-					gc_pos_alloc(c, NULL),
-					BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-					BCH_BUCKET_MARK_GC_LOCK_HELD);
-			set_bit(b, ca->buckets_dirty);
+		if (is_available_bucket(buckets->b[b].mark))
 			goto success;
-		}
 	b = -1;
 success:
 	rcu_read_unlock();
@@ -1200,9 +1194,8 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 		break;
 	}
 
-	if (unlikely(test_bit(BCH_FS_BRAND_NEW_FS, &c->flags)) &&
-	    (bucket = bch2_bucket_alloc_startup(c, ca)) >= 0)
-		goto out;
+	if (cl)
+		closure_wait(&c->freelist_wait, cl);
 
 	spin_unlock(&c->freelist_lock);
 
@@ -1283,7 +1276,7 @@ void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
 		*v = *v < scale ? 0 : *v - scale;
 }
 
-static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
+static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
 					struct write_point *wp,
 					unsigned nr_replicas,
 					enum alloc_reserve reserve,
@@ -1349,52 +1342,22 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	EBUG_ON(reserve == RESERVE_MOVINGGC &&
 		ret != ALLOC_SUCCESS &&
 		ret != OPEN_BUCKETS_EMPTY);
-	rcu_read_unlock();
-	return ret;
-}
-
-static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
-				 unsigned nr_replicas,
-				 enum alloc_reserve reserve,
-				 struct bch_devs_mask *devs,
-				 struct closure *cl)
-{
-	bool waiting = false;
-
-	while (1) {
-		switch (__bch2_bucket_alloc_set(c, wp, nr_replicas,
-						reserve, devs, cl)) {
-		case ALLOC_SUCCESS:
-			if (waiting)
-				closure_wake_up(&c->freelist_wait);
-
-			return 0;
-
-		case NO_DEVICES:
-			if (waiting)
-				closure_wake_up(&c->freelist_wait);
-			return -EROFS;
-
-		case FREELIST_EMPTY:
-			if (!cl)
-				return -ENOSPC;
-
-			if (waiting)
-				return -EAGAIN;
-
-			/* Retry allocation after adding ourself to waitlist: */
-			closure_wait(&c->freelist_wait, cl);
-			waiting = true;
-			break;
-		case OPEN_BUCKETS_EMPTY:
-			return cl ? -EAGAIN : -ENOSPC;
-		default:
-			BUG();
-		}
+	switch (ret) {
+	case ALLOC_SUCCESS:
+		return 0;
+	case NO_DEVICES:
+		return -EROFS;
+	case FREELIST_EMPTY:
+	case OPEN_BUCKETS_EMPTY:
+		return cl ? -EAGAIN : -ENOSPC;
+	default:
+		BUG();
 	}
 }
 
@@ -2038,8 +2001,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 
 	/* did we find enough buckets? */
 	for_each_rw_member(ca, c, dev_iter)
-		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size)
+		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) {
+			percpu_ref_put(&ca->io_ref);
 			goto not_enough;
+		}
 
 	return 0;
 not_enough:
diff --git a/fs/bcachefs/alloc.h b/fs/bcachefs/alloc.h
index f914dbd56c2c..c3faaf7a7c5b 100644
--- a/fs/bcachefs/alloc.h
+++ b/fs/bcachefs/alloc.h
@@ -30,6 +30,8 @@ enum bucket_alloc_ret {
 	NO_DEVICES	= -3,	/* -EROFS */
 };
 
+long bch2_bucket_alloc_new_fs(struct bch_dev *);
+
 int bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, enum alloc_reserve, bool,
 		      struct closure *);
 
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 9c4e7fdecbc7..362dfbdccb1e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -431,7 +431,6 @@ struct bch_dev {
  */
 enum {
 	/* startup: */
-	BCH_FS_BRAND_NEW_FS,
 	BCH_FS_ALLOC_READ_DONE,
 	BCH_FS_ALLOCATOR_STARTED,
 	BCH_FS_INITIAL_GC_DONE,
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 70e4049f7daa..9ab04afa1a45 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -318,7 +318,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 	unsigned i;
 	u64 b;
 
-	lockdep_assert_held(&c->sb_lock);
+	if (c)
+		lockdep_assert_held(&c->sb_lock);
 
 	for (i = 0; i < layout->nr_superblocks; i++) {
 		u64 offset = le64_to_cpu(layout->sb_offset[i]);
@@ -332,7 +333,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 			  BCH_DATA_SB, flags);
 	}
 
-	spin_lock(&c->journal.lock);
+	if (c)
+		spin_lock(&c->journal.lock);
 
 	for (i = 0; i < ca->journal.nr; i++) {
 		b = ca->journal.buckets[i];
@@ -341,7 +343,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 			  gc_phase(GC_PHASE_SB), flags);
 	}
 
-	spin_unlock(&c->journal.lock);
+	if (c)
+		spin_unlock(&c->journal.lock);
 }
 
 static void bch2_mark_superblocks(struct bch_fs *c)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 864de940f1e7..1f944cb8a3e5 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -309,7 +309,7 @@ static bool bucket_became_unavailable(struct bch_fs *c,
 {
 	return is_available_bucket(old) &&
 	       !is_available_bucket(new) &&
-	       c && c->gc_pos.phase == GC_PHASE_DONE;
+	       (!c || c->gc_pos.phase == GC_PHASE_DONE);
 }
 
 void bch2_fs_usage_apply(struct bch_fs *c,
@@ -351,12 +351,16 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct bch_dev_usage *dev_usage;
 
-	lockdep_assert_held(&c->usage_lock);
+	if (c)
+		lockdep_assert_held(&c->usage_lock);
 
-	bch2_fs_inconsistent_on(old.data_type && new.data_type &&
-				old.data_type != new.data_type, c,
+	if (old.data_type && new.data_type &&
+	    old.data_type != new.data_type) {
+		BUG_ON(!c);
+		bch2_fs_inconsistent(c,
 			"different types of data in same bucket: %u, %u",
 			old.data_type, new.data_type);
+	}
 
 	dev_usage = this_cpu_ptr(ca->usage_percpu);
 
@@ -466,21 +470,29 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 
 	BUG_ON(!type);
 
-	lg_local_lock(&c->usage_lock);
-	g = bucket(ca, b);
+	if (likely(c)) {
+		lg_local_lock(&c->usage_lock);
 
-	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
-	    gc_will_visit(c, pos)) {
-		lg_local_unlock(&c->usage_lock);
-		return;
+		if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
+		    gc_will_visit(c, pos)) {
+			lg_local_unlock(&c->usage_lock);
+			return;
+		}
 	}
 
+	preempt_disable();
+
+	g = bucket(ca, b);
 	old = bucket_data_cmpxchg(c, ca, g, new, ({
 		saturated_add(ca, new.dirty_sectors, sectors,
 			      GC_MAX_SECTORS_USED);
 		new.data_type	= type;
 	}));
-	lg_local_unlock(&c->usage_lock);
+
+	preempt_enable();
+
+	if (likely(c))
+		lg_local_unlock(&c->usage_lock);
 
 	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
 	       bucket_became_unavailable(c, old, new));
@@ -859,9 +871,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	bch2_copygc_stop(ca);
 
-	down_write(&c->gc_lock);
-	down_write(&ca->bucket_lock);
-	lg_global_lock(&c->usage_lock);
+	if (resize) {
+		down_write(&c->gc_lock);
+		down_write(&ca->bucket_lock);
+		lg_global_lock(&c->usage_lock);
+	}
 
 	old_buckets = bucket_array(ca);
 
@@ -885,7 +899,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	swap(ca->oldest_gens, oldest_gens);
 	swap(ca->buckets_dirty, buckets_dirty);
 
-	lg_global_unlock(&c->usage_lock);
+	if (resize)
+		lg_global_unlock(&c->usage_lock);
 
 	spin_lock(&c->freelist_lock);
 	for (i = 0; i < RESERVE_NR; i++) {
@@ -904,8 +919,10 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	nbuckets = ca->mi.nbuckets;
 
-	up_write(&ca->bucket_lock);
-	up_write(&c->gc_lock);
+	if (resize) {
+		up_write(&ca->bucket_lock);
+		up_write(&c->gc_lock);
+	}
 
 	if (start_copygc &&
 	    bch2_copygc_start(c, ca))
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 8dbd9152aec8..399a853c80bb 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -31,6 +31,7 @@ static inline struct bucket_array *bucket_array(struct bch_dev *ca)
 {
 	return rcu_dereference_check(ca->buckets,
+				     !ca->fs ||
 				     lockdep_is_held(&ca->fs->usage_lock) ||
 				     lockdep_is_held(&ca->fs->gc_lock) ||
 				     lockdep_is_held(&ca->bucket_lock));
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 207f95be9ed1..b5bc7edcacf7 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1584,40 +1584,19 @@ err:
 	return ret;
 }
 
-/*
- * Allocate more journal space at runtime - not currently making use if it, but
- * the code works:
- */
-static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
-				       unsigned nr)
+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
+					 bool new_fs, struct closure *cl)
 {
-	struct journal *j = &c->journal;
+	struct bch_fs *c = ca->fs;
 	struct journal_device *ja = &ca->journal;
 	struct bch_sb_field_journal *journal_buckets;
-	struct disk_reservation disk_res = { 0, 0 };
-	struct closure cl;
 	u64 *new_bucket_seq = NULL, *new_buckets = NULL;
 	int ret = 0;
 
-	closure_init_stack(&cl);
-
 	/* don't handle reducing nr of buckets yet: */
 	if (nr <= ja->nr)
 		return 0;
 
-	/*
-	 * note: journal buckets aren't really counted as _sectors_ used yet, so
-	 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
-	 * when space used goes up without a reservation - but we do need the
-	 * reservation to ensure we'll actually be able to allocate:
-	 */
-
-	if (bch2_disk_reservation_get(c, &disk_res,
-			bucket_to_sector(ca, nr - ja->nr), 1, 0))
-		return -ENOSPC;
-
-	mutex_lock(&c->sb_lock);
-
 	ret = -ENOMEM;
 	new_buckets	= kzalloc(nr * sizeof(u64), GFP_KERNEL);
 	new_bucket_seq	= kzalloc(nr * sizeof(u64), GFP_KERNEL);
@@ -1629,29 +1608,41 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 	if (!journal_buckets)
 		goto err;
 
-	spin_lock(&j->lock);
+	if (c)
+		spin_lock(&c->journal.lock);
+
 	memcpy(new_buckets,	ja->buckets,	ja->nr * sizeof(u64));
 	memcpy(new_bucket_seq,	ja->bucket_seq,	ja->nr * sizeof(u64));
 	swap(new_buckets,	ja->buckets);
 	swap(new_bucket_seq,	ja->bucket_seq);
-	spin_unlock(&j->lock);
+
+	if (c)
+		spin_unlock(&c->journal.lock);
 
 	while (ja->nr < nr) {
-		struct open_bucket *ob;
-		size_t bucket;
-		int ob_idx;
+		struct open_bucket *ob = NULL;
+		long bucket;
 
-		ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, &cl);
-		if (ob_idx < 0) {
-			if (!closure_wait(&c->freelist_wait, &cl))
-				closure_sync(&cl);
-			continue;
+		if (new_fs) {
+			bucket = bch2_bucket_alloc_new_fs(ca);
+			if (bucket < 0) {
+				ret = -ENOSPC;
+				goto err;
+			}
+		} else {
+			int ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, cl);
+			if (ob_idx < 0) {
+				ret = cl ? -EAGAIN : -ENOSPC;
+				goto err;
+			}
+
+			ob = c->open_buckets + ob_idx;
+			bucket = sector_to_bucket(ca, ob->ptr.offset);
 		}
 
-		ob = c->open_buckets + ob_idx;
-		bucket = sector_to_bucket(ca, ob->ptr.offset);
+		if (c)
+			spin_lock(&c->journal.lock);
 
-		spin_lock(&j->lock);
 		__array_insert_item(ja->buckets,		ja->nr, ja->last_idx);
 		__array_insert_item(ja->bucket_seq,		ja->nr, ja->last_idx);
 		__array_insert_item(journal_buckets->buckets,	ja->nr, ja->last_idx);
@@ -1666,34 +1657,77 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 			ja->last_idx++;
 		}
 		ja->nr++;
-		spin_unlock(&j->lock);
 
-		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
-					  ca->mi.bucket_size,
-					  gc_phase(GC_PHASE_SB), 0);
+		if (c)
+			spin_unlock(&c->journal.lock);
 
-		bch2_open_bucket_put(c, ob);
+		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
+					  ca->mi.bucket_size,
+					  gc_phase(GC_PHASE_SB),
+					  new_fs
+					  ? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
+					  : 0);
+
+		if (!new_fs)
+			bch2_open_bucket_put(c, ob);
 	}
 
-	bch2_write_super(c);
-
 	ret = 0;
 err:
-	mutex_unlock(&c->sb_lock);
-
 	kfree(new_bucket_seq);
 	kfree(new_buckets);
-	bch2_disk_reservation_put(c, &disk_res);
 
-	if (!ret)
-		bch2_dev_allocator_add(c, ca);
+	return ret;
+}
+
+/*
+ * Allocate more journal space at runtime - not currently making use if it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+				unsigned nr)
+{
+	struct journal_device *ja = &ca->journal;
+	struct closure cl;
+	unsigned current_nr;
+	int ret;
+
+	closure_init_stack(&cl);
+
+	do {
+		struct disk_reservation disk_res = { 0, 0 };
+
+		closure_sync(&cl);
+
+		mutex_lock(&c->sb_lock);
+		current_nr = ja->nr;
+
+		/*
+		 * note: journal buckets aren't really counted as _sectors_ used yet, so
+		 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
+		 * when space used goes up without a reservation - but we do need the
+		 * reservation to ensure we'll actually be able to allocate:
+		 */
+
+		if (bch2_disk_reservation_get(c, &disk_res,
+				bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+			mutex_unlock(&c->sb_lock);
+			return -ENOSPC;
+		}
+
+		ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+
+		bch2_disk_reservation_put(c, &disk_res);
 
-	closure_sync(&cl);
+		if (ja->nr != current_nr)
+			bch2_write_super(c);
+		mutex_unlock(&c->sb_lock);
+	} while (ret == -EAGAIN);
 
 	return ret;
 }
 
-int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_dev *ca)
 {
 	unsigned nr;
 
@@ -1709,7 +1743,7 @@ int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
 		     min(1 << 10,
 			 (1 << 20) / ca->mi.bucket_size));
 
-	return bch2_set_nr_journal_buckets(c, ca, nr);
+	return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
 }
 
 /* Journalling */
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 46ae8f0d256d..cf5cc9ba008e 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -400,7 +400,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 ssize_t bch2_journal_print_debug(struct journal *, char *);
 ssize_t bch2_journal_print_pins(struct journal *, char *);
 
-int bch2_dev_journal_alloc(struct bch_fs *, struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *);
 
 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
 void bch2_fs_journal_stop(struct journal *);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 92cf630fcf8b..8b78ac3e4f5d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -813,21 +813,18 @@ const char *bch2_fs_start(struct bch_fs *c)
 		bch_notice(c, "initializing new filesystem");
 
 		set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
-		set_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
 
 		ret = bch2_initial_gc(c, &journal);
 		if (ret)
 			goto err;
 
 		err = "unable to allocate journal buckets";
-		for_each_rw_member(ca, c, i)
-			if (bch2_dev_journal_alloc(c, ca)) {
+		for_each_online_member(ca, c, i)
+			if (bch2_dev_journal_alloc(ca)) {
 				percpu_ref_put(&ca->io_ref);
 				goto err;
 			}
 
-		clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
-
 		for (i = 0; i < BTREE_ID_NR; i++)
 			bch2_btree_root_alloc(c, i);
 
@@ -1068,28 +1065,19 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
 	return 0;
 }
 
-static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
+					struct bch_member *member)
 {
-	struct bch_member *member;
-	struct bch_dev *ca = NULL;
-	int ret = 0;
-
-	pr_verbose_init(c->opts, "");
-
-	if (bch2_fs_init_fault("dev_alloc"))
-		goto err;
+	struct bch_dev *ca;
 
 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
 	if (!ca)
-		goto err;
+		return NULL;
 
 	kobject_init(&ca->kobj, &bch2_dev_ktype);
 	init_completion(&ca->ref_completion);
 	init_completion(&ca->io_ref_completion);
 
-	ca->dev_idx = dev_idx;
-	__set_bit(ca->dev_idx, ca->self.d);
-
 	init_rwsem(&ca->bucket_lock);
 
 	writepoint_init(&ca->copygc_write_point, BCH_DATA_USER);
@@ -1099,14 +1087,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 
 	INIT_WORK(&ca->io_error_work, bch2_io_error_work);
 
-	if (bch2_fs_init_fault("dev_alloc"))
-		goto err;
-
-	member = bch2_sb_get_members(c->disk_sb)->members + dev_idx;
-
 	ca->mi = bch2_mi_to_cpu(member);
 	ca->uuid = member->uuid;
-	scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
 
 	if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
 			    0, GFP_KERNEL) ||
@@ -1118,11 +1100,43 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 	    !(ca->io_done	= alloc_percpu(*ca->io_done)))
 		goto err;
 
+	return ca;
+err:
+	bch2_dev_free(ca);
+	return NULL;
+}
+
+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
+			    unsigned dev_idx)
+{
+	ca->dev_idx = dev_idx;
+	__set_bit(ca->dev_idx, ca->self.d);
+	scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
+
 	ca->fs = c;
 	rcu_assign_pointer(c->devs[ca->dev_idx], ca);
 
 	if (bch2_dev_sysfs_online(c, ca))
 		pr_warn("error creating sysfs objects");
+}
+
+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
+{
+	struct bch_member *member =
+		bch2_sb_get_members(c->disk_sb)->members + dev_idx;
+	struct bch_dev *ca = NULL;
+	int ret = 0;
+
+	pr_verbose_init(c->opts, "");
+
+	if (bch2_fs_init_fault("dev_alloc"))
+		goto err;
+
+	ca = __bch2_dev_alloc(c, member);
+	if (!ca)
+		goto err;
+
+	bch2_dev_attach(c, ca, dev_idx);
 out:
 	pr_verbose_init(c->opts, "ret %i", ret);
 	return ret;
@@ -1133,21 +1147,9 @@ err:
 	goto out;
 }
 
-static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 {
-	struct bch_dev *ca;
-	int ret;
-
-	lockdep_assert_held(&c->state_lock);
-
-	if (le64_to_cpu(sb->sb->seq) >
-	    le64_to_cpu(c->disk_sb->seq))
-		bch2_sb_to_fs(c, sb->sb);
-
-	BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
-	       !c->devs[sb->sb->dev_idx]);
-
-	ca = bch_dev_locked(c, sb->sb->dev_idx);
+	unsigned ret;
 
 	if (bch2_dev_is_online(ca)) {
 		bch_err(ca, "already have device online in slot %u",
@@ -1165,7 +1167,7 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
 
 	if (get_capacity(sb->bdev->bd_disk) <
 	    ca->mi.bucket_size * ca->mi.nbuckets) {
-		bch_err(c, "device too small");
+		bch_err(ca, "device too small");
 		return -EINVAL;
 	}
 
@@ -1173,35 +1175,50 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
 	if (ret)
 		return ret;
 
-	/*
-	 * Increase journal write timeout if flushes to this device are
-	 * expensive:
-	 */
-	if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) &&
-	    journal_flushes_device(ca))
-		c->journal.write_delay_ms =
-			max(c->journal.write_delay_ms, 1000U);
-
 	/* Commit: */
 	ca->disk_sb = *sb;
 	if (sb->mode & FMODE_EXCL)
 		ca->disk_sb.bdev->bd_holder = ca;
 	memset(sb, 0, sizeof(*sb));
 
+	if (ca->fs)
+		mutex_lock(&ca->fs->sb_lock);
+
+	bch2_mark_dev_superblock(ca->fs, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+
+	if (ca->fs)
+		mutex_unlock(&ca->fs->sb_lock);
+
+	percpu_ref_reinit(&ca->io_ref);
+
+	return 0;
+}
+
+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
+{
+	struct bch_dev *ca;
+	int ret;
+
+	lockdep_assert_held(&c->state_lock);
+
+	if (le64_to_cpu(sb->sb->seq) >
+	    le64_to_cpu(c->disk_sb->seq))
+		bch2_sb_to_fs(c, sb->sb);
+
+	BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
+	       !c->devs[sb->sb->dev_idx]);
+
+	ca = bch_dev_locked(c, sb->sb->dev_idx);
+
+	ret = __bch2_dev_attach_bdev(ca, sb);
+	if (ret)
+		return ret;
+
 	if (c->sb.nr_devices == 1)
 		bdevname(ca->disk_sb.bdev, c->name);
 	bdevname(ca->disk_sb.bdev, ca->name);
 
-	mutex_lock(&c->sb_lock);
-	bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
-	mutex_unlock(&c->sb_lock);
-
-	if (ca->mi.state == BCH_MEMBER_STATE_RW)
-		bch2_dev_allocator_add(c, ca);
-
 	rebalance_wakeup(c);
-
-	percpu_ref_reinit(&ca->io_ref);
 	return 0;
 }
 
@@ -1478,8 +1495,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	struct bch_sb_handle sb;
 	const char *err;
 	struct bch_dev *ca = NULL;
-	struct bch_sb_field_members *mi, *dev_mi;
-	struct bch_member saved_mi;
+	struct bch_sb_field_members *mi;
+	struct bch_member dev_mi;
 	unsigned dev_idx, nr_devices, u64s;
 	int ret;
 
@@ -1491,17 +1508,45 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	if (err)
 		return -EINVAL;
 
+	dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx];
+
 	err = bch2_dev_may_add(sb.sb, c);
 	if (err)
 		return -EINVAL;
 
+	ca = __bch2_dev_alloc(c, &dev_mi);
+	if (!ca) {
+		bch2_free_super(&sb);
+		return -ENOMEM;
+	}
+
+	ret = __bch2_dev_attach_bdev(ca, &sb);
+	if (ret) {
+		bch2_dev_free(ca);
+		return ret;
+	}
+
+	err = "journal alloc failed";
+	ret = bch2_dev_journal_alloc(ca);
+	if (ret)
+		goto err;
+
 	mutex_lock(&c->state_lock);
 	mutex_lock(&c->sb_lock);
 
-	/* Grab member info for new disk: */
-	dev_mi = bch2_sb_get_members(sb.sb);
-	saved_mi = dev_mi->members[sb.sb->dev_idx];
-	saved_mi.last_mount = cpu_to_le64(ktime_get_seconds());
+	err = "insufficient space in new superblock";
+	ret = bch2_sb_from_fs(c, ca);
+	if (ret)
+		goto err_unlock;
+
+	mi = bch2_sb_get_members(ca->disk_sb.sb);
+
+	if (!bch2_sb_resize_members(&ca->disk_sb,
+				le32_to_cpu(mi->field.u64s) +
+				sizeof(dev_mi) / sizeof(u64))) {
+		ret = -ENOSPC;
+		goto err_unlock;
+	}
 
 	if (dynamic_fault("bcachefs:add:no_slot"))
 		goto no_slot;
@@ -1519,64 +1564,47 @@ have_slot:
 	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
 	u64s = (sizeof(struct bch_sb_field_members) +
 		sizeof(struct bch_member) * nr_devices) / sizeof(u64);
-	err = "no space in superblock for member info";
-
-	dev_mi = bch2_sb_resize_members(&sb, u64s);
-	if (!dev_mi)
-		goto err_unlock;
 
+	err = "no space in superblock for member info";
+	ret = -ENOSPC;
 	mi = bch2_fs_sb_resize_members(c, u64s);
 	if (!mi)
 		goto err_unlock;
 
-	memcpy(dev_mi, mi, u64s * sizeof(u64));
-	dev_mi->members[dev_idx] = saved_mi;
+	/* success: */
 
-	sb.sb->uuid		= c->disk_sb->uuid;
-	sb.sb->dev_idx		= dev_idx;
-	sb.sb->nr_devices	= nr_devices;
-
-	/* commit new member info */
-	memcpy(mi, dev_mi, u64s * sizeof(u64));
+	mi->members[dev_idx] = dev_mi;
+	mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_seconds());
 	c->disk_sb->nr_devices	= nr_devices;
-	c->sb.nr_devices	= nr_devices;
+
+	ca->disk_sb.sb->dev_idx	= dev_idx;
+	bch2_dev_attach(c, ca, dev_idx);
 
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);
 
-	if (bch2_dev_alloc(c, dev_idx)) {
-		err = "cannot allocate memory";
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	if (__bch2_dev_online(c, &sb)) {
-		err = "bch2_dev_online() error";
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	ca = bch_dev_locked(c, dev_idx);
 	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
 		err = __bch2_dev_read_write(c, ca);
 		if (err)
-			goto err;
-
-		err = "journal alloc failed";
-		if (bch2_dev_journal_alloc(c, ca))
-			goto err;
+			goto err_late;
 	}
 
 	mutex_unlock(&c->state_lock);
 	return 0;
+
 err_unlock:
 	mutex_unlock(&c->sb_lock);
-err:
 	mutex_unlock(&c->state_lock);
+err:
+	if (ca)
+		bch2_dev_free(ca);
 	bch2_free_super(&sb);
-	bch_err(c, "Unable to add device: %s", err);
-	return ret ?: -EINVAL;
+	return ret;
+err_late:
+	bch_err(c, "Error going rw after adding device: %s", err);
+	return -EINVAL;
 }
 
 /* Hot add existing device to running filesystem: */
@@ -1603,8 +1631,8 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 	if (err)
 		goto err;
 
-	if (__bch2_dev_online(c, &sb)) {
-		err = "__bch2_dev_online() error";
+	if (bch2_dev_attach_bdev(c, &sb)) {
+		err = "bch2_dev_attach_bdev() error";
 		goto err;
 	}
 
@@ -1763,7 +1791,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 	err = "bch2_dev_online() error";
 	mutex_lock(&c->state_lock);
 	for (i = 0; i < nr_devices; i++)
-		if (__bch2_dev_online(c, &sb[i])) {
+		if (bch2_dev_attach_bdev(c, &sb[i])) {
 			mutex_unlock(&c->state_lock);
 			goto err_print;
 		}
@@ -1828,7 +1856,7 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
 
 	err = "bch2_dev_online() error";
 	mutex_lock(&c->sb_lock);
-	if (__bch2_dev_online(c, sb)) {
+	if (bch2_dev_attach_bdev(c, sb)) {
 		mutex_unlock(&c->sb_lock);
 		goto err;
 	}
```
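Two patterns in this patch are worth calling out for readers skimming the diff. First, `bch2_dev_add()` is reordered so that every step that can fail — allocating the `bch_dev`, attaching the block device, allocating journal buckets — happens before the new member is committed to the superblock; on failure the unwind is a plain `bch2_dev_free()` with the superblock untouched. A minimal, self-contained sketch of that ordering follows; all types and helpers below are hypothetical stand-ins, not bcachefs code — only the ordering mirrors the patch:

```c
/* Hypothetical sketch (not bcachefs code): do fallible work first,
 * publish to the superblock only once nothing can fail. */
#include <stdio.h>
#include <stdlib.h>

struct dev { int journal_ready; };

static struct dev *dev_alloc(void)
{
	return calloc(1, sizeof(struct dev));
}

static int dev_journal_alloc(struct dev *d)
{
	d->journal_ready = 1;
	return 0;		/* could fail with -ENOSPC */
}

static void sb_commit_member(struct dev *d)
{
	/* the point of no return: the member is now on disk */
	printf("member committed, journal_ready=%d\n", d->journal_ready);
}

int main(void)
{
	struct dev *d = dev_alloc();
	if (!d)
		return 1;

	/* fallible steps first ... */
	if (dev_journal_alloc(d)) {
		free(d);	/* plain unwind; superblock untouched */
		return 1;
	}

	/* ... commit to the superblock last */
	sb_commit_member(d);
	free(d);
	return 0;
}
```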
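Second, the helpers shared between the device-add path and the runtime path (`bch2_mark_dev_superblock()`, `bch2_dev_usage_update()`, `__bch2_set_nr_journal_buckets()`) now tolerate a NULL `bch_fs`, taking the fs-wide locks only when a filesystem object actually exists. A hypothetical userspace analogue of that guard pattern (again, the names here are stand-ins):

```c
/* Hypothetical analogue (not bcachefs code) of the "if (c)" guards this
 * patch threads through the marking helpers: the same function runs both
 * before a filesystem object exists and at runtime under the fs lock.
 * Build with: cc sketch.c -lpthread */
#include <pthread.h>
#include <stdio.h>

struct fs { pthread_mutex_t lock; };

static void mark_buckets(struct fs *fs, const char *dev)
{
	if (fs)
		pthread_mutex_lock(&fs->lock);

	/* ... accounting work that is safe either way ... */
	printf("marking buckets on %s (fs %s)\n",
	       dev, fs ? "locked" : "absent");

	if (fs)
		pthread_mutex_unlock(&fs->lock);
}

int main(void)
{
	struct fs fs = { .lock = PTHREAD_MUTEX_INITIALIZER };

	mark_buckets(NULL, "sdb");	/* device-add path: no fs yet */
	mark_buckets(&fs, "sdb");	/* runtime path: fs lock taken */
	return 0;
}
```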