author | Kent Overstreet <kent.overstreet@gmail.com> | 2017-03-10 07:08:39 -0900 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-03-17 19:49:23 -0800 |
commit | a5b2efedf8485ee4a36c736cf6cfe907c0db91c5 (patch) | |
tree | 8dc1073413af469ad03da7132a5d7354aa40dc85 | |
parent | de9690db2991d5d3a1f88211e9ef46c3b5a5dae4 (diff) | |
bcachefs: Rework struct bch_dev lifetime
Allocate all member devices when allocating struct bch_fs, not when they
come online - this will let us handle running in degraded mode better,
and ends up simplifying things a good bit.
33 files changed, 686 insertions(+), 930 deletions(-)
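The pattern that recurs throughout the diff below: because every member device is now allocated together with `struct bch_fs`, lookups no longer need `rcu_read_lock()` plus a NULL check (`PTR_DEV()`) — `c->devs[ptr->dev]` is always valid. What used to hang off a single `ca->ref` splits into two refcounts: `ref` for the object's existence (now tied to the filesystem's lifetime) and the new `io_ref` for "device is online", which I/O paths take with `percpu_ref_tryget()`. A minimal sketch of that rule, with simplified types and a hypothetical `do_io_to_dev()` helper that is not taken from the patch:

```c
#include <linux/percpu-refcount.h>

#define BCH_SB_MEMBERS_MAX 64	/* value assumed for this sketch */

struct bch_dev {
	struct percpu_ref ref;		/* existence: lives as long as the bch_fs */
	struct percpu_ref io_ref;	/* online: killed when the device goes offline */
	/* ... */
};

struct bch_fs {
	/* allocated up front for every member device, online or not: */
	struct bch_dev *devs[BCH_SB_MEMBERS_MAX];
};

static int do_io_to_dev(struct bch_fs *c, unsigned dev_idx)
{
	/* Always safe: the slot is populated for the filesystem's whole lifetime */
	struct bch_dev *ca = c->devs[dev_idx];

	/* Only the I/O itself needs the device online; fail softly if it isn't,
	 * which is what lets the filesystem keep running degraded */
	if (!percpu_ref_tryget(&ca->io_ref))
		return -EIO;

	/* ... submit the bio against ca's block device ... */

	percpu_ref_put(&ca->io_ref);
	return 0;
}
```

This is why hunks below replace `percpu_ref_put(&pick.ca->ref)` with `percpu_ref_put(&pick.ca->io_ref)`, guard journal and superblock writes with `percpu_ref_tryget(&ca->io_ref)`, and drop the `rcu_read_lock()`/`extent_for_each_online_device()` dance in favor of plain `extent_for_each_ptr()` loops.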
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c index 5937c292b7cb..5bd6de9fb05c 100644 --- a/fs/bcachefs/alloc.c +++ b/fs/bcachefs/alloc.c @@ -74,6 +74,7 @@ #include <trace/events/bcachefs.h> static void __bch_bucket_free(struct bch_dev *, struct bucket *); +static void bch_recalc_min_prio(struct bch_dev *, int); /* Allocation groups: */ @@ -84,7 +85,7 @@ void bch_dev_group_remove(struct dev_group *grp, struct bch_dev *ca) spin_lock(&grp->lock); for (i = 0; i < grp->nr; i++) - if (rcu_access_pointer(grp->d[i].dev) == ca) { + if (grp->d[i].dev == ca) { grp->nr--; memmove(&grp->d[i], &grp->d[i + 1], @@ -101,12 +102,12 @@ void bch_dev_group_add(struct dev_group *grp, struct bch_dev *ca) spin_lock(&grp->lock); for (i = 0; i < grp->nr; i++) - if (rcu_access_pointer(grp->d[i].dev) == ca) + if (grp->d[i].dev == ca) goto out; BUG_ON(grp->nr>= BCH_SB_MEMBERS_MAX); - rcu_assign_pointer(grp->d[grp->nr++].dev, ca); + grp->d[grp->nr++].dev = ca; out: spin_unlock(&grp->lock); } @@ -137,7 +138,8 @@ static void pd_controllers_update(struct work_struct *work) faster_tiers_dirty, -1); - group_for_each_dev_rcu(ca, &c->tiers[i].devs, iter) { + spin_lock(&c->tiers[i].devs.lock); + group_for_each_dev(ca, &c->tiers[i].devs, iter) { struct bch_dev_usage stats = bch_dev_usage_read(ca); unsigned bucket_bits = ca->bucket_bits + 9; @@ -172,6 +174,7 @@ static void pd_controllers_update(struct work_struct *work) copygc_can_free += fragmented; } + spin_unlock(&c->tiers[i].devs.lock); } rcu_read_unlock(); @@ -441,8 +444,15 @@ int bch_prio_read(struct bch_dev *ca) bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen); } + + mutex_lock(&c->bucket_lock); + bch_recalc_min_prio(ca, READ); + bch_recalc_min_prio(ca, WRITE); + mutex_unlock(&c->bucket_lock); + + ret = 0; fsck_err: - return 0; + return ret; } #define BUCKET_GC_GEN_MAX 96U @@ -520,6 +530,8 @@ void bch_recalc_min_prio(struct bch_dev *ca, int rw) u16 max_delta = 1; unsigned i; + lockdep_assert_held(&c->bucket_lock); + /* Determine min prio for this particular cache */ for_each_bucket(g, ca) max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw])); @@ -821,8 +833,8 @@ static void bch_find_empty_buckets(struct bch_fs *c, struct bch_dev *ca) spin_lock(&ca->freelist_lock); bch_mark_alloc_bucket(ca, g, true); - g->read_prio = ca->fs->prio_clock[READ].hand; - g->write_prio = ca->fs->prio_clock[WRITE].hand; + g->read_prio = c->prio_clock[READ].hand; + g->write_prio = c->prio_clock[WRITE].hand; verify_not_on_freelist(ca, g - ca->buckets); BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); @@ -1058,7 +1070,6 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct bch_fs *c, if (ob->nr_ptrs >= nr_replicas) return ALLOC_SUCCESS; - rcu_read_lock(); spin_lock(&devs->lock); for (i = 0; i < devs->nr; i++) @@ -1128,7 +1139,6 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct bch_fs *c, err: EBUG_ON(ret != ALLOC_SUCCESS && reserve == RESERVE_MOVINGGC); spin_unlock(&devs->lock); - rcu_read_unlock(); return ret; } @@ -1223,14 +1233,14 @@ static int bch_bucket_alloc_set(struct bch_fs *c, struct write_point *wp, static void __bch_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) { const struct bch_extent_ptr *ptr; - struct bch_dev *ca; lockdep_assert_held(&c->open_buckets_lock); - rcu_read_lock(); - open_bucket_for_each_online_device(c, ob, ptr, ca) + open_bucket_for_each_ptr(ob, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; + bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false); - rcu_read_unlock(); + } ob->nr_ptrs = 0; @@ -1283,12 +1293,13 @@ 
static struct open_bucket *bch_open_bucket_get(struct bch_fs *c, return ret; } -static unsigned ob_ptr_sectors_free(struct open_bucket *ob, - struct bch_member_rcu *mi, +static unsigned ob_ptr_sectors_free(struct bch_fs *c, + struct open_bucket *ob, struct bch_extent_ptr *ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; unsigned i = ptr - ob->ptrs; - unsigned bucket_size = mi->m[ptr->dev].bucket_size; + unsigned bucket_size = ca->mi.bucket_size; unsigned used = (ptr->offset & (bucket_size - 1)) + ob->ptr_offset[i]; @@ -1301,14 +1312,11 @@ static unsigned open_bucket_sectors_free(struct bch_fs *c, struct open_bucket *ob, unsigned nr_replicas) { - struct bch_member_rcu *mi = fs_member_info_get(c); unsigned i, sectors_free = UINT_MAX; for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++) sectors_free = min(sectors_free, - ob_ptr_sectors_free(ob, mi, &ob->ptrs[i])); - - fs_member_info_put(); + ob_ptr_sectors_free(c, ob, &ob->ptrs[i])); return sectors_free != UINT_MAX ? sectors_free : 0; } @@ -1317,11 +1325,10 @@ static void open_bucket_copy_unused_ptrs(struct bch_fs *c, struct open_bucket *new, struct open_bucket *old) { - struct bch_member_rcu *mi = fs_member_info_get(c); unsigned i; for (i = 0; i < old->nr_ptrs; i++) - if (ob_ptr_sectors_free(old, mi, &old->ptrs[i])) { + if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) { struct bch_extent_ptr tmp = old->ptrs[i]; tmp.offset += old->ptr_offset[i]; @@ -1329,19 +1336,18 @@ static void open_bucket_copy_unused_ptrs(struct bch_fs *c, new->ptr_offset[new->nr_ptrs] = 0; new->nr_ptrs++; } - fs_member_info_put(); } static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob) { #ifdef CONFIG_BCACHEFS_DEBUG const struct bch_extent_ptr *ptr; - struct bch_dev *ca; - rcu_read_lock(); - open_bucket_for_each_online_device(c, ob, ptr, ca) + open_bucket_for_each_ptr(ob, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; + BUG_ON(ptr_stale(ca, ptr)); - rcu_read_unlock(); + } #endif } @@ -1485,7 +1491,6 @@ void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e, unsigned sectors) { struct bch_extent_ptr tmp; - struct bch_dev *ca; bool has_data = false; unsigned i; @@ -1500,8 +1505,6 @@ void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e, if (nr_replicas < ob->nr_ptrs) has_data = true; - rcu_read_lock(); - for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) { EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev)); @@ -1512,11 +1515,8 @@ void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e, ob->ptr_offset[i] += sectors; - if ((ca = PTR_DEV(c, &ob->ptrs[i]))) - this_cpu_add(*ca->sectors_written, sectors); + this_cpu_add(*c->devs[tmp.dev]->sectors_written, sectors); } - - rcu_read_unlock(); } /* @@ -1526,19 +1526,16 @@ void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e, void bch_alloc_sectors_done(struct bch_fs *c, struct write_point *wp, struct open_bucket *ob) { - struct bch_member_rcu *mi = fs_member_info_get(c); bool has_data = false; unsigned i; for (i = 0; i < ob->nr_ptrs; i++) { - if (!ob_ptr_sectors_free(ob, mi, &ob->ptrs[i])) + if (!ob_ptr_sectors_free(c, ob, &ob->ptrs[i])) ob->has_full_ptrs = true; else has_data = true; } - fs_member_info_put(); - if (likely(has_data)) atomic_inc(&ob->pin); else @@ -1600,8 +1597,7 @@ void bch_recalc_capacity(struct bch_fs *c) unsigned long ra_pages = 0; unsigned i, j; - rcu_read_lock(); - for_each_member_device_rcu(ca, c, i) { + for_each_online_member(ca, c, i) { struct backing_dev_info *bdi = 
blk_get_backing_dev_info(ca->disk_sb.bdev); @@ -1632,7 +1628,8 @@ void bch_recalc_capacity(struct bch_fs *c) * Capacity of the filesystem is the capacity of all the devices in the * slowest (highest) tier - we don't include lower tier devices. */ - group_for_each_dev_rcu(ca, &slowest_tier->devs, i) { + spin_lock(&slowest_tier->devs.lock); + group_for_each_dev(ca, &slowest_tier->devs, i) { size_t reserve = 0; /* @@ -1668,8 +1665,8 @@ void bch_recalc_capacity(struct bch_fs *c) ca->mi.first_bucket) << ca->bucket_bits; } + spin_unlock(&slowest_tier->devs.lock); set_capacity: - rcu_read_unlock(); total_capacity = capacity; capacity *= (100 - c->opts.gc_reserve_percent); @@ -1828,6 +1825,8 @@ int bch_dev_allocator_start(struct bch_dev *ca) { struct bch_fs *c = ca->fs; struct dev_group *tier = &c->tiers[ca->mi.tier].devs; + struct bch_sb_field_journal *journal_buckets; + bool has_journal; struct task_struct *k; /* @@ -1845,7 +1844,15 @@ int bch_dev_allocator_start(struct bch_dev *ca) bch_dev_group_add(tier, ca); bch_dev_group_add(&c->all_devs, ca); - bch_dev_group_add(&c->journal.devs, ca); + + mutex_lock(&c->sb_lock); + journal_buckets = bch_sb_get_journal(ca->disk_sb.sb); + has_journal = bch_nr_journal_buckets(journal_buckets) >= + BCH_JOURNAL_BUCKETS_MIN; + mutex_unlock(&c->sb_lock); + + if (has_journal) + bch_dev_group_add(&c->journal.devs, ca); bch_recalc_capacity(c); diff --git a/fs/bcachefs/alloc.h b/fs/bcachefs/alloc.h index bd50fec8f3c7..f8aa762de2e0 100644 --- a/fs/bcachefs/alloc.h +++ b/fs/bcachefs/alloc.h @@ -25,8 +25,6 @@ void bch_dev_group_add(struct dev_group *, struct bch_dev *); int bch_prio_read(struct bch_dev *); -void bch_recalc_min_prio(struct bch_dev *, int); - size_t bch_bucket_alloc(struct bch_dev *, enum alloc_reserve); void bch_open_bucket_put(struct bch_fs *, struct open_bucket *); @@ -56,54 +54,27 @@ static inline void bch_wake_allocator(struct bch_dev *ca) rcu_read_unlock(); } -static inline struct bch_dev *dev_group_next_rcu(struct dev_group *devs, - unsigned *iter) +static inline struct bch_dev *dev_group_next(struct dev_group *devs, + unsigned *iter) { struct bch_dev *ret = NULL; while (*iter < devs->nr && - !(ret = rcu_dereference(devs->d[*iter].dev))) + !(ret = rcu_dereference_check(devs->d[*iter].dev, + lockdep_is_held(&devs->lock)))) (*iter)++; return ret; } -#define group_for_each_dev_rcu(ca, devs, iter) \ +#define group_for_each_dev(ca, devs, iter) \ for ((iter) = 0; \ - ((ca) = dev_group_next_rcu((devs), &(iter))); \ + ((ca) = dev_group_next((devs), &(iter))); \ (iter)++) -static inline struct bch_dev *dev_group_next(struct dev_group *devs, - unsigned *iter) -{ - struct bch_dev *ret; - - rcu_read_lock(); - if ((ret = dev_group_next_rcu(devs, iter))) - percpu_ref_get(&ret->ref); - rcu_read_unlock(); - - return ret; -} - -#define group_for_each_dev(ca, devs, iter) \ - for ((iter) = 0; \ - (ca = dev_group_next(devs, &(iter))); \ - percpu_ref_put(&ca->ref), (iter)++) - -#define __open_bucket_next_online_device(_c, _ob, _ptr, _ca) \ -({ \ - (_ca) = NULL; \ - \ - while ((_ptr) < (_ob)->ptrs + (_ob)->nr_ptrs && \ - !((_ca) = PTR_DEV(_c, _ptr))) \ - (_ptr)++; \ - (_ca); \ -}) - -#define open_bucket_for_each_online_device(_c, _ob, _ptr, _ca) \ +#define open_bucket_for_each_ptr(_ob, _ptr) \ for ((_ptr) = (_ob)->ptrs; \ - ((_ca) = __open_bucket_next_online_device(_c, _ob, _ptr, _ca));\ + (_ptr) < (_ob)->ptrs + (_ob)->nr_ptrs; \ (_ptr)++) void bch_recalc_capacity(struct bch_fs *); diff --git a/fs/bcachefs/bcache.h b/fs/bcachefs/bcache.h index af2cfc5fe77d..dd9e3b8253ff 
100644 --- a/fs/bcachefs/bcache.h +++ b/fs/bcachefs/bcache.h @@ -317,7 +317,8 @@ struct crypto_blkcipher; struct crypto_ahash; enum gc_phase { - GC_PHASE_PENDING_DELETE = BTREE_ID_NR + 1, + GC_PHASE_SB_METADATA = BTREE_ID_NR + 1, + GC_PHASE_PENDING_DELETE, GC_PHASE_DONE }; @@ -340,21 +341,15 @@ struct bch_member_cpu { u8 valid; }; -struct bch_member_rcu { - struct rcu_head rcu; - unsigned nr_devices; - struct bch_member_cpu m[]; -}; - struct bch_dev { + struct kobject kobj; struct percpu_ref ref; - struct rcu_head free_rcu; - struct work_struct free_work; + struct percpu_ref io_ref; + struct completion stop_complete; + struct completion offline_complete; struct bch_fs *fs; - struct dev_group self; - u8 dev_idx; /* * Cached version of this device's member info from superblock @@ -362,10 +357,11 @@ struct bch_dev { */ struct bch_member_cpu mi; uuid_le uuid; + char name[BDEVNAME_SIZE]; struct bcache_superblock disk_sb; - struct kobject kobj; + struct dev_group self; /* biosets used in cloned bios for replicas and moving_gc */ struct bio_set replica_set; @@ -517,12 +513,6 @@ struct bch_fs { struct bch_opts opts; - /* - * Cached copy in native endianness: - * Set by bch_fs_mi_update(): - */ - struct bch_member_rcu __rcu *members; - /* Updated by bch_sb_update():*/ struct { uuid_le uuid; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index ac3b8b458f44..f4c2f275bf78 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -788,7 +788,7 @@ LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); #endif enum bch_member_state { - BCH_MEMBER_STATE_ACTIVE = 0, + BCH_MEMBER_STATE_RW = 0, BCH_MEMBER_STATE_RO = 1, BCH_MEMBER_STATE_FAILED = 2, BCH_MEMBER_STATE_SPARE = 3, diff --git a/fs/bcachefs/blockdev.c b/fs/bcachefs/blockdev.c index 5da771e1158c..a4522ad2836f 100644 --- a/fs/bcachefs/blockdev.c +++ b/fs/bcachefs/blockdev.c @@ -17,6 +17,8 @@ static int bch_blockdev_major; static DEFINE_IDA(bch_blockdev_minor); static LIST_HEAD(uncached_devices); +static DEFINE_MUTEX(bch_blockdev_lock); + static struct kmem_cache *bch_search_cache; static void write_bdev_super_endio(struct bio *bio) @@ -62,21 +64,6 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_return_with_destructor(cl, bch_write_bdev_super_unlock); } -bool bch_is_open_backing_dev(struct block_device *bdev) -{ - struct bch_fs *c, *tc; - struct cached_dev *dc, *t; - - list_for_each_entry_safe(c, tc, &bch_fs_list, list) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - if (dc->disk_sb.bdev == bdev) - return true; - list_for_each_entry_safe(dc, t, &uncached_devices, list) - if (dc->disk_sb.bdev == bdev) - return true; - return false; -} - static int open_dev(struct block_device *b, fmode_t mode) { struct bcache_device *d = b->bd_disk->private_data; @@ -118,8 +105,6 @@ void bch_blockdev_stop(struct bcache_device *d) static void bcache_device_unlink(struct bcache_device *d) { - lockdep_assert_held(&bch_register_lock); - if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { sysfs_remove_link(&d->c->kobj, d->name); sysfs_remove_link(&d->kobj, "cache"); @@ -141,8 +126,6 @@ static void bcache_device_link(struct bcache_device *d, struct bch_fs *c, static void bcache_device_detach(struct bcache_device *d) { - lockdep_assert_held(&bch_register_lock); - if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { mutex_lock(&d->inode_lock); bch_inode_rm(d->c, bcache_dev_inum(d)); @@ -161,8 +144,6 @@ static int bcache_device_attach(struct 
bcache_device *d, struct bch_fs *c) { int ret; - lockdep_assert_held(&bch_register_lock); - ret = radix_tree_insert(&c->devices, bcache_dev_inum(d), d); if (ret) { pr_err("radix_tree_insert() error for inum %llu", @@ -178,8 +159,6 @@ static int bcache_device_attach(struct bcache_device *d, struct bch_fs *c) static void bcache_device_free(struct bcache_device *d) { - lockdep_assert_held(&bch_register_lock); - pr_info("%s stopped", d->disk->disk_name); if (d->c) @@ -325,7 +304,7 @@ static void cached_dev_detach_finish(struct work_struct *w) BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(atomic_read(&dc->count)); - mutex_lock(&bch_register_lock); + mutex_lock(&bch_blockdev_lock); memset(&dc->disk_sb.sb->set_uuid, 0, 16); SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_NONE); @@ -339,7 +318,7 @@ static void cached_dev_detach_finish(struct work_struct *w) clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_blockdev_lock); pr_info("Caching disabled for %s", bdevname(dc->disk_sb.bdev, buf)); @@ -349,8 +328,6 @@ static void cached_dev_detach_finish(struct work_struct *w) void bch_cached_dev_detach(struct cached_dev *dc) { - lockdep_assert_held(&bch_register_lock); - if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return; @@ -495,11 +472,14 @@ void bch_attach_backing_devs(struct bch_fs *c) { struct cached_dev *dc, *t; - lockdep_assert_held(&bch_register_lock); lockdep_assert_held(&c->state_lock); + mutex_lock(&bch_blockdev_lock); + list_for_each_entry_safe(dc, t, &uncached_devices, list) bch_cached_dev_attach(dc, c); + + mutex_unlock(&bch_blockdev_lock); } void bch_cached_dev_release(struct kobject *kobj) @@ -517,14 +497,14 @@ static void cached_dev_free(struct closure *cl) bch_cached_dev_writeback_stop(dc); bch_cached_dev_writeback_free(dc); - mutex_lock(&bch_register_lock); + mutex_lock(&bch_blockdev_lock); if (atomic_read(&dc->running)) bd_unlink_disk_holder(dc->disk_sb.bdev, dc->disk.disk); bcache_device_free(&dc->disk); list_del(&dc->list); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_blockdev_lock); bch_free_super((void *) &dc->disk_sb); @@ -536,11 +516,8 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; - mutex_lock(&bch_register_lock); - bcache_device_unlink(d); - mutex_unlock(&bch_register_lock); - bch_cache_accounting_destroy(&dc->accounting); + bcache_device_unlink(d); kobject_del(&d->kobj); continue_at(cl, cached_dev_free, system_wq); @@ -652,8 +629,11 @@ const char *bch_backing_dev_register(struct bcache_superblock *sb) bdevname(dc->disk_sb.bdev, name)); list_add(&dc->list, &uncached_devices); - list_for_each_entry(c, &bch_fs_list, list) + c = bch_uuid_to_fs(dc->disk_sb.sb->set_uuid); + if (c) { bch_cached_dev_attach(dc, c); + closure_put(&c->cl); + } if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE || BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE) @@ -678,9 +658,7 @@ static void blockdev_volume_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); - mutex_lock(&bch_register_lock); bcache_device_free(d); - mutex_unlock(&bch_register_lock); kobject_put(&d->kobj); } @@ -688,9 +666,7 @@ static void blockdev_volume_flush(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); - mutex_lock(&bch_register_lock); bcache_device_unlink(d); - mutex_unlock(&bch_register_lock); 
kobject_del(&d->kobj); continue_at(cl, blockdev_volume_free, system_wq); } @@ -792,7 +768,7 @@ void bch_blockdevs_stop(struct bch_fs *c) struct radix_tree_iter iter; void **slot; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_blockdev_lock); rcu_read_lock(); radix_tree_for_each_slot(slot, &c->devices, &iter, 0) { @@ -808,7 +784,7 @@ void bch_blockdevs_stop(struct bch_fs *c) } rcu_read_unlock(); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_blockdev_lock); } void bch_fs_blockdev_exit(struct bch_fs *c) diff --git a/fs/bcachefs/blockdev.h b/fs/bcachefs/blockdev.h index 0062ef7d1df3..5423d77644f8 100644 --- a/fs/bcachefs/blockdev.h +++ b/fs/bcachefs/blockdev.h @@ -59,7 +59,6 @@ void bch_cached_dev_detach(struct cached_dev *); void bch_cached_dev_run(struct cached_dev *); void bch_blockdev_stop(struct bcache_device *); -bool bch_is_open_backing_dev(struct block_device *); const char *bch_backing_dev_register(struct bcache_superblock *); int bch_blockdev_volume_create(struct bch_fs *, u64); @@ -90,10 +89,6 @@ static inline void bch_cached_dev_detach(struct cached_dev *dc) {} static inline void bch_cached_dev_run(struct cached_dev *dc) {} static inline void bch_blockdev_stop(struct bcache_device *d) {} -static inline bool bch_is_open_backing_dev(struct block_device *bdev) -{ - return false; -} static inline const char *bch_backing_dev_register(struct bcache_superblock *sb) { return "not implemented"; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 9c34269736c8..7e8a3f6a17df 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -90,15 +90,13 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b, u8 bch_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k) { const struct bch_extent_ptr *ptr; - struct bch_dev *ca; u8 max_stale = 0; if (bkey_extent_is_data(k.k)) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - rcu_read_lock(); - - extent_for_each_online_device(c, e, ptr, ca) { + extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; size_t b = PTR_BUCKET_NR(ca, ptr); if (__gen_after(ca->oldest_gens[b], ptr->gen)) @@ -106,8 +104,6 @@ u8 bch_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k) max_stale = max(max_stale, ptr_stale(ca, ptr)); } - - rcu_read_unlock(); } return max_stale; @@ -254,10 +250,10 @@ static void bch_mark_allocator_buckets(struct bch_fs *c) const struct bch_extent_ptr *ptr; mutex_lock(&ob->lock); - rcu_read_lock(); - open_bucket_for_each_online_device(c, ob, ptr, ca) + open_bucket_for_each_ptr(ob, ptr) { + ca = c->devs[ptr->dev]; bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), true); - rcu_read_unlock(); + } mutex_unlock(&ob->lock); } } @@ -273,7 +269,7 @@ static void mark_metadata_sectors(struct bch_dev *ca, u64 start, u64 end, } while (b < end >> ca->bucket_bits); } -void bch_dev_mark_superblocks(struct bch_dev *ca) +static void bch_dev_mark_superblocks(struct bch_dev *ca) { struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; unsigned i; @@ -294,11 +290,13 @@ void bch_dev_mark_superblocks(struct bch_dev *ca) /* * Mark non btree metadata - prios, journal */ -static void bch_mark_dev_metadata(struct bch_fs *c, struct bch_dev *ca) +void bch_mark_dev_metadata(struct bch_fs *c, struct bch_dev *ca) { unsigned i; u64 b; + lockdep_assert_held(&c->sb_lock); + bch_dev_mark_superblocks(ca); spin_lock(&c->journal.lock); @@ -329,10 +327,10 @@ static void bch_mark_metadata(struct bch_fs *c) unsigned i; mutex_lock(&c->sb_lock); + gc_pos_set(c, gc_phase(GC_PHASE_SB_METADATA)); - 
for_each_member_device(ca, c, i) + for_each_online_member(ca, c, i) bch_mark_dev_metadata(c, ca); - mutex_unlock(&c->sb_lock); } @@ -935,14 +933,14 @@ int bch_initial_gc(struct bch_fs *c, struct list_head *journal) { enum btree_id id; - bch_mark_metadata(c); - for (id = 0; id < BTREE_ID_NR; id++) bch_initial_gc_btree(c, id); if (journal) bch_journal_mark(c, journal); + bch_mark_metadata(c); + /* * Skip past versions that might have possibly been used (as nonces), * but hadn't had their pointers written: diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index 590ade2d8211..f1794fdf4378 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -13,7 +13,7 @@ int bch_initial_gc(struct bch_fs *, struct list_head *); u8 bch_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c); u8 bch_btree_mark_key_initial(struct bch_fs *, enum bkey_type, struct bkey_s_c); -void bch_dev_mark_superblocks(struct bch_dev *); +void bch_mark_dev_metadata(struct bch_fs *, struct bch_dev *); /* * For concurrent mark and sweep (with other index updates), we define a total diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d11d72fc9f39..71478fb1cc89 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1226,7 +1226,7 @@ void bch_btree_node_read(struct bch_fs *c, struct btree *b) bch_time_stats_update(&c->btree_read_time, start_time); out: bio_put(bio); - percpu_ref_put(&pick.ca->ref); + percpu_ref_put(&pick.ca->io_ref); } int bch_btree_root_read(struct bch_fs *c, enum btree_id id, @@ -1319,7 +1319,7 @@ static void btree_node_write_endio(struct bio *bio) } if (ca) - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); } void __bch_btree_node_write(struct bch_fs *c, struct btree *b, @@ -1336,7 +1336,6 @@ void __bch_btree_node_write(struct bch_fs *c, struct btree *b, BKEY_PADDED(key) k; struct bkey_s_extent e; struct bch_extent_ptr *ptr; - struct bch_dev *ca; struct sort_iter sort_iter; struct nonce nonce; unsigned bytes_to_write, sectors_to_write, order, bytes, u64s; @@ -1557,10 +1556,9 @@ void __bch_btree_node_write(struct bch_fs *c, struct btree *b, extent_for_each_ptr(e, ptr) ptr->offset += b->written; - rcu_read_lock(); - extent_for_each_online_device(c, e, ptr, ca) - atomic64_add(sectors_to_write, &ca->btree_sectors_written); - rcu_read_unlock(); + extent_for_each_ptr(e, ptr) + atomic64_add(sectors_to_write, + &c->devs[ptr->dev]->btree_sectors_written); b->written += sectors_to_write; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 59c68d493995..8514f5472016 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -450,8 +450,8 @@ static void bch_mark_pointer(struct bch_fs *c, { struct bucket_mark old, new; unsigned saturated; - struct bch_dev *ca; - struct bucket *g; + struct bch_dev *ca = c->devs[ptr->dev]; + struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr); u64 v; unsigned old_sectors, new_sectors; int disk_sectors, compressed_sectors; @@ -469,12 +469,6 @@ static void bch_mark_pointer(struct bch_fs *c, compressed_sectors = -__compressed_sectors(crc, old_sectors) + __compressed_sectors(crc, new_sectors); - ca = PTR_DEV(c, ptr); - if (!ca) - goto out; - - g = ca->buckets + PTR_BUCKET_NR(ca, ptr); - if (gc_will_visit) { if (journal_seq) bucket_cmpxchg(g, new, new.journal_seq = journal_seq); @@ -565,13 +559,11 @@ static void bch_mark_extent(struct bch_fs *c, struct bkey_s_c_extent e, BUG_ON(metadata && bkey_extent_is_cached(e.k)); BUG_ON(!sectors); - rcu_read_lock(); extent_for_each_ptr_crc(e, ptr, crc) 
bch_mark_pointer(c, e, crc, ptr, sectors, ptr->cached ? S_CACHED : type, may_make_unavailable, stats, gc_will_visit, journal_seq); - rcu_read_unlock(); } static void __bch_mark_key(struct bch_fs *c, struct bkey_s_c k, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index d189c72fb8ad..9a00d38a682a 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -39,14 +39,6 @@ static inline u8 bucket_gc_gen(struct bch_dev *ca, struct bucket *g) return g->mark.gen - ca->oldest_gens[r]; } -static inline struct bch_dev *PTR_DEV(const struct bch_fs *c, - const struct bch_extent_ptr *ptr) -{ - EBUG_ON(ptr->dev > rcu_dereference(c->members)->nr_devices); - - return rcu_dereference(c->devs[ptr->dev]); -} - static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { @@ -64,14 +56,12 @@ static inline size_t PTR_BUCKET_NR_TRACE(const struct bch_fs *c, #if 0 if (bkey_extent_is_data(&k->k)) { const struct bch_extent_ptr *ptr; - const struct bch_dev *ca; - rcu_read_lock(); - extent_for_each_online_device(c, bkey_i_to_s_c_extent(k), ptr, ca) { + extent_for_each_ptr(bkey_i_to_s_c_extent(k), ptr) { + const struct bch_dev *ca = c->devs[ptr->dev]; bucket = PTR_BUCKET_NR(ca, ptr); break; } - rcu_read_unlock(); } #endif return bucket; @@ -102,8 +92,6 @@ static inline u8 gen_after(u8 a, u8 b) /** * ptr_stale() - check if a pointer points into a bucket that has been * invalidated. - * - * Warning: PTR_DEV(c, k, ptr) must equal ca. */ static inline u8 ptr_stale(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 4b1fd946da32..9ef8cfc64d99 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -124,7 +124,7 @@ static long bch_ioctl_disk_add(struct bch_fs *c, /* returns with ref on ca->ref */ static struct bch_dev *bch_device_lookup(struct bch_fs *c, - const char __user *dev) + const char __user *dev) { struct block_device *bdev; struct bch_dev *ca; @@ -166,7 +166,6 @@ static long bch_ioctl_disk_remove(struct bch_fs *c, ret = bch_dev_remove(c, ca, arg.flags); - percpu_ref_put(&ca->ref); return ret; } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 7bfe73c22109..b91f53d261d2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -88,7 +88,7 @@ void __bch_btree_verify(struct bch_fs *c, struct btree *b) bch_btree_node_read_done(c, v, pick.ca, &pick.ptr); n_sorted = c->verify_data->data; - percpu_ref_put(&pick.ca->ref); + percpu_ref_put(&pick.ca->io_ref); sorted = &n_sorted->keys; inmemory = &n_inmemory->keys; diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 48087fba967c..ba46d2d12f59 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -112,7 +112,6 @@ void bch_nonfatal_io_error_work(struct work_struct *work) struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); struct bch_fs *c = ca->fs; unsigned errors = atomic_read(&ca->io_errors); - char buf[BDEVNAME_SIZE]; bool dev; if (errors < c->error_limit) { @@ -127,9 +126,8 @@ void bch_nonfatal_io_error_work(struct work_struct *work) ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, BCH_FORCE_IF_DEGRADED) : bch_fs_emergency_read_only(c)) - bch_err(c, - "too many IO errors on %s, setting %s RO", - bdevname(ca->disk_sb.bdev, buf), + bch_err(ca, + "too many IO errors, setting %s RO", dev ? 
"device" : "filesystem"); mutex_unlock(&c->state_lock); } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index fe8e186ada1a..726b20d4434b 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -13,13 +13,6 @@ struct bch_fs; /* Error messages: */ -#define __bch_dev_error(ca, fmt, ...) \ -do { \ - char _buf[BDEVNAME_SIZE]; \ - bch_err((ca)->fs, "%s: " fmt, \ - bdevname((ca)->disk_sb.bdev, _buf), ##__VA_ARGS__); \ -} while (0) - /* * Very fatal logic/inconsistency errors: these indicate that we've majorly * screwed up at runtime, i.e. it's not likely that it was just caused by the @@ -75,7 +68,7 @@ do { \ #define bch_dev_inconsistent(ca, ...) \ do { \ - __bch_dev_error(ca, __VA_ARGS__); \ + bch_err(ca, __VA_ARGS__); \ bch_inconsistent_error((ca)->fs); \ } while (0) @@ -171,17 +164,15 @@ do { \ #define bch_dev_fatal_error(ca, ...) \ do { \ - __bch_dev_error(ca, __VA_ARGS__); \ + bch_err(ca, __VA_ARGS__); \ bch_fatal_error(c); \ } while (0) #define bch_dev_fatal_io_error(ca, fmt, ...) \ do { \ - char _buf[BDEVNAME_SIZE]; \ - \ printk_ratelimited(KERN_ERR bch_fmt((ca)->fs, \ "fatal IO error on %s for " fmt), \ - bdevname((ca)->disk_sb.bdev, _buf), ##__VA_ARGS__); \ + (ca)->name, ##__VA_ARGS__); \ bch_fatal_error((ca)->fs); \ } while (0) @@ -219,11 +210,9 @@ do { \ /* Logs message and handles the error: */ #define bch_dev_nonfatal_io_error(ca, fmt, ...) \ do { \ - char _buf[BDEVNAME_SIZE]; \ - \ printk_ratelimited(KERN_ERR bch_fmt((ca)->fs, \ "IO error on %s for " fmt), \ - bdevname((ca)->disk_sb.bdev, _buf), ##__VA_ARGS__); \ + (ca)->name, ##__VA_ARGS__); \ bch_nonfatal_io_error(ca); \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 731dce2ec7d5..87a68d738567 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -322,9 +322,9 @@ static bool should_drop_ptr(const struct bch_fs *c, struct bkey_s_c_extent e, const struct bch_extent_ptr *ptr) { - struct bch_dev *ca; + struct bch_dev *ca = c->devs[ptr->dev]; - return (ca = PTR_DEV(c, ptr)) && ptr_stale(ca, ptr); + return ptr_stale(ca, ptr); } static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) @@ -332,14 +332,12 @@ static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) struct bch_extent_ptr *ptr = &e.v->start->ptr; bool dropped = false; - rcu_read_lock(); while ((ptr = extent_ptr_next(e, ptr))) if (should_drop_ptr(c, e.c, ptr)) { __bch_extent_drop_ptr(e, ptr); dropped = true; } else ptr++; - rcu_read_unlock(); if (dropped) bch_extent_drop_redundant_crcs(e); @@ -387,30 +385,39 @@ static void bch_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) } } -static const char *extent_ptr_invalid(struct bkey_s_c_extent e, - const struct bch_member_rcu *mi, +static const char *extent_ptr_invalid(const struct bch_fs *c, + struct bkey_s_c_extent e, const struct bch_extent_ptr *ptr, - unsigned size_ondisk) + unsigned size_ondisk, + bool metadata) { const struct bch_extent_ptr *ptr2; - const struct bch_member_cpu *m = mi->m + ptr->dev; + struct bch_dev *ca; + + if (ptr->dev >= c->sb.nr_devices) + return "pointer to invalid device"; - if (ptr->dev > mi->nr_devices || !m->valid) + ca = c->devs[ptr->dev]; + if (!ca) return "pointer to invalid device"; extent_for_each_ptr(e, ptr2) if (ptr != ptr2 && ptr->dev == ptr2->dev) return "multiple pointers to same device"; - if (ptr->offset + size_ondisk > m->bucket_size * m->nbuckets) + if (ptr->offset + size_ondisk > ca->mi.bucket_size * ca->mi.nbuckets) return "offset past end of device"; - if (ptr->offset < 
m->bucket_size * m->first_bucket) + if (ptr->offset < ca->mi.bucket_size * ca->mi.first_bucket) return "offset before first bucket"; - if ((ptr->offset & (m->bucket_size - 1)) + size_ondisk > m->bucket_size) + if ((ptr->offset & (ca->mi.bucket_size - 1)) + + size_ondisk > ca->mi.bucket_size) return "spans multiple buckets"; + if (!(metadata ? ca->mi.has_metadata : ca->mi.has_data)) + return "device not marked as containing data"; + return NULL; } @@ -426,7 +433,6 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) - rcu_read_lock(); extent_for_each_entry(e, entry) { if (!first) p(" "); @@ -445,10 +451,11 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, break; case BCH_EXTENT_ENTRY_ptr: ptr = entry_to_ptr(entry); + ca = c->devs[ptr->dev]; p("ptr: %u:%llu gen %u%s", ptr->dev, (u64) ptr->offset, ptr->gen, - (ca = PTR_DEV(c, ptr)) && ptr_stale(ca, ptr) + ca && ptr_stale(ca, ptr) ? " stale" : ""); break; default: @@ -459,8 +466,6 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, first = false; } out: - rcu_read_unlock(); - if (bkey_extent_is_cached(e.k)) p(" cached"); #undef p @@ -487,27 +492,20 @@ static const char *bch_btree_ptr_invalid(const struct bch_fs *c, const union bch_extent_entry *entry; const struct bch_extent_ptr *ptr; const union bch_extent_crc *crc; - struct bch_member_rcu *mi; const char *reason; extent_for_each_entry(e, entry) if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) return "invalid extent entry type"; - mi = fs_member_info_get(c); - extent_for_each_ptr_crc(e, ptr, crc) { - reason = extent_ptr_invalid(e, mi, ptr, - c->sb.btree_node_size); - - if (reason) { - fs_member_info_put(); + reason = extent_ptr_invalid(c, e, ptr, + c->sb.btree_node_size, + true); + if (reason) return reason; - } } - fs_member_info_put(); - if (crc) return "has crc field"; @@ -532,32 +530,26 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, unsigned replicas = 0; bool bad; - rcu_read_lock(); - - extent_for_each_online_device(c, e, ptr, ca) { + extent_for_each_ptr(e, ptr) { + ca = c->devs[ptr->dev]; + g = PTR_BUCKET(ca, ptr); replicas++; - if ((ca = PTR_DEV(c, ptr))) { - g = PTR_BUCKET(ca, ptr); + err = "stale"; + if (ptr_stale(ca, ptr)) + goto err; - err = "stale"; - if (ptr_stale(ca, ptr)) - goto err; - - do { - seq = read_seqcount_begin(&c->gc_pos_lock); - bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && - g->mark.data_type != BUCKET_BTREE; - } while (read_seqcount_retry(&c->gc_pos_lock, seq)); + do { + seq = read_seqcount_begin(&c->gc_pos_lock); + bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && + g->mark.data_type != BUCKET_BTREE; + } while (read_seqcount_retry(&c->gc_pos_lock, seq)); - err = "inconsistent"; - if (bad) - goto err; - } + err = "inconsistent"; + if (bad) + goto err; } - rcu_read_unlock(); - if (replicas < c->sb.meta_replicas_have) { bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k); @@ -576,7 +568,6 @@ err: g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], (unsigned) g->mark.counter); - rcu_read_unlock(); } static void bch_btree_ptr_to_text(struct bch_fs *c, char *buf, @@ -603,11 +594,9 @@ bch_btree_pick_ptr(struct bch_fs *c, const struct btree *b) const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; struct extent_pick_ptr pick = { .ca = NULL }; - struct bch_dev *ca; - - rcu_read_lock(); - extent_for_each_online_device_crc(c, e, crc, ptr, ca) { + 
extent_for_each_ptr_crc(e, ptr, crc) { + struct bch_dev *ca = c->devs[ptr->dev]; struct btree *root = btree_node_root(c, b); if (bch_fs_inconsistent_on(crc, c, @@ -628,15 +617,16 @@ bch_btree_pick_ptr(struct bch_fs *c, const struct btree *b) if (pick.ca && pick.ca->mi.tier < ca->mi.tier) continue; + if (!percpu_ref_tryget(&ca->io_ref)) + continue; + + if (pick.ca) + percpu_ref_put(&pick.ca->io_ref); + pick.ca = ca; pick.ptr = *ptr; } - if (pick.ca) - percpu_ref_get(&pick.ca->ref); - - rcu_read_unlock(); - return pick; } @@ -1757,47 +1747,38 @@ static const char *bch_extent_invalid(const struct bch_fs *c, const union bch_extent_entry *entry; const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; - struct bch_member_rcu *mi = fs_member_info_get(c); unsigned size_ondisk = e.k->size; const char *reason; extent_for_each_entry(e, entry) { - reason = "invalid extent entry type"; if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - goto invalid; + return "invalid extent entry type"; if (extent_entry_is_crc(entry)) { crc = entry_to_crc(entry); - reason = "checksum offset + key size > uncompressed size"; if (crc_offset(crc) + e.k->size > crc_uncompressed_size(e.k, crc)) - goto invalid; + return "checksum offset + key size > uncompressed size"; size_ondisk = crc_compressed_size(e.k, crc); - reason = "invalid checksum type"; if (!bch_checksum_type_valid(c, crc_csum_type(crc))) - goto invalid; + return "invalid checksum type"; - reason = "invalid compression type"; if (crc_compression_type(crc) >= BCH_COMPRESSION_NR) - goto invalid; + return "invalid compression type"; } else { ptr = entry_to_ptr(entry); - reason = extent_ptr_invalid(e, mi, - &entry->ptr, size_ondisk); + reason = extent_ptr_invalid(c, e, &entry->ptr, + size_ondisk, false); if (reason) - goto invalid; + return reason; } } - fs_member_info_put(); return NULL; -invalid: - fs_member_info_put(); - return reason; } case BCH_RESERVATION: { @@ -1821,14 +1802,13 @@ static void bch_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, struct bkey_s_c_extent e) { const struct bch_extent_ptr *ptr; - struct bch_member_rcu *mi; struct bch_dev *ca; struct bucket *g; unsigned seq, stale; char buf[160]; bool bad; unsigned ptrs_per_tier[BCH_TIER_MAX]; - unsigned tier, replicas = 0; + unsigned replicas = 0; /* * XXX: we should be doing most/all of these checks at startup time, @@ -1841,13 +1821,11 @@ static void bch_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier)); - mi = fs_member_info_get(c); - extent_for_each_ptr(e, ptr) { + ca = c->devs[ptr->dev]; + g = PTR_BUCKET(ca, ptr); replicas++; - - if (ptr->dev >= mi->nr_devices) - goto bad_device; + ptrs_per_tier[ca->mi.tier]++; /* * If journal replay hasn't finished, we might be seeing keys @@ -1856,51 +1834,40 @@ static void bch_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) continue; - if (!mi->m[ptr->dev].valid) - goto bad_device; - - tier = mi->m[ptr->dev].tier; - ptrs_per_tier[tier]++; - stale = 0; - if ((ca = PTR_DEV(c, ptr))) { - g = PTR_BUCKET(ca, ptr); - - do { - struct bucket_mark mark; + do { + struct bucket_mark mark; - seq = read_seqcount_begin(&c->gc_pos_lock); - mark = READ_ONCE(g->mark); + seq = read_seqcount_begin(&c->gc_pos_lock); + mark = READ_ONCE(g->mark); - /* between mark and bucket gen */ - smp_rmb(); + /* between mark and bucket gen */ + smp_rmb(); - stale = ptr_stale(ca, ptr); + stale = ptr_stale(ca, ptr); - bch_fs_bug_on(stale && 
!ptr->cached, c, - "stale dirty pointer"); + bch_fs_bug_on(stale && !ptr->cached, c, + "stale dirty pointer"); - bch_fs_bug_on(stale > 96, c, - "key too stale: %i", - stale); + bch_fs_bug_on(stale > 96, c, + "key too stale: %i", + stale); - if (stale) - break; + if (stale) + break; - bad = (mark.data_type != BUCKET_DATA || - (gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && - !mark.owned_by_allocator && - !(ptr->cached - ? mark.cached_sectors - : mark.dirty_sectors))); - } while (read_seqcount_retry(&c->gc_pos_lock, seq)); + bad = (mark.data_type != BUCKET_DATA || + (gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && + !mark.owned_by_allocator && + !(ptr->cached + ? mark.cached_sectors + : mark.dirty_sectors))); + } while (read_seqcount_retry(&c->gc_pos_lock, seq)); - if (bad) - goto bad_ptr; - } + if (bad) + goto bad_ptr; } - fs_member_info_put(); if (replicas > BCH_REPLICAS_MAX) { bch_bkey_val_to_text(c, btree_node_type(b), buf, @@ -1923,14 +1890,6 @@ static void bch_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, return; -bad_device: - bch_bkey_val_to_text(c, btree_node_type(b), buf, - sizeof(buf), e.s_c); - bch_fs_bug(c, "extent pointer to dev %u missing device: %s", - ptr->dev, buf); - fs_member_info_put(); - return; - bad_ptr: bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); @@ -1940,7 +1899,6 @@ bad_ptr: g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], (unsigned) g->mark.counter); - fs_member_info_put(); return; } @@ -1976,12 +1934,10 @@ static void bch_extent_to_text(struct bch_fs *c, char *buf, #undef p } -static unsigned PTR_TIER(struct bch_member_rcu *mi, +static unsigned PTR_TIER(struct bch_fs *c, const struct bch_extent_ptr *ptr) { - return ptr->dev < mi->nr_devices - ? 
mi->m[ptr->dev].tier - : UINT_MAX; + return c->devs[ptr->dev]->mi.tier; } static void bch_extent_crc_init(union bch_extent_crc *crc, @@ -2136,35 +2092,30 @@ void bch_extent_mark_replicas_cached(struct bch_fs *c, unsigned nr_cached) { struct bch_extent_ptr *ptr; - struct bch_member_rcu *mi; bool have_higher_tier; unsigned tier = 0; if (!nr_cached) return; - mi = fs_member_info_get(c); - do { have_higher_tier = false; extent_for_each_ptr(e, ptr) { if (!ptr->cached && - PTR_TIER(mi, ptr) == tier) { + PTR_TIER(c, ptr) == tier) { ptr->cached = true; nr_cached--; if (!nr_cached) - goto out; + return; } - if (PTR_TIER(mi, ptr) > tier) + if (PTR_TIER(c, ptr) > tier) have_higher_tier = true; } tier++; } while (have_higher_tier); -out: - fs_member_info_put(); } /* @@ -2182,7 +2133,6 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k, struct bkey_s_c_extent e; const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; - struct bch_dev *ca; switch (k.k->type) { case KEY_TYPE_DELETED: @@ -2198,10 +2148,11 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT: case BCH_EXTENT_CACHED: e = bkey_s_c_to_extent(k); - rcu_read_lock(); ret->ca = NULL; - extent_for_each_online_device_crc(c, e, crc, ptr, ca) { + extent_for_each_ptr_crc(e, ptr, crc) { + struct bch_dev *ca = c->devs[ptr->dev]; + if (ptr_stale(ca, ptr)) continue; @@ -2213,6 +2164,12 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k, ret->ca->mi.tier < ca->mi.tier)) continue; + if (!percpu_ref_tryget(&ca->io_ref)) + continue; + + if (ret->ca) + percpu_ref_put(&ret->ca->io_ref); + *ret = (struct extent_pick_ptr) { .crc = crc_to_128(e.k, crc), .ptr = *ptr, @@ -2220,12 +2177,8 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k, }; } - if (ret->ca) - percpu_ref_get(&ret->ca->ref); - else if (!bkey_extent_is_cached(e.k)) + if (!ret->ca && !bkey_extent_is_cached(e.k)) ret->ca = ERR_PTR(-EIO); - - rcu_read_unlock(); return; case BCH_RESERVATION: @@ -2273,7 +2226,7 @@ static enum merge_result bch_extent_merge(struct bch_fs *c, extent_for_each_entry(el, en_l) { struct bch_extent_ptr *lp, *rp; - struct bch_member_cpu *m; + unsigned bucket_size; en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); @@ -2291,15 +2244,11 @@ static enum merge_result bch_extent_merge(struct bch_fs *c, return BCH_MERGE_NOMERGE; /* We don't allow extents to straddle buckets: */ + bucket_size = c->devs[lp->dev]->mi.bucket_size; - m = fs_member_info_get(c)->m + lp->dev; - if ((lp->offset & ~((u64) m->bucket_size - 1)) != - (rp->offset & ~((u64) m->bucket_size - 1))) { - fs_member_info_put(); + if ((lp->offset & ~((u64) bucket_size - 1)) != + (rp->offset & ~((u64) bucket_size - 1))) return BCH_MERGE_NOMERGE; - - } - fs_member_info_put(); } break; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 2d70c42a695b..db7bd4f14988 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -285,10 +285,6 @@ out: \ #define extent_for_each_ptr_crc(_e, _ptr, _crc) \ extent_for_each_ptr_crc_filter(_e, _ptr, _crc, true) -#define extent_for_each_online_device_crc(_c, _e, _crc, _ptr, _ca) \ - extent_for_each_ptr_crc_filter(_e, _ptr, _crc, \ - ((_ca) = PTR_DEV(_c, _ptr))) - /* Iterate over pointers only, and from a given position: */ #define extent_ptr_next_filter(_e, _ptr, _filter) \ @@ -309,9 +305,6 @@ out: \ #define extent_for_each_ptr(_e, _ptr) \ extent_for_each_ptr_filter(_e, _ptr, true) -#define extent_for_each_online_device(_c, _e, _ptr, _ca) \ - 
extent_for_each_ptr_filter(_e, _ptr, ((_ca) = PTR_DEV(_c, _ptr))) - #define extent_ptr_prev(_e, _ptr) \ ({ \ typeof(&(_e).v->start->ptr) _p; \ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b0dc1c142c58..f1125a32239f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1183,33 +1183,13 @@ static int bch_sync_fs(struct super_block *sb, int wait) return bch_journal_flush(&c->journal); } -static struct bch_fs *bch_bdev_to_fs(struct block_device *bdev) -{ - struct bch_fs *c; - struct bch_dev *ca; - unsigned i; - - rcu_read_lock(); - - list_for_each_entry(c, &bch_fs_list, list) - for_each_member_device_rcu(ca, c, i) - if (ca->disk_sb.bdev == bdev) { - rcu_read_unlock(); - return c; - } - - rcu_read_unlock(); - - return NULL; -} - static struct bch_fs *bch_open_as_blockdevs(const char *_dev_name, - struct bch_opts opts) + struct bch_opts opts) { size_t nr_devs = 0, i = 0; char *dev_name, *s, **devs; struct bch_fs *c = NULL; - const char *err; + const char *err = "cannot allocate memory"; dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) @@ -1235,40 +1215,40 @@ static struct bch_fs *bch_open_as_blockdevs(const char *_dev_name, * filesystem and they all belong to the _same_ filesystem */ - mutex_lock(&bch_register_lock); - for (i = 0; i < nr_devs; i++) { struct block_device *bdev = lookup_bdev(devs[i]); struct bch_fs *c2; if (IS_ERR(bdev)) - goto err_unlock; + goto err; c2 = bch_bdev_to_fs(bdev); bdput(bdev); if (!c) c = c2; + else if (c2) + closure_put(&c2->cl); - if (c != c2) - goto err_unlock; + if (!c) + goto err; + if (c != c2) { + closure_put(&c->cl); + goto err; + } } - if (!c) - goto err_unlock; - mutex_lock(&c->state_lock); if (!bch_fs_running(c)) { mutex_unlock(&c->state_lock); + closure_put(&c->cl); err = "incomplete filesystem"; c = NULL; - goto err_unlock; + goto err; } - closure_get(&c->cl); mutex_unlock(&c->state_lock); - mutex_unlock(&bch_register_lock); } set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); @@ -1276,11 +1256,9 @@ err: kfree(devs); kfree(dev_name); + if (!c) + pr_err("bch_fs_open err %s", err); return c; -err_unlock: - mutex_unlock(&bch_register_lock); - pr_err("bch_fs_open err %s", err); - goto err; } static int bch_remount(struct super_block *sb, int *flags, char *data) @@ -1398,21 +1376,17 @@ static struct dentry *bch_mount(struct file_system_type *fs_type, sb->s_time_gran = c->sb.time_precision; c->vfs_sb = sb; sb->s_bdi = &c->bdi; + strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); - rcu_read_lock(); - for_each_member_device_rcu(ca, c, i) { + for_each_online_member(ca, c, i) { struct block_device *bdev = ca->disk_sb.bdev; - BUILD_BUG_ON(sizeof(sb->s_id) < BDEVNAME_SIZE); - - bdevname(bdev, sb->s_id); - - /* XXX: do we even need s_bdev? 
*/ + /* XXX: create an anonymous device for multi device filesystems */ sb->s_bdev = bdev; sb->s_dev = bdev->bd_dev; + percpu_ref_put(&ca->io_ref); break; } - rcu_read_unlock(); if (opts.posix_acl < 0) sb->s_flags |= MS_POSIXACL; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 9a2f9c1c683b..fbcc40427f23 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -146,14 +146,9 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, wbio->c = c; extent_for_each_ptr(e, ptr) { - rcu_read_lock(); - ca = PTR_DEV(c, ptr); - if (ca) - percpu_ref_get(&ca->ref); - rcu_read_unlock(); - - if (!ca) { - bch_submit_wbio(c, wbio, ca, ptr, punt); + ca = c->devs[ptr->dev]; + if (!percpu_ref_tryget(&ca->io_ref)) { + bch_submit_wbio(c, wbio, NULL, ptr, punt); break; } @@ -365,7 +360,7 @@ static void bch_write_endio(struct bio *bio) bch_account_io_completion_time(ca, wbio->submit_time_us, REQ_OP_WRITE); if (ca) - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); if (bio->bi_error && orig) orig->bi_error = bio->bi_error; @@ -992,7 +987,7 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio) { struct bio *orig = &bch_rbio_parent(rbio)->bio; - percpu_ref_put(&rbio->ca->ref); + percpu_ref_put(&rbio->ca->io_ref); rbio->ca = NULL; if (rbio->split) { @@ -1034,7 +1029,7 @@ static void bch_read_error_maybe_retry(struct bch_fs *c, bch_rbio_done(c, rbio); return; retry: - percpu_ref_put(&rbio->ca->ref); + percpu_ref_put(&rbio->ca->io_ref); rbio->ca = NULL; spin_lock_irqsave(&c->read_retry_lock, flags); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 01c2b92f064f..109c27c88be5 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -897,6 +897,7 @@ search_done: break; out: free_pages((unsigned long) buf.data, get_order(buf.size)); + percpu_ref_put(&ca->io_ref); closure_return(cl); err: mutex_lock(&jlist->lock); @@ -974,11 +975,13 @@ int bch_journal_read(struct bch_fs *c, struct list_head *list) jlist.head = list; jlist.ret = 0; - for_each_member_device(ca, c, iter) + for_each_readable_member(ca, c, iter) { + percpu_ref_get(&ca->io_ref); closure_call(&ca->journal.read, bch_journal_read_device, system_unbound_wq, &jlist.cl); + } closure_sync(&jlist.cl); @@ -1285,8 +1288,8 @@ static int journal_entry_sectors(struct journal *j) lockdep_assert_held(&j->lock); - rcu_read_lock(); - group_for_each_dev_rcu(ca, &j->devs, i) { + spin_lock(&j->devs.lock); + group_for_each_dev(ca, &j->devs, i) { unsigned buckets_required = 0; sectors_available = min_t(unsigned, sectors_available, @@ -1317,7 +1320,7 @@ static int journal_entry_sectors(struct journal *j) nr_devs++; nr_online++; } - rcu_read_unlock(); + spin_unlock(&j->devs.lock); if (nr_online < c->opts.metadata_replicas_required) return -EROFS; @@ -1881,8 +1884,9 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja) bool ret; spin_lock(&j->lock); - ret = (ja->last_idx != ja->cur_idx && - ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk); + ret = ja->nr && + (ja->last_idx != ja->cur_idx && + ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk); spin_unlock(&j->lock); return ret; @@ -1922,9 +1926,12 @@ static void journal_reclaim_work(struct work_struct *work) * Advance last_idx to point to the oldest journal entry containing * btree node updates that have not yet been written out */ - group_for_each_dev(ca, &j->devs, iter) { + for_each_rw_member(ca, c, iter) { struct journal_device *ja = &ca->journal; + if (!ja->nr) + continue; + while (should_discard_bucket(j, ja)) { if 
(!reclaim_lock_held) { /* @@ -2012,7 +2019,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) READ_ONCE(c->opts.metadata_replicas); spin_lock(&j->lock); - rcu_read_lock(); /* * Drop any pointers to devices that have been removed, are no longer @@ -2023,13 +2029,15 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) * entry - that's why we drop pointers to devices <= current free space, * i.e. whichever device was limiting the current journal entry size. */ - extent_for_each_ptr_backwards(e, ptr) - if (!(ca = PTR_DEV(c, ptr)) || - ca->mi.state != BCH_MEMBER_STATE_ACTIVE || + extent_for_each_ptr_backwards(e, ptr) { + ca = c->devs[ptr->dev]; + + if (ca->mi.state != BCH_MEMBER_STATE_RW || ca->journal.sectors_free <= sectors) __bch_extent_drop_ptr(e, ptr); else ca->journal.sectors_free -= sectors; + } replicas = bch_extent_nr_ptrs(e.c); @@ -2051,8 +2059,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) * Pick devices for next journal write: * XXX: sort devices by free journal space? */ - for (i = 0; i < j->devs.nr; i++) { - ca = j->devs.d[i].dev; + group_for_each_dev(ca, &j->devs, i) { ja = &ca->journal; if (replicas >= replicas_want) @@ -2082,7 +2089,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx); } spin_unlock(&j->devs.lock); - rcu_read_unlock(); j->prev_buf_sectors = 0; spin_unlock(&j->lock); @@ -2148,7 +2154,7 @@ static void journal_write_endio(struct bio *bio) bch_journal_halt(j); closure_put(&j->io); - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); } static void journal_write_done(struct closure *cl) @@ -2253,13 +2259,8 @@ static void journal_write(struct closure *cl) goto no_io; extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) { - rcu_read_lock(); - ca = PTR_DEV(c, ptr); - if (ca) - percpu_ref_get(&ca->ref); - rcu_read_unlock(); - - if (!ca) { + ca = c->devs[ptr->dev]; + if (!percpu_ref_tryget(&ca->io_ref)) { /* XXX: fix this */ bch_err(c, "missing device for journal write\n"); continue; @@ -2284,11 +2285,10 @@ static void journal_write(struct closure *cl) ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq); } - for_each_member_device(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - journal_flushes_device(ca) && + for_each_rw_member(ca, c, i) + if (journal_flushes_device(ca) && !bch_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) { - percpu_ref_get(&ca->ref); + percpu_ref_get(&ca->io_ref); bio = ca->journal.bio; bio_reset(bio); @@ -2631,7 +2631,8 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf) journal_entry_is_open(j), test_bit(JOURNAL_REPLAY_DONE, &j->flags)); - group_for_each_dev_rcu(ca, &j->devs, iter) { + spin_lock(&j->devs.lock); + group_for_each_dev(ca, &j->devs, iter) { struct journal_device *ja = &ca->journal; ret += scnprintf(buf + ret, PAGE_SIZE - ret, @@ -2643,6 +2644,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf) ja->cur_idx, ja->bucket_seq[ja->cur_idx], ja->last_idx, ja->bucket_seq[ja->last_idx]); } + spin_unlock(&j->devs.lock); spin_unlock(&j->lock); rcu_read_unlock(); @@ -2748,19 +2750,24 @@ void bch_fs_journal_stop(struct journal *j) void bch_dev_journal_exit(struct bch_dev *ca) { + kfree(ca->journal.bio); kfree(ca->journal.buckets); kfree(ca->journal.bucket_seq); + + ca->journal.bio = NULL; + ca->journal.buckets = NULL; + ca->journal.bucket_seq = NULL; } -int bch_dev_journal_init(struct bch_dev *ca) +int bch_dev_journal_init(struct 
bch_dev *ca, struct bch_sb *sb) { struct journal_device *ja = &ca->journal; struct bch_sb_field_journal *journal_buckets = - bch_sb_get_journal(ca->disk_sb.sb); + bch_sb_get_journal(sb); unsigned i, journal_entry_pages; journal_entry_pages = - DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb), + DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb), PAGE_SECTORS); ja->nr = bch_nr_journal_buckets(journal_buckets); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 96f0b764837b..c83f81046f47 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -366,7 +366,7 @@ int bch_journal_move(struct bch_dev *); void bch_fs_journal_stop(struct journal *); void bch_dev_journal_exit(struct bch_dev *); -int bch_dev_journal_init(struct bch_dev *); +int bch_dev_journal_init(struct bch_dev *, struct bch_sb *); void bch_fs_journal_exit(struct journal *); int bch_fs_journal_init(struct journal *, unsigned); diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index ce6defe5bda1..5bd93be2fddf 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -64,7 +64,7 @@ int bch_move_data_off_device(struct bch_dev *ca) u64 seen_key_count; int ret = 0; - BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW); if (!ca->mi.has_data) return 0; @@ -163,7 +163,7 @@ static int bch_move_btree_off(struct bch_dev *ca, enum btree_id id) struct btree *b; int ret; - BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW); closure_init_stack(&cl); @@ -259,7 +259,7 @@ int bch_move_metadata_off_device(struct bch_dev *ca) unsigned i; int ret; - BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW); if (!ca->mi.has_metadata) return 0; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 25b203a142ce..a9a9d3197b6d 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -17,12 +17,7 @@ static struct bch_extent_ptr *bkey_find_ptr(struct bch_fs *c, struct bch_extent_ptr ptr) { struct bch_extent_ptr *ptr2; - struct bch_member_rcu *mi; - unsigned bucket_bits; - - mi = fs_member_info_get(c); - bucket_bits = ilog2(mi->m[ptr.dev].bucket_size); - fs_member_info_put(); + unsigned bucket_bits = c->devs[ptr.dev]->bucket_bits; extent_for_each_ptr(e, ptr2) if (ptr2->dev == ptr.dev && diff --git a/fs/bcachefs/notify.c b/fs/bcachefs/notify.c index 675dc26cd9ef..1d5f626fcf5d 100644 --- a/fs/bcachefs/notify.c +++ b/fs/bcachefs/notify.c @@ -31,11 +31,10 @@ static void notify_get(struct bch_fs *c) static void notify_get_cache(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - char buf[BDEVNAME_SIZE]; notify_get(c); notify_var(c, "UUID=%pU", ca->uuid.b); - notify_var(c, "BLOCKDEV=%s", bdevname(ca->disk_sb.bdev, buf)); + notify_var(c, "BLOCKDEV=%s", ca->name); } static void notify_put(struct bch_fs *c) diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 23302d44a8bb..41780d594af1 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -50,7 +50,7 @@ const char * const bch_cache_modes[] = { }; const char * const bch_dev_state[] = { - "active", + "readwrite", "readonly", "failed", "spare", diff --git a/fs/bcachefs/request.c b/fs/bcachefs/request.c index 2b9e687e742b..0646346e4667 100644 --- a/fs/bcachefs/request.c +++ b/fs/bcachefs/request.c @@ -712,14 +712,7 @@ static int cached_dev_congested(void *data, int bits) return 1; if (cached_dev_get(dc)) { - unsigned i; - struct bch_dev *ca; - - for_each_member_device(ca, d->c, i) { - q = bdev_get_queue(ca->disk_sb.bdev); - ret |= 
bdi_congested(&q->backing_dev_info, bits); - } - + ret |= bch_congested(d->c, bits); cached_dev_put(dc); } @@ -802,17 +795,8 @@ static int blockdev_volume_ioctl(struct bcache_device *d, fmode_t mode, static int blockdev_volume_congested(void *data, int bits) { struct bcache_device *d = data; - struct request_queue *q; - struct bch_dev *ca; - unsigned i; - int ret = 0; - for_each_member_device(ca, d->c, i) { - q = bdev_get_queue(ca->disk_sb.bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); - } - - return ret; + return bch_congested(d->c, bits); } void bch_blockdev_volume_request_init(struct bcache_device *d) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index fd635e64f096..67c03e1932b1 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -174,7 +174,9 @@ struct bch_sb_field *bch_fs_sb_field_resize(struct bch_fs *c, if (bch_fs_sb_realloc(c, le32_to_cpu(c->disk_sb->u64s) + d)) return NULL; - for_each_member_device(ca, c, i) { + /* XXX: we're not checking that offline devices have enough space */ + + for_each_online_member(ca, c, i) { struct bcache_superblock *sb = &ca->disk_sb; if (bch_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { @@ -306,7 +308,7 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) struct bch_sb *sb = disk_sb->sb; struct bch_sb_field *f; struct bch_sb_field_members *sb_mi; - struct bch_member_cpu mi; + struct bch_member_cpu mi; const char *err; u16 block_size; @@ -408,7 +410,7 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) return err; sb_mi = bch_sb_get_members(sb); - mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx); + mi = bch_mi_to_cpu(sb_mi->members + sb->dev_idx); if (mi.nbuckets > LONG_MAX) return "Too many buckets"; @@ -434,104 +436,33 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) /* device open: */ -static bool bch_is_open_cache(struct block_device *bdev) -{ - struct bch_fs *c; - struct bch_dev *ca; - unsigned i; - - rcu_read_lock(); - list_for_each_entry(c, &bch_fs_list, list) - for_each_member_device_rcu(ca, c, i) - if (ca->disk_sb.bdev == bdev) { - rcu_read_unlock(); - return true; - } - rcu_read_unlock(); - return false; -} - -static bool bch_is_open(struct block_device *bdev) -{ - bool ret; - - mutex_lock(&bch_register_lock); - ret = bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev); - mutex_unlock(&bch_register_lock); - - return ret; -} - static const char *bch_blkdev_open(const char *path, fmode_t mode, void *holder, struct block_device **ret) { struct block_device *bdev; - const char *err; *ret = NULL; bdev = blkdev_get_by_path(path, mode, holder); - - if (bdev == ERR_PTR(-EBUSY)) { - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return "device busy"; - - err = bch_is_open(bdev) - ? 
"device already registered" - : "device busy"; - - bdput(bdev); - return err; - } + if (bdev == ERR_PTR(-EBUSY)) + return "device busy"; if (IS_ERR(bdev)) return "failed to open device"; - bdev_get_queue(bdev)->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + if (mode & FMODE_WRITE) + bdev_get_queue(bdev)->backing_dev_info.capabilities + |= BDI_CAP_STABLE_WRITES; *ret = bdev; return NULL; } -/* Update cached mi: */ -int bch_fs_mi_update(struct bch_fs *c, struct bch_member *mi, - unsigned nr_devices) -{ - struct bch_member_rcu *new, *old; - struct bch_dev *ca; - unsigned i; - - lockdep_assert_held(&c->sb_lock); - - new = kzalloc(sizeof(struct bch_member_rcu) + - sizeof(struct bch_member_cpu) * nr_devices, - GFP_KERNEL); - if (!new) - return -ENOMEM; - - new->nr_devices = nr_devices; - - for (i = 0; i < nr_devices; i++) - new->m[i] = cache_mi_to_cpu_mi(&mi[i]); - - rcu_read_lock(); - for_each_member_device(ca, c, i) - ca->mi = new->m[i]; - rcu_read_unlock(); - - old = rcu_dereference_protected(c->members, - lockdep_is_held(&c->sb_lock)); - - rcu_assign_pointer(c->members, new); - if (old) - kfree_rcu(old, rcu); - - return 0; -} - static void bch_sb_update(struct bch_fs *c) { struct bch_sb *src = c->disk_sb; + struct bch_sb_field_members *mi = bch_sb_get_members(src); + struct bch_dev *ca; + unsigned i; lockdep_assert_held(&c->sb_lock); @@ -548,6 +479,9 @@ static void bch_sb_update(struct bch_fs *c) c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); c->sb.time_precision = le32_to_cpu(src->time_precision); + + for_each_member_device(ca, c, i) + ca->mi = bch_mi_to_cpu(mi->members + i); } /* doesn't copy member info */ @@ -586,8 +520,6 @@ static void __copy_super(struct bch_sb *dst, struct bch_sb *src) int bch_sb_to_fs(struct bch_fs *c, struct bch_sb *src) { - struct bch_sb_field_members *members = - bch_sb_get_members(src); struct bch_sb_field_journal *journal_buckets = bch_sb_get_journal(src); unsigned journal_u64s = journal_buckets @@ -599,9 +531,6 @@ int bch_sb_to_fs(struct bch_fs *c, struct bch_sb *src) if (bch_fs_sb_realloc(c, le32_to_cpu(src->u64s) - journal_u64s)) return -ENOMEM; - if (bch_fs_mi_update(c, members->members, src->nr_devices)) - return -ENOMEM; - __copy_super(c->disk_sb, src); bch_sb_update(c); @@ -784,7 +713,7 @@ static void write_super_endio(struct bio *bio) bch_account_io_completion(ca); closure_put(&ca->fs->sb_write); - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); } static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) @@ -795,6 +724,9 @@ static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) if (idx >= sb->layout.nr_superblocks) return false; + if (!percpu_ref_tryget(&ca->io_ref)) + return false; + sb->offset = sb->layout.sb_offset[idx]; SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); @@ -812,16 +744,12 @@ static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); bch_bio_map(bio, sb); - percpu_ref_get(&ca->ref); closure_bio_submit_punt(bio, &c->sb_write, c); - return true; } void bch_write_super(struct bch_fs *c) { - struct bch_sb_field_members *members = - bch_sb_get_members(c->disk_sb); struct closure *cl = &c->sb_write; struct bch_dev *ca; unsigned i, super_idx = 0; @@ -833,7 +761,7 @@ void bch_write_super(struct bch_fs *c) le64_add_cpu(&c->disk_sb->seq, 1); - for_each_member_device(ca, c, i) + for_each_online_member(ca, c, i) bch_sb_from_fs(c, ca); if 
(c->opts.nochanges) @@ -841,7 +769,7 @@ void bch_write_super(struct bch_fs *c) do { wrote = false; - for_each_member_device(ca, c, i) + for_each_online_member(ca, c, i) if (write_one_super(c, ca, super_idx)) wrote = true; @@ -850,7 +778,6 @@ void bch_write_super(struct bch_fs *c) } while (wrote); out: /* Make new options visible after they're persistent: */ - bch_fs_mi_update(c, members->members, c->sb.nr_devices); bch_sb_update(c); } diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index ed0338cf22a6..1a9bd3092e4c 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -83,7 +83,7 @@ static inline __u64 bset_magic(struct bch_fs *c) return __le64_to_cpu(bch_sb_magic(c) ^ BSET_MAGIC); } -static inline struct bch_member_cpu cache_mi_to_cpu_mi(struct bch_member *mi) +static inline struct bch_member_cpu bch_mi_to_cpu(struct bch_member *mi) { return (struct bch_member_cpu) { .nbuckets = le64_to_cpu(mi->nbuckets), @@ -99,8 +99,6 @@ static inline struct bch_member_cpu cache_mi_to_cpu_mi(struct bch_member *mi) }; } -int bch_fs_mi_update(struct bch_fs *, struct bch_member *, unsigned); - int bch_sb_to_fs(struct bch_fs *, struct bch_sb *); int bch_sb_from_fs(struct bch_fs *, struct bch_dev *); @@ -118,27 +116,23 @@ void bch_write_super(struct bch_fs *); void bch_check_mark_super_slowpath(struct bch_fs *, const struct bkey_i *, bool); -#define fs_member_info_get(_c) \ - (rcu_read_lock(), rcu_dereference((_c)->members)) - -#define fs_member_info_put() rcu_read_unlock() - static inline bool bch_check_super_marked(struct bch_fs *c, const struct bkey_i *k, bool meta) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); const struct bch_extent_ptr *ptr; - struct bch_member_cpu *mi = fs_member_info_get(c)->m; unsigned nr_replicas = 0; bool ret = true; extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; + if (ptr->cached) continue; if (!(meta - ? mi[ptr->dev].has_metadata - : mi[ptr->dev].has_data)) { + ? ca->mi.has_metadata + : ca->mi.has_data)) { ret = false; break; } @@ -150,8 +144,6 @@ static inline bool bch_check_super_marked(struct bch_fs *c, (meta ? 
c->sb.meta_replicas_have : c->sb.data_replicas_have)) ret = false; - fs_member_info_put(); - return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 19c139418790..200b2b31eba0 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -62,28 +62,77 @@ static const uuid_le invalid_uuid = { }; static struct kset *bcache_kset; -struct mutex bch_register_lock; -LIST_HEAD(bch_fs_list); +static LIST_HEAD(bch_fs_list); +static DEFINE_MUTEX(bch_fs_list_lock); static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); struct workqueue_struct *bcache_io_wq; struct crypto_shash *bch_sha256; static void bch_dev_free(struct bch_dev *); -static int bch_dev_online(struct bch_dev *); +static int bch_dev_alloc(struct bch_fs *, unsigned); +static int bch_dev_sysfs_online(struct bch_dev *); +static void __bch_dev_read_only(struct bch_fs *, struct bch_dev *); -static int bch_congested_fn(void *data, int bdi_bits) +struct bch_fs *bch_bdev_to_fs(struct block_device *bdev) +{ + struct bch_fs *c; + struct bch_dev *ca; + unsigned i; + + mutex_lock(&bch_fs_list_lock); + rcu_read_lock(); + + list_for_each_entry(c, &bch_fs_list, list) + for_each_member_device_rcu(ca, c, i) + if (ca->disk_sb.bdev == bdev) { + closure_get(&c->cl); + goto found; + } + c = NULL; +found: + rcu_read_unlock(); + mutex_unlock(&bch_fs_list_lock); + + return c; +} + +static struct bch_fs *__bch_uuid_to_fs(uuid_le uuid) +{ + struct bch_fs *c; + + lockdep_assert_held(&bch_fs_list_lock); + + list_for_each_entry(c, &bch_fs_list, list) + if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) + return c; + + return NULL; +} + +struct bch_fs *bch_uuid_to_fs(uuid_le uuid) +{ + struct bch_fs *c; + + mutex_lock(&bch_fs_list_lock); + c = __bch_uuid_to_fs(uuid); + if (c) + closure_get(&c->cl); + mutex_unlock(&bch_fs_list_lock); + + return c; +} + +int bch_congested(struct bch_fs *c, int bdi_bits) { struct backing_dev_info *bdi; - struct bch_fs *c = data; struct bch_dev *ca; unsigned i; int ret = 0; - rcu_read_lock(); if (bdi_bits & (1 << WB_sync_congested)) { /* Reads - check all devices: */ - for_each_member_device_rcu(ca, c, i) { + for_each_readable_member(ca, c, i) { bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); if (bdi_congested(bdi, bdi_bits)) { @@ -96,7 +145,8 @@ static int bch_congested_fn(void *data, int bdi_bits) struct bch_tier *tier = READ_ONCE(c->fastest_tier); struct dev_group *grp = tier ? 
&tier->devs : &c->all_devs; - group_for_each_dev_rcu(ca, grp, i) { + rcu_read_lock(); + group_for_each_dev(ca, grp, i) { bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); if (bdi_congested(bdi, bdi_bits)) { @@ -104,12 +154,19 @@ static int bch_congested_fn(void *data, int bdi_bits) break; } } + rcu_read_unlock(); } - rcu_read_unlock(); return ret; } +static int bch_congested_fn(void *data, int bdi_bits) +{ + struct bch_fs *c = data; + + return bch_congested(c, bdi_bits); +} + /* Filesystem RO/RW: */ /* @@ -256,10 +313,9 @@ const char *bch_fs_read_write(struct bch_fs *c) goto out; err = "error starting allocator thread"; - for_each_member_device(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -268,10 +324,9 @@ const char *bch_fs_read_write(struct bch_fs *c) goto err; err = "error starting moving GC thread"; - for_each_member_device(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_moving_gc_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_moving_gc_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -324,7 +379,6 @@ static void bch_fs_free(struct bch_fs *c) if (c->wq) destroy_workqueue(c->wq); - kfree_rcu(rcu_dereference_protected(c->members, 1), rcu); /* shutting down */ free_pages((unsigned long) c->disk_sb, c->disk_sb_order); kfree(c); module_put(THIS_MODULE); @@ -353,17 +407,19 @@ static void bch_fs_offline(struct bch_fs *c) struct bch_dev *ca; unsigned i; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); list_del(&c->list); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); + + for_each_member_device(ca, c, i) + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev) + sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, + "bcache"); if (c->kobj.state_in_sysfs) kobject_del(&c->kobj); - for_each_member_device(ca, c, i) - if (ca->kobj.state_in_sysfs) - kobject_del(&ca->kobj); - bch_fs_debug_exit(c); bch_fs_chardev_exit(c); @@ -453,7 +509,6 @@ void bch_fs_stop(struct bch_fs *c) closure_sync(&c->cl); bch_fs_exit(c); - kobject_put(&c->kobj); } /* Stop, detaching from backing devices: */ @@ -468,8 +523,9 @@ void bch_fs_detach(struct bch_fs *c) static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { + struct bch_sb_field_members *mi; struct bch_fs *c; - unsigned iter_size, journal_entry_bytes; + unsigned i, iter_size, journal_entry_bytes; c = kzalloc(sizeof(struct bch_fs), GFP_KERNEL); if (!c) @@ -607,6 +663,12 @@ static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->bdi.congested_fn = bch_congested_fn; c->bdi.congested_data = c; + mi = bch_sb_get_members(c->disk_sb); + for (i = 0; i < c->sb.nr_devices; i++) + if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)) && + bch_dev_alloc(c, i)) + goto err; + /* * Now that all allocations have succeeded, init various refcounty * things that let us shutdown: @@ -632,31 +694,19 @@ err: return NULL; } -static struct bch_fs *bch_fs_lookup(uuid_le uuid) -{ - struct bch_fs *c; - - lockdep_assert_held(&bch_register_lock); - - list_for_each_entry(c, &bch_fs_list, list) - if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) - return c; - - return NULL; -} - static const char *__bch_fs_online(struct bch_fs *c) { struct bch_dev *ca; + const char *err = NULL; unsigned i; int ret; - lockdep_assert_held(&bch_register_lock); + 
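
One pattern worth calling out in the new helpers above: bch_bdev_to_fs() and bch_uuid_to_fs() take their reference with closure_get() while bch_fs_list_lock is still held, so the filesystem they return cannot be torn down between the list walk and the caller's use of it. A minimal userspace sketch of that take-a-reference-under-the-lock shape, with every name invented for illustration:

#include <pthread.h>
#include <string.h>

struct fs {
	struct fs *next;
	unsigned char uuid[16];
	int refcount;	/* protected by list_lock in this sketch */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct fs *fs_list;

/* Analogue of bch_uuid_to_fs(): the returned object stays valid because
 * the reference is taken before the list lock is dropped. */
static struct fs *uuid_to_fs(const unsigned char uuid[16])
{
	struct fs *c;

	pthread_mutex_lock(&list_lock);
	for (c = fs_list; c; c = c->next)
		if (!memcmp(c->uuid, uuid, sizeof(c->uuid))) {
			c->refcount++;	/* stands in for closure_get(&c->cl) */
			break;
		}
	pthread_mutex_unlock(&list_lock);

	return c;	/* NULL if absent; caller drops the ref when done */
}

The sysfs attach path later in this patch uses the same discipline: bch_uuid_to_fs() to pin the filesystem, then closure_put(&c->cl) once the attach has completed.
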
lockdep_assert_held(&bch_fs_list_lock); if (!list_empty(&c->list)) return NULL; - if (bch_fs_lookup(c->sb.uuid)) + if (__bch_uuid_to_fs(c->sb.uuid)) return "filesystem UUID already open"; ret = bch_fs_chardev_init(c); @@ -672,35 +722,33 @@ static const char *__bch_fs_online(struct bch_fs *c) bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) return "error creating sysfs objects"; - for_each_member_device(ca, c, i) - if (bch_dev_online(ca)) { - percpu_ref_put(&ca->ref); - return "error creating sysfs objects"; - } - mutex_lock(&c->state_lock); - if (bch_blockdev_volumes_start(c)) { - mutex_unlock(&c->state_lock); - return "can't bring up blockdev volumes"; - } + err = "error creating sysfs objects"; + __for_each_member_device(ca, c, i) + if (bch_dev_sysfs_online(ca)) + goto err; - bch_attach_backing_devs(c); + err = "can't bring up blockdev volumes"; + if (bch_blockdev_volumes_start(c)) + goto err; - mutex_unlock(&c->state_lock); + bch_attach_backing_devs(c); list_add(&c->list, &bch_fs_list); - - return 0; + err = NULL; +err: + mutex_unlock(&c->state_lock); + return err; } static const char *bch_fs_online(struct bch_fs *c) { const char *err; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); err = __bch_fs_online(c); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); return err; } @@ -719,7 +767,7 @@ static const char *__bch_fs_start(struct bch_fs *c) BUG_ON(c->state != BCH_FS_STARTING); mutex_lock(&c->sb_lock); - for_each_member_device(ca, c, i) + for_each_online_member(ca, c, i) bch_sb_from_fs(c, ca); mutex_unlock(&c->sb_lock); @@ -728,27 +776,20 @@ static const char *__bch_fs_start(struct bch_fs *c) if (ret) goto err; - pr_debug("btree_journal_read() done"); - j = &list_entry(journal.prev, struct journal_replay, list)->j; + c->prio_clock[READ].hand = le16_to_cpu(j->read_clock); + c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock); + err = "error reading priorities"; - for_each_member_device(ca, c, i) { + for_each_readable_member(ca, c, i) { ret = bch_prio_read(ca); if (ret) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); goto err; } } - c->prio_clock[READ].hand = le16_to_cpu(j->read_clock); - c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock); - - for_each_member_device(ca, c, i) { - bch_recalc_min_prio(ca, READ); - bch_recalc_min_prio(ca, WRITE); - } - for (id = 0; id < BTREE_ID_NR; id++) { unsigned level; struct bkey_i *k; @@ -786,10 +827,9 @@ static const char *__bch_fs_start(struct bch_fs *c) bch_journal_start(c); err = "error starting allocator thread"; - for_each_member_device(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -824,9 +864,9 @@ static const char *__bch_fs_start(struct bch_fs *c) bch_initial_gc(c, NULL); err = "unable to allocate journal buckets"; - for_each_member_device(ca, c, i) + for_each_rw_member(ca, c, i) if (bch_dev_journal_alloc(ca)) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); goto err; } @@ -838,10 +878,9 @@ static const char *__bch_fs_start(struct bch_fs *c) bch_journal_set_replay_done(&c->journal); err = "error starting allocator thread"; - for_each_member_device(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ 
-888,10 +927,8 @@ recovery_done: mi = bch_sb_get_members(c->disk_sb); now = ktime_get_seconds(); - rcu_read_lock(); - for_each_member_device_rcu(ca, c, i) + for_each_member_device(ca, c, i) mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); - rcu_read_unlock(); SET_BCH_SB_INITIALIZED(c->disk_sb, true); SET_BCH_SB_CLEAN(c->disk_sb, false); @@ -991,30 +1028,27 @@ void bch_dev_release(struct kobject *kobj) static void bch_dev_free(struct bch_dev *ca) { - struct bch_fs *c = ca->fs; unsigned i; cancel_work_sync(&ca->io_error_work); - if (c && c->kobj.state_in_sysfs) { - char buf[12]; - - sprintf(buf, "cache%u", ca->dev_idx); - sysfs_remove_link(&c->kobj, buf); - } + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev) + sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, + "bcache"); if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); bch_free_super(&ca->disk_sb); bch_dev_journal_exit(ca); + free_percpu(ca->sectors_written); bioset_exit(&ca->replica_set); free_percpu(ca->usage_percpu); free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); kfree(ca->bio_prio); - kfree(ca->journal.bio); vfree(ca->buckets); vfree(ca->oldest_gens); free_heap(&ca->heap); @@ -1023,46 +1057,47 @@ static void bch_dev_free(struct bch_dev *ca) for (i = 0; i < RESERVE_NR; i++) free_fifo(&ca->free[i]); + percpu_ref_exit(&ca->io_ref); percpu_ref_exit(&ca->ref); kobject_put(&ca->kobj); - - if (c) - kobject_put(&c->kobj); } -static void bch_dev_free_work(struct work_struct *work) +static void bch_dev_io_ref_release(struct percpu_ref *ref) { - struct bch_dev *ca = container_of(work, struct bch_dev, free_work); + struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); - bch_dev_free(ca); + complete(&ca->offline_complete); } -static void bch_dev_percpu_ref_release(struct percpu_ref *ref) +static void bch_dev_offline(struct bch_dev *ca) { - struct bch_dev *ca = container_of(ref, struct bch_dev, ref); + struct bch_fs *c = ca->fs; + + lockdep_assert_held(&c->state_lock); + + __bch_dev_read_only(ca->fs, ca); + + reinit_completion(&ca->offline_complete); + percpu_ref_kill(&ca->io_ref); + wait_for_completion(&ca->offline_complete); + + if (ca->kobj.state_in_sysfs) { + struct kobject *block = + &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; - schedule_work(&ca->free_work); + sysfs_remove_link(block, "bcache"); + sysfs_remove_link(&ca->kobj, "block"); + } + + bch_free_super(&ca->disk_sb); + bch_dev_journal_exit(ca); } -static void bch_dev_free_rcu(struct rcu_head *rcu) +static void bch_dev_ref_release(struct percpu_ref *ref) { - struct bch_dev *ca = container_of(rcu, struct bch_dev, free_rcu); - - /* - * This decrements the ref count to ca, and once the ref count - * is 0 (outstanding bios to the ca also incremented it and - * decrement it on completion/error), bch_dev_percpu_ref_release - * is called, and that eventually results in bch_dev_free_work - * being called, which in turn results in bch_dev_release being - * called. - * - * In particular, these functions won't be called until there are no - * bios outstanding (the per-cpu ref counts are all 0), so it - * is safe to remove the actual sysfs device at that point, - * and that can indicate success to the user. 
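
The teardown that replaces the old RCU-deferred free (described in the removed comment here) is a conventional kill-and-wait sequence: percpu_ref_kill() makes every subsequent percpu_ref_tryget() fail, and when the final holder drops its reference the release callback calls complete(), which lets wait_for_completion() return. A rough userspace analogue of that sequence; all names are invented, and the real percpu_ref handles races this sketch glosses over:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Invented stand-in for a percpu_ref plus completion. count starts at 1,
 * the "base" reference that kill drops. */
struct ioref {
	atomic_long count;
	atomic_bool dying;
	pthread_mutex_t lock;
	pthread_cond_t done;
};

static void ioref_put(struct ioref *r)
{
	if (atomic_fetch_sub(&r->count, 1) == 1) {
		/* last holder gone: the "release callback" completes */
		pthread_mutex_lock(&r->lock);
		pthread_cond_signal(&r->done);
		pthread_mutex_unlock(&r->lock);
	}
}

static bool ioref_tryget(struct ioref *r)
{
	if (atomic_load(&r->dying))
		return false;	/* like percpu_ref_tryget() after kill */
	atomic_fetch_add(&r->count, 1);
	return true;
}

/* Analogue of bch_dev_offline(): stop new users, drop the base ref,
 * then block until in-flight users drain (wait_for_completion()). */
static void ioref_kill_and_wait(struct ioref *r)
{
	atomic_store(&r->dying, true);	/* percpu_ref_kill() */
	ioref_put(r);

	pthread_mutex_lock(&r->lock);
	while (atomic_load(&r->count) > 0)
		pthread_cond_wait(&r->done, &r->lock);
	pthread_mutex_unlock(&r->lock);
}

Note that io_ref is created with PERCPU_REF_INIT_DEAD a few hunks below and only switched live with percpu_ref_reinit() at the end of bch_dev_online(), so allocated-but-offline devices refuse I/O references from the start.
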
- */ + struct bch_dev *ca = container_of(ref, struct bch_dev, ref); - percpu_ref_kill(&ca->ref); + complete(&ca->stop_complete); } static void bch_dev_stop(struct bch_dev *ca) @@ -1074,26 +1109,44 @@ static void bch_dev_stop(struct bch_dev *ca) BUG_ON(rcu_access_pointer(c->devs[ca->dev_idx]) != ca); rcu_assign_pointer(c->devs[ca->dev_idx], NULL); - call_rcu(&ca->free_rcu, bch_dev_free_rcu); + synchronize_rcu(); + + reinit_completion(&ca->stop_complete); + percpu_ref_kill(&ca->ref); + wait_for_completion(&ca->stop_complete); } -static int bch_dev_online(struct bch_dev *ca) +static int bch_dev_sysfs_online(struct bch_dev *ca) { - char buf[12]; + struct bch_fs *c = ca->fs; + int ret; - sprintf(buf, "cache%u", ca->dev_idx); + if (!c->kobj.state_in_sysfs) + return 0; + + if (!ca->kobj.state_in_sysfs) { + ret = kobject_add(&ca->kobj, &ca->fs->kobj, + "dev-%u", ca->dev_idx); + if (ret) + return ret; + } - if (kobject_add(&ca->kobj, - &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, - "bcache") || - sysfs_create_link(&ca->kobj, &ca->fs->kobj, "set") || - sysfs_create_link(&ca->fs->kobj, &ca->kobj, buf)) - return -1; + if (ca->disk_sb.bdev) { + struct kobject *block = + &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; + + ret = sysfs_create_link(block, &ca->kobj, "bcache"); + if (ret) + return ret; + ret = sysfs_create_link(&ca->kobj, block, "block"); + if (ret) + return ret; + } return 0; } -static struct bch_dev *__bch_dev_alloc(struct bcache_superblock *sb) +static int bch_dev_alloc(struct bch_fs *c, unsigned dev_idx) { struct bch_member *member; size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve; @@ -1102,47 +1155,37 @@ static struct bch_dev *__bch_dev_alloc(struct bcache_superblock *sb) struct bch_dev *ca; if (bch_fs_init_fault("dev_alloc")) - return NULL; + return -ENOMEM; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) - return NULL; - - if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release, - 0, GFP_KERNEL)) { - kfree(ca); - return NULL; - } + return -ENOMEM; kobject_init(&ca->kobj, &bch_dev_ktype); + init_completion(&ca->stop_complete); + init_completion(&ca->offline_complete); spin_lock_init(&ca->self.lock); ca->self.nr = 1; rcu_assign_pointer(ca->self.d[0].dev, ca); - ca->dev_idx = sb->sb->dev_idx; + ca->dev_idx = dev_idx; - INIT_WORK(&ca->free_work, bch_dev_free_work); spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); bch_dev_moving_gc_init(ca); - ca->disk_sb = *sb; - if (sb->mode & FMODE_EXCL) - ca->disk_sb.bdev->bd_holder = ca; - memset(sb, 0, sizeof(*sb)); - INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work); if (bch_fs_init_fault("dev_alloc")) goto err; - member = bch_sb_get_members(ca->disk_sb.sb)->members + - ca->disk_sb.sb->dev_idx; + member = bch_sb_get_members(c->disk_sb)->members + dev_idx; - ca->mi = cache_mi_to_cpu_mi(member); + ca->mi = bch_mi_to_cpu(member); ca->uuid = member->uuid; ca->bucket_bits = ilog2(ca->mi.bucket_size); + scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); /* XXX: tune these */ movinggc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7); @@ -1155,7 +1198,11 @@ static struct bch_dev *__bch_dev_alloc(struct bcache_superblock *sb) free_inc_reserve = movinggc_reserve / 2; heap_size = movinggc_reserve * 8; - if (!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + if (percpu_ref_init(&ca->ref, bch_dev_ref_release, + 0, GFP_KERNEL) || + percpu_ref_init(&ca->io_ref, bch_dev_io_ref_release, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || + 
!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], movinggc_reserve, GFP_KERNEL) || @@ -1166,15 +1213,14 @@ static struct bch_dev *__bch_dev_alloc(struct bcache_superblock *sb) ca->mi.nbuckets)) || !(ca->buckets = vzalloc(sizeof(struct bucket) * ca->mi.nbuckets)) || - !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * + !(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) * 2, GFP_KERNEL)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)) || !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio)) || - !(ca->sectors_written = alloc_percpu(*ca->sectors_written)) || - bch_dev_journal_init(ca)) + !(ca->sectors_written = alloc_percpu(*ca->sectors_written))) goto err; ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); @@ -1182,75 +1228,76 @@ static struct bch_dev *__bch_dev_alloc(struct bcache_superblock *sb) total_reserve = ca->free_inc.size; for (i = 0; i < RESERVE_NR; i++) total_reserve += ca->free[i].size; - pr_debug("%zu buckets reserved", total_reserve); ca->copygc_write_point.group = &ca->self; ca->tiering_write_point.group = &ca->self; - return ca; + ca->fs = c; + rcu_assign_pointer(c->devs[ca->dev_idx], ca); + + if (bch_dev_sysfs_online(ca)) + pr_warn("error creating sysfs objects"); + + return 0; err: bch_dev_free(ca); - return NULL; + return -ENOMEM; } -static const char *__bch_dev_add(struct bch_fs *c, struct bch_dev *ca) +static int bch_dev_online(struct bch_fs *c, struct bcache_superblock *sb) { - if (c->devs[ca->dev_idx]) - return "already have device online in this slot"; + struct bch_dev *ca; + int ret; - if (c->sb.nr_devices == 1) - bdevname(ca->disk_sb.bdev, c->name); + lockdep_assert_held(&c->sb_lock); + + if (le64_to_cpu(sb->sb->seq) > + le64_to_cpu(c->disk_sb->seq)) + bch_sb_to_fs(c, sb->sb); + + BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices || + !c->devs[sb->sb->dev_idx]); + + ca = c->devs[sb->sb->dev_idx]; + if (ca->disk_sb.bdev) { + bch_err(c, "already have device online in slot %u", + sb->sb->dev_idx); + return -EINVAL; + } + + ret = bch_dev_journal_init(ca, sb->sb); + if (ret) + return ret; /* * Increase journal write timeout if flushes to this device are * expensive: */ - if (!blk_queue_nonrot(bdev_get_queue(ca->disk_sb.bdev)) && + if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) && journal_flushes_device(ca)) c->journal.write_delay_ms = max(c->journal.write_delay_ms, 1000U); - kobject_get(&c->kobj); - ca->fs = c; + /* Commit: */ + ca->disk_sb = *sb; + if (sb->mode & FMODE_EXCL) + ca->disk_sb.bdev->bd_holder = ca; + memset(sb, 0, sizeof(*sb)); - kobject_get(&ca->kobj); - rcu_assign_pointer(c->devs[ca->dev_idx], ca); + if (c->sb.nr_devices == 1) + bdevname(ca->disk_sb.bdev, c->name); + bdevname(ca->disk_sb.bdev, ca->name); - if (c->kobj.state_in_sysfs && - bch_dev_online(ca)) + if (bch_dev_sysfs_online(ca)) pr_warn("error creating sysfs objects"); - return NULL; -} + lg_local_lock(&c->usage_lock); + if (!gc_will_visit(c, gc_phase(GC_PHASE_SB_METADATA))) + bch_mark_dev_metadata(ca->fs, ca); + lg_local_unlock(&c->usage_lock); -static const char *bch_dev_alloc(struct bcache_superblock *sb, - struct bch_fs *c, - struct bch_dev **ret) -{ - struct bch_dev *ca; - const char *err; - - ca = __bch_dev_alloc(sb); - if (!ca) - return "cannot allocate memory"; - - err = 
__bch_dev_add(c, ca); - if (err) { - bch_dev_free(ca); - return err; - } - - mutex_lock(&c->sb_lock); - if (le64_to_cpu(ca->disk_sb.sb->seq) > - le64_to_cpu(c->disk_sb->seq)) - bch_sb_to_fs(c, ca->disk_sb.sb); - mutex_unlock(&c->sb_lock); - - if (ret) - *ret = ca; - else - kobject_put(&ca->kobj); - return NULL; + percpu_ref_reinit(&ca->io_ref); + return 0; } /* Device management: */ @@ -1304,7 +1351,7 @@ bool bch_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, { lockdep_assert_held(&c->state_lock); - if (new_state == BCH_MEMBER_STATE_ACTIVE) + if (new_state == BCH_MEMBER_STATE_RW) return true; if (ca->mi.has_data && @@ -1346,8 +1393,7 @@ static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca) { lockdep_assert_held(&c->state_lock); - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) - return NULL; + BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); trace_bcache_cache_read_write(ca); @@ -1370,7 +1416,6 @@ int __bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { struct bch_sb_field_members *mi; - char buf[BDEVNAME_SIZE]; if (ca->mi.state == new_state) return 0; @@ -1378,16 +1423,14 @@ int __bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, if (!bch_dev_state_allowed(c, ca, new_state, flags)) return -EINVAL; - if (new_state == BCH_MEMBER_STATE_ACTIVE) { + if (new_state == BCH_MEMBER_STATE_RW) { if (__bch_dev_read_write(c, ca)) return -ENOMEM; } else { __bch_dev_read_only(c, ca); } - bch_notice(c, "%s %s", - bdevname(ca->disk_sb.bdev, buf), - bch_dev_state[new_state]); + bch_notice(ca, "%s", bch_dev_state[new_state]); mutex_lock(&c->sb_lock); mi = bch_sb_get_members(c->disk_sb); @@ -1448,20 +1491,17 @@ int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca) static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { struct bch_sb_field_members *mi; - char name[BDEVNAME_SIZE]; unsigned dev_idx = ca->dev_idx; int ret; - bdevname(ca->disk_sb.bdev, name); - - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { - bch_err(ca->fs, "Cannot remove RW device"); + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + bch_err(ca, "Cannot remove RW device"); bch_notify_dev_remove_failed(ca); return -EINVAL; } if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { - bch_err(ca->fs, "Cannot remove %s without losing data", name); + bch_err(ca, "Cannot remove without losing data"); bch_notify_dev_remove_failed(ca); return -EINVAL; } @@ -1473,7 +1513,12 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) */ ret = bch_flag_data_bad(ca); if (ret) { - bch_err(c, "Remove of %s failed", name); + bch_err(ca, "Remove failed"); + return ret; + } + + if (ca->mi.has_data || ca->mi.has_metadata) { + bch_err(ca, "Can't remove, still has data"); return ret; } @@ -1489,13 +1534,9 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) bch_journal_meta(&c->journal); + bch_dev_offline(ca); bch_dev_stop(ca); - - /* - * RCU barrier between dropping between c->dev and dropping from - * member info: - */ - synchronize_rcu(); + bch_dev_free(ca); /* * Free this device's slot in the bch_member array - all pointers to @@ -1517,6 +1558,7 @@ int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) int ret; mutex_lock(&c->state_lock); + percpu_ref_put(&ca->ref); ret = __bch_dev_remove(c, ca, flags); mutex_unlock(&c->state_lock); @@ -1556,18 +1598,9 @@ int bch_dev_add(struct bch_fs *c, const char *path) saved_mi = dev_mi->members[sb.sb->dev_idx]; saved_mi.last_mount = 
cpu_to_le64(ktime_get_seconds()); - /* - * XXX: ditch the GC stuff, just don't remove a device until nothing is - * using its dev_idx anymore - */ - down_read(&c->gc_lock); - if (dynamic_fault("bcache:add:no_slot")) goto no_slot; - if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) - goto no_slot; - mi = bch_sb_get_members(c->disk_sb); for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) if (dev_idx >= c->sb.nr_devices || @@ -1575,15 +1608,11 @@ int bch_dev_add(struct bch_fs *c, const char *path) sizeof(uuid_le))) goto have_slot; no_slot: - up_read(&c->gc_lock); - err = "no slots available in superblock"; ret = -ENOSPC; goto err_unlock; have_slot: - up_read(&c->gc_lock); - nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); u64s = (sizeof(struct bch_sb_field_members) + sizeof(struct bch_member) * nr_devices) / sizeof(u64); @@ -1604,53 +1633,44 @@ have_slot: sb.sb->dev_idx = dev_idx; sb.sb->nr_devices = nr_devices; - if (bch_fs_mi_update(c, dev_mi->members, nr_devices)) { - err = "cannot allocate memory"; - ret = -ENOMEM; - goto err_unlock; - } - /* commit new member info */ memcpy(mi, dev_mi, u64s * sizeof(u64)); c->disk_sb->nr_devices = nr_devices; c->sb.nr_devices = nr_devices; - ca = __bch_dev_alloc(&sb); - if (!ca) { + if (bch_dev_alloc(c, dev_idx)) { err = "cannot allocate memory"; ret = -ENOMEM; goto err_unlock; } - bch_dev_mark_superblocks(ca); - - err = "journal alloc failed"; - if (bch_dev_journal_alloc(ca)) + if (bch_dev_online(c, &sb)) { + err = "bch_dev_online() error"; + ret = -ENOMEM; goto err_unlock; - - err = __bch_dev_add(c, ca); - BUG_ON(err); + } bch_write_super(c); mutex_unlock(&c->sb_lock); - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { + ca = c->devs[dev_idx]; + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + err = "journal alloc failed"; + if (bch_dev_journal_alloc(ca)) + goto err; + err = __bch_dev_read_write(c, ca); if (err) goto err; } bch_notify_dev_added(ca); - - kobject_put(&ca->kobj); mutex_unlock(&c->state_lock); return 0; err_unlock: mutex_unlock(&c->sb_lock); err: mutex_unlock(&c->state_lock); - if (ca) - bch_dev_stop(ca); bch_free_super(&sb); bch_err(c, "Unable to add device: %s", err); @@ -1708,11 +1728,14 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices, if (!c) goto err; - for (i = 0; i < nr_devices; i++) { - err = bch_dev_alloc(&sb[i], c, NULL); - if (err) + err = "bch_dev_online() error"; + mutex_lock(&c->sb_lock); + for (i = 0; i < nr_devices; i++) + if (bch_dev_online(c, &sb[i])) { + mutex_unlock(&c->sb_lock); goto err; - } + } + mutex_unlock(&c->sb_lock); err = "insufficient devices"; if (!bch_fs_may_start(c, 0)) @@ -1760,8 +1783,8 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, if (err) return err; - mutex_lock(&bch_register_lock); - c = bch_fs_lookup(sb->sb->uuid); + mutex_lock(&bch_fs_list_lock); + c = __bch_uuid_to_fs(sb->sb->uuid); if (c) { closure_get(&c->cl); @@ -1777,9 +1800,14 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, allocated_fs = true; } - err = bch_dev_alloc(sb, c, NULL); - if (err) + err = "bch_dev_online() error"; + + mutex_lock(&c->sb_lock); + if (bch_dev_online(c, sb)) { + mutex_unlock(&c->sb_lock); goto err; + } + mutex_unlock(&c->sb_lock); if (!c->opts.nostart && bch_fs_may_start(c, 0)) { err = __bch_fs_start(c); @@ -1792,11 +1820,11 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, goto err; closure_put(&c->cl); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); return NULL; err: - 
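
The slot search in bch_dev_add() above is split across two hunks, but its shape is clear from context: a slot is free if it lies past the current member array, or if its uuid has been zeroed, matching the zero-uuid test the allocation loop in bch_fs_alloc() applies earlier in the patch. A standalone illustration of that logic, with all names invented (MEMBERS_MAX stands in for BCH_SB_MEMBERS_MAX; memcmp() against a zero buffer stands in for bch_is_zero()):

#include <string.h>

#define MEMBERS_MAX 64	/* stand-in for BCH_SB_MEMBERS_MAX */

struct member {
	unsigned char uuid[16];
};

/* Invented helper: returns the first reusable slot index, or -1 if the
 * member array is full ("no slots available in superblock"). */
static int find_free_slot(const struct member *members, unsigned nr_devices)
{
	static const unsigned char zero_uuid[16];
	unsigned i;

	for (i = 0; i < MEMBERS_MAX; i++)
		if (i >= nr_devices ||
		    !memcmp(members[i].uuid, zero_uuid, sizeof(zero_uuid)))
			return (int) i;

	return -1;
}
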
mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); if (allocated_fs) bch_fs_stop(c); @@ -1817,9 +1845,9 @@ const char *bch_fs_open_incremental(const char *path) return err; if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) { - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); err = bch_backing_dev_register(&sb); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); } else { err = __bch_fs_open_incremental(&sb, opts); } @@ -1878,7 +1906,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) code == SYS_POWER_OFF) { struct bch_fs *c; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); if (!list_empty(&bch_fs_list)) pr_info("Setting all devices read only:"); @@ -1889,7 +1917,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) list_for_each_entry(c, &bch_fs_list, list) bch_fs_read_only(c); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); } return NOTIFY_DONE; @@ -1933,7 +1961,6 @@ static int __init bcache_init(void) NULL }; - mutex_init(&bch_register_lock); register_reboot_notifier(&reboot); bkey_pack_test(); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 13fb0e6b42e3..53026cb73696 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -20,42 +20,79 @@ static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) return s & (ca->mi.bucket_size - 1); } -static inline struct bch_dev *bch_next_cache_rcu(struct bch_fs *c, - unsigned *iter) +static inline struct bch_dev *__bch_next_dev(struct bch_fs *c, unsigned *iter) { - struct bch_dev *ret = NULL; + struct bch_dev *ca = NULL; while (*iter < c->sb.nr_devices && - !(ret = rcu_dereference(c->devs[*iter]))) + !(ca = rcu_dereference_check(c->devs[*iter], + lockdep_is_held(&c->state_lock)))) (*iter)++; - return ret; + return ca; } +#define __for_each_member_device(ca, c, iter) \ + for ((iter) = 0; ((ca) = __bch_next_dev((c), &(iter))); (iter)++) + #define for_each_member_device_rcu(ca, c, iter) \ - for ((iter) = 0; ((ca) = bch_next_cache_rcu((c), &(iter))); (iter)++) + __for_each_member_device(ca, c, iter) -static inline struct bch_dev *bch_get_next_cache(struct bch_fs *c, - unsigned *iter) +static inline struct bch_dev *bch_get_next_dev(struct bch_fs *c, unsigned *iter) { - struct bch_dev *ret; + struct bch_dev *ca; rcu_read_lock(); - if ((ret = bch_next_cache_rcu(c, iter))) - percpu_ref_get(&ret->ref); + if ((ca = __bch_next_dev(c, iter))) + percpu_ref_get(&ca->ref); rcu_read_unlock(); - return ret; + return ca; } /* * If you break early, you must drop your ref on the current device */ -#define for_each_member_device(ca, c, iter) \ +#define for_each_member_device(ca, c, iter) \ for ((iter) = 0; \ - (ca = bch_get_next_cache(c, &(iter))); \ + (ca = bch_get_next_dev(c, &(iter))); \ percpu_ref_put(&ca->ref), (iter)++) +static inline struct bch_dev *bch_get_next_online_dev(struct bch_fs *c, + unsigned *iter, + int state_mask) +{ + struct bch_dev *ca; + + rcu_read_lock(); + while ((ca = __bch_next_dev(c, iter)) && + (!((1 << ca->mi.state) & state_mask) || + !percpu_ref_tryget(&ca->io_ref))) + (*iter)++; + rcu_read_unlock(); + + return ca; +} + +#define __for_each_online_member(ca, c, iter, state_mask) \ + for ((iter) = 0; \ + (ca = bch_get_next_online_dev(c, &(iter), state_mask)); \ + percpu_ref_put(&ca->io_ref), (iter)++) + +#define for_each_online_member(ca, c, iter) \ + __for_each_online_member(ca, c, iter, ~0) + +#define for_each_rw_member(ca, c, iter) \ + 
__for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_RW) + +#define for_each_readable_member(ca, c, iter) \ + __for_each_online_member(ca, c, iter, \ + (1 << BCH_MEMBER_STATE_RW)|(1 << BCH_MEMBER_STATE_RO)) + +struct bch_fs *bch_bdev_to_fs(struct block_device *); +struct bch_fs *bch_uuid_to_fs(uuid_le); +int bch_congested(struct bch_fs *, int); + void bch_dev_release(struct kobject *); bool bch_dev_state_allowed(struct bch_fs *, struct bch_dev *, @@ -84,8 +121,6 @@ const char *bch_fs_open(char * const *, unsigned, struct bch_opts, struct bch_fs **); const char *bch_fs_open_incremental(const char *path); -extern struct mutex bch_register_lock; -extern struct list_head bch_fs_list; extern struct workqueue_struct *bcache_io_wq; extern struct crypto_shash *bch_sha256; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 14675c2b721f..91897671b52d 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -206,12 +206,10 @@ SHOW(bch_cached_dev) return 0; } -STORE(__cached_dev) +STORE(bch_cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; - struct bch_fs *c; struct kobj_uevent_env *env; #define d_strtoul(var) sysfs_strtoul(var, dc->var) @@ -228,6 +226,13 @@ STORE(__cached_dev) d_strtoi_h(sequential_cutoff); d_strtoi_h(readahead); + if (attr == &sysfs_writeback_running) + bch_writeback_queue(dc); + + if (attr == &sysfs_writeback_percent) + schedule_delayed_work(&dc->writeback_pd_update, + dc->writeback_pd_update_seconds * HZ); + if (attr == &sysfs_clear_stats) bch_cache_accounting_clear(&dc->accounting); @@ -295,17 +300,25 @@ STORE(__cached_dev) } if (attr == &sysfs_attach) { - if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid)) + struct bch_fs *c; + uuid_le uuid; + int ret; + + if (uuid_parse(buf, &uuid)) return -EINVAL; - list_for_each_entry(c, &bch_fs_list, list) { - v = bch_cached_dev_attach(dc, c); - if (!v) - return size; + c = bch_uuid_to_fs(uuid); + if (!c) { + pr_err("Can't attach %s: cache set not found", buf); + return -ENOENT; } - pr_err("Can't attach %s: cache set not found", buf); - size = v; + dc->disk_sb.sb->set_uuid = uuid; + + ret = bch_cached_dev_attach(dc, c); + closure_put(&c->cl); + if (ret) + return ret; } if (attr == &sysfs_detach && dc->disk.c) @@ -317,25 +330,6 @@ STORE(__cached_dev) return size; } -STORE(bch_cached_dev) -{ - struct cached_dev *dc = container_of(kobj, struct cached_dev, - disk.kobj); - - mutex_lock(&bch_register_lock); - size = __cached_dev_store(kobj, attr, buf, size); - - if (attr == &sysfs_writeback_running) - bch_writeback_queue(dc); - - if (attr == &sysfs_writeback_percent) - schedule_delayed_work(&dc->writeback_pd_update, - dc->writeback_pd_update_seconds * HZ); - - mutex_unlock(&bch_register_lock); - return size; -} - static struct attribute *bch_cached_dev_files[] = { &sysfs_attach, &sysfs_detach, @@ -380,7 +374,7 @@ SHOW(bch_blockdev_volume) return 0; } -STORE(__bch_blockdev_volume) +STORE(bch_blockdev_volume) { struct bcache_device *d = container_of(kobj, struct bcache_device, kobj); @@ -438,7 +432,6 @@ STORE(__bch_blockdev_volume) return size; } -STORE_LOCKED(bch_blockdev_volume) static struct attribute *bch_blockdev_volume_files[] = { &sysfs_unregister, @@ -1224,7 +1217,7 @@ SHOW(bch_dev) return 0; } -STORE(__bch_dev) +STORE(bch_dev) { struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); struct bch_fs *c = ca->fs; @@ -1300,7 +1293,6 @@ STORE(__bch_dev) return size; } -STORE_LOCKED(bch_dev) static struct attribute *bch_dev_files[] = { &sysfs_uuid, diff --git 
a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h index 9d5845874931..02700246acaf 100644 --- a/fs/bcachefs/sysfs.h +++ b/fs/bcachefs/sysfs.h @@ -21,16 +21,6 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ const char *buf, size_t size) \ -#define STORE_LOCKED(fn) \ -STORE(fn) \ -{ \ - ssize_t ret; \ - mutex_lock(&bch_register_lock); \ - ret = __ ## fn ## _store(kobj, attr, buf, size); \ - mutex_unlock(&bch_register_lock); \ - return ret; \ -} - #define __sysfs_attribute(_name, _mode) \ static struct attribute sysfs_##_name = \ { .name = #_name, .mode = _mode } diff --git a/fs/bcachefs/tier.c b/fs/bcachefs/tier.c index 1d6e06519483..b1ac13c99275 100644 --- a/fs/bcachefs/tier.c +++ b/fs/bcachefs/tier.c @@ -30,7 +30,6 @@ static bool tiering_pred(struct bch_fs *c, if (bkey_extent_is_data(k.k)) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const struct bch_extent_ptr *ptr; - struct bch_member_rcu *mi; unsigned replicas = 0; /* Make sure we have room to add a new pointer: */ @@ -38,12 +37,9 @@ BKEY_EXTENT_VAL_U64s_MAX) return false; - mi = fs_member_info_get(c); extent_for_each_ptr(e, ptr) - if (ptr->dev < mi->nr_devices && - mi->m[ptr->dev].tier >= s->tier->idx) + if (c->devs[ptr->dev]->mi.tier >= s->tier->idx) replicas++; - fs_member_info_put(); return replicas < c->opts.data_replicas; } @@ -54,7 +50,7 @@ static bool tiering_pred(struct bch_fs *c, static void tier_put_device(struct tiering_state *s) { if (s->ca) - percpu_ref_put(&s->ca->ref); + percpu_ref_put(&s->ca->io_ref); s->ca = NULL; } @@ -74,7 +70,7 @@ static void tier_next_device(struct bch_fs *c, struct tiering_state *s) if (s->tier->devs.nr) { s->ca = s->tier->devs.d[s->dev_idx].dev; - percpu_ref_get(&s->ca->ref); + percpu_ref_get(&s->ca->io_ref); } spin_unlock(&s->tier->devs.lock); } @@ -183,19 +179,19 @@ static int bch_tiering_thread(void *arg) last = atomic_long_read(&clock->now); tier_capacity = available_sectors = 0; - rcu_read_lock(); for (faster_tier = c->tiers; faster_tier != tier; faster_tier++) { - group_for_each_dev_rcu(ca, &faster_tier->devs, i) { + spin_lock(&faster_tier->devs.lock); + group_for_each_dev(ca, &faster_tier->devs, i) { tier_capacity += (ca->mi.nbuckets - ca->mi.first_bucket) << ca->bucket_bits; available_sectors += dev_buckets_available(ca) << ca->bucket_bits; } + spin_unlock(&faster_tier->devs.lock); } - rcu_read_unlock(); if (available_sectors < (tier_capacity >> 1)) break;
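
A closing note on the iterator rework in super.h above: member states are tested with a bitmask, so for_each_rw_member() and for_each_readable_member() are just __for_each_online_member() with different masks, and for_each_online_member() passes ~0 to accept any state. A tiny standalone illustration of the mask test; the enum ordering mirrors the bch_dev_state[] strings ("readwrite", "readonly", "failed", "spare") from opts.c, but the names here are illustrative:

#include <stdio.h>

/* Same ordering as the bch_dev_state[] strings in this patch */
enum member_state { STATE_RW, STATE_RO, STATE_FAILED, STATE_SPARE };

/* The test bch_get_next_online_dev() applies to each candidate device */
static int state_matches(enum member_state s, unsigned mask)
{
	return !!((1U << s) & mask);
}

int main(void)
{
	unsigned rw_only  = 1U << STATE_RW;
	unsigned readable = (1U << STATE_RW) | (1U << STATE_RO);

	printf("ro in rw_only:  %d\n", state_matches(STATE_RO, rw_only));	/* 0 */
	printf("ro in readable: %d\n", state_matches(STATE_RO, readable));	/* 1 */
	return 0;
}

This is what lets the superblock write path iterate every online device regardless of writability, while the allocator and moving GC start-up paths restrict themselves to RW members.
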