| author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-04-06 23:09:13 -0400 |
|---|---|---|
| committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-22 00:44:18 -0400 |
| commit | 2c90a2f3df10976d3a051487374fb12b8caadd4a (patch) | |
| tree | b5b5c35ea930f86bbdea8f162b4296943e977c65 | |
| parent | f794fedb9ed1672cf776e3aed84cd24da560bfb0 (diff) | |
bcachefs: Fix various device add bugs
| -rw-r--r-- | fs/bcachefs/alloc.c | 75 |
| -rw-r--r-- | fs/bcachefs/alloc.h | 2 |
| -rw-r--r-- | fs/bcachefs/bcachefs.h | 1 |
| -rw-r--r-- | fs/bcachefs/btree_gc.c | 9 |
| -rw-r--r-- | fs/bcachefs/buckets.c | 51 |
| -rw-r--r-- | fs/bcachefs/buckets.h | 1 |
| -rw-r--r-- | fs/bcachefs/journal.c | 138 |
| -rw-r--r-- | fs/bcachefs/journal.h | 2 |
| -rw-r--r-- | fs/bcachefs/super.c | 230 |

9 files changed, 279 insertions(+), 230 deletions(-)
```diff
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index bb33cad9ee7f..13b7a3e50cd0 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -1111,8 +1111,8 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
 	return ob;
 }
 
-/* _only_ for allocating the journal and btree roots on a brand new fs: */
-int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
+/* _only_ for allocating the journal on a new device: */
+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
 	ssize_t b;
@@ -1121,14 +1121,8 @@ int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
 	buckets = bucket_array(ca);
 
 	for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
-		if (is_available_bucket(buckets->b[b].mark)) {
-			bch2_mark_alloc_bucket(c, ca, b, true,
-					gc_pos_alloc(c, NULL),
-					BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-					BCH_BUCKET_MARK_GC_LOCK_HELD);
-			set_bit(b, ca->buckets_dirty);
+		if (is_available_bucket(buckets->b[b].mark))
 			goto success;
-		}
 	b = -1;
 success:
 	rcu_read_unlock();
@@ -1200,9 +1194,8 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 		break;
 	}
 
-	if (unlikely(test_bit(BCH_FS_BRAND_NEW_FS, &c->flags)) &&
-	    (bucket = bch2_bucket_alloc_startup(c, ca)) >= 0)
-		goto out;
+	if (cl)
+		closure_wait(&c->freelist_wait, cl);
 
 	spin_unlock(&c->freelist_lock);
 
@@ -1283,7 +1276,7 @@ void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
 		*v = *v < scale ? 0 : *v - scale;
 }
 
-static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
+static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
 					struct write_point *wp,
 					unsigned nr_replicas,
 					enum alloc_reserve reserve,
@@ -1349,52 +1342,22 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	EBUG_ON(reserve == RESERVE_MOVINGGC &&
 		ret != ALLOC_SUCCESS &&
 		ret != OPEN_BUCKETS_EMPTY);
-	rcu_read_unlock();
-	return ret;
-}
-
-static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
-				 unsigned nr_replicas,
-				 enum alloc_reserve reserve,
-				 struct bch_devs_mask *devs,
-				 struct closure *cl)
-{
-	bool waiting = false;
-
-	while (1) {
-		switch (__bch2_bucket_alloc_set(c, wp, nr_replicas,
-						reserve, devs, cl)) {
-		case ALLOC_SUCCESS:
-			if (waiting)
-				closure_wake_up(&c->freelist_wait);
-
-			return 0;
-
-		case NO_DEVICES:
-			if (waiting)
-				closure_wake_up(&c->freelist_wait);
-			return -EROFS;
-
-		case FREELIST_EMPTY:
-			if (!cl)
-				return -ENOSPC;
-
-			if (waiting)
-				return -EAGAIN;
-
-			/* Retry allocation after adding ourself to waitlist: */
-			closure_wait(&c->freelist_wait, cl);
-			waiting = true;
-			break;
-		case OPEN_BUCKETS_EMPTY:
-			return cl ? -EAGAIN : -ENOSPC;
-		default:
-			BUG();
-		}
+	switch (ret) {
+	case ALLOC_SUCCESS:
+		return 0;
+	case NO_DEVICES:
+		return -EROFS;
+	case FREELIST_EMPTY:
+	case OPEN_BUCKETS_EMPTY:
+		return cl ? -EAGAIN : -ENOSPC;
+	default:
+		BUG();
 	}
 }
 
@@ -2038,8 +2001,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 
 	/* did we find enough buckets? */
 	for_each_rw_member(ca, c, dev_iter)
-		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size)
+		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) {
+			percpu_ref_put(&ca->io_ref);
 			goto not_enough;
+		}
 
 	return 0;
 not_enough:
diff --git a/fs/bcachefs/alloc.h b/fs/bcachefs/alloc.h
index f914dbd56c2c..c3faaf7a7c5b 100644
--- a/fs/bcachefs/alloc.h
+++ b/fs/bcachefs/alloc.h
@@ -30,6 +30,8 @@ enum bucket_alloc_ret {
 	NO_DEVICES	= -3,	/* -EROFS */
 };
 
+long bch2_bucket_alloc_new_fs(struct bch_dev *);
+
 int bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, enum alloc_reserve, bool,
 		      struct closure *);
 
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 9c4e7fdecbc7..362dfbdccb1e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -431,7 +431,6 @@ struct bch_dev {
  */
 enum {
 	/* startup: */
-	BCH_FS_BRAND_NEW_FS,
 	BCH_FS_ALLOC_READ_DONE,
 	BCH_FS_ALLOCATOR_STARTED,
 	BCH_FS_INITIAL_GC_DONE,
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 70e4049f7daa..9ab04afa1a45 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -318,7 +318,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 	unsigned i;
 	u64 b;
 
-	lockdep_assert_held(&c->sb_lock);
+	if (c)
+		lockdep_assert_held(&c->sb_lock);
 
 	for (i = 0; i < layout->nr_superblocks; i++) {
 		u64 offset = le64_to_cpu(layout->sb_offset[i]);
@@ -332,7 +333,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 			  BCH_DATA_SB, flags);
 	}
 
-	spin_lock(&c->journal.lock);
+	if (c)
+		spin_lock(&c->journal.lock);
 
 	for (i = 0; i < ca->journal.nr; i++) {
 		b = ca->journal.buckets[i];
@@ -341,7 +343,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 			  gc_phase(GC_PHASE_SB), flags);
 	}
 
-	spin_unlock(&c->journal.lock);
+	if (c)
+		spin_unlock(&c->journal.lock);
 }
 
 static void bch2_mark_superblocks(struct bch_fs *c)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 864de940f1e7..1f944cb8a3e5 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -309,7 +309,7 @@ static bool bucket_became_unavailable(struct bch_fs *c,
 {
 	return is_available_bucket(old) &&
 	       !is_available_bucket(new) &&
-	       c && c->gc_pos.phase == GC_PHASE_DONE;
+	       (!c || c->gc_pos.phase == GC_PHASE_DONE);
 }
 
 void bch2_fs_usage_apply(struct bch_fs *c,
@@ -351,12 +351,16 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct bch_dev_usage *dev_usage;
 
-	lockdep_assert_held(&c->usage_lock);
+	if (c)
+		lockdep_assert_held(&c->usage_lock);
 
-	bch2_fs_inconsistent_on(old.data_type && new.data_type &&
-				old.data_type != new.data_type, c,
+	if (old.data_type && new.data_type &&
+	    old.data_type != new.data_type) {
+		BUG_ON(!c);
+		bch2_fs_inconsistent(c,
 			"different types of data in same bucket: %u, %u",
 			old.data_type, new.data_type);
+	}
 
 	dev_usage = this_cpu_ptr(ca->usage_percpu);
 
@@ -466,21 +470,29 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 
 	BUG_ON(!type);
 
-	lg_local_lock(&c->usage_lock);
-	g = bucket(ca, b);
+	if (likely(c)) {
+		lg_local_lock(&c->usage_lock);
 
-	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
-	    gc_will_visit(c, pos)) {
-		lg_local_unlock(&c->usage_lock);
-		return;
+		if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
+		    gc_will_visit(c, pos)) {
+			lg_local_unlock(&c->usage_lock);
+			return;
+		}
 	}
 
+	preempt_disable();
+
+	g = bucket(ca, b);
 	old = bucket_data_cmpxchg(c, ca, g, new, ({
 		saturated_add(ca, new.dirty_sectors, sectors,
 			      GC_MAX_SECTORS_USED);
 		new.data_type	= type;
 	}));
-	lg_local_unlock(&c->usage_lock);
+
+	preempt_enable();
+
+	if (likely(c))
+		lg_local_unlock(&c->usage_lock);
 
 	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
 	       bucket_became_unavailable(c, old, new));
@@ -859,9 +871,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	bch2_copygc_stop(ca);
 
-	down_write(&c->gc_lock);
-	down_write(&ca->bucket_lock);
-	lg_global_lock(&c->usage_lock);
+	if (resize) {
+		down_write(&c->gc_lock);
+		down_write(&ca->bucket_lock);
+		lg_global_lock(&c->usage_lock);
+	}
 
 	old_buckets = bucket_array(ca);
 
@@ -885,7 +899,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	swap(ca->oldest_gens, oldest_gens);
 	swap(ca->buckets_dirty, buckets_dirty);
 
-	lg_global_unlock(&c->usage_lock);
+	if (resize)
+		lg_global_unlock(&c->usage_lock);
 
 	spin_lock(&c->freelist_lock);
 	for (i = 0; i < RESERVE_NR; i++) {
@@ -904,8 +919,10 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	nbuckets = ca->mi.nbuckets;
 
-	up_write(&ca->bucket_lock);
-	up_write(&c->gc_lock);
+	if (resize) {
+		up_write(&ca->bucket_lock);
+		up_write(&c->gc_lock);
+	}
 
 	if (start_copygc &&
 	    bch2_copygc_start(c, ca))
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 8dbd9152aec8..399a853c80bb 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -31,6 +31,7 @@ static inline struct bucket_array *bucket_array(struct bch_dev *ca)
 {
 	return rcu_dereference_check(ca->buckets,
+				     !ca->fs ||
 				     lockdep_is_held(&ca->fs->usage_lock) ||
 				     lockdep_is_held(&ca->fs->gc_lock) ||
 				     lockdep_is_held(&ca->bucket_lock));
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 207f95be9ed1..b5bc7edcacf7 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1584,40 +1584,19 @@ err:
 	return ret;
 }
 
-/*
- * Allocate more journal space at runtime - not currently making use if it, but
- * the code works:
- */
-static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
-				       unsigned nr)
+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
+					 bool new_fs, struct closure *cl)
 {
-	struct journal *j = &c->journal;
+	struct bch_fs *c = ca->fs;
 	struct journal_device *ja = &ca->journal;
 	struct bch_sb_field_journal *journal_buckets;
-	struct disk_reservation disk_res = { 0, 0 };
-	struct closure cl;
 	u64 *new_bucket_seq = NULL, *new_buckets = NULL;
 	int ret = 0;
 
-	closure_init_stack(&cl);
-
 	/* don't handle reducing nr of buckets yet: */
 	if (nr <= ja->nr)
 		return 0;
 
-	/*
-	 * note: journal buckets aren't really counted as _sectors_ used yet, so
-	 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
-	 * when space used goes up without a reservation - but we do need the
-	 * reservation to ensure we'll actually be able to allocate:
-	 */
-
-	if (bch2_disk_reservation_get(c, &disk_res,
-			bucket_to_sector(ca, nr - ja->nr), 1, 0))
-		return -ENOSPC;
-
-	mutex_lock(&c->sb_lock);
-
 	ret = -ENOMEM;
 	new_buckets	= kzalloc(nr * sizeof(u64), GFP_KERNEL);
 	new_bucket_seq	= kzalloc(nr * sizeof(u64), GFP_KERNEL);
@@ -1629,29 +1608,41 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 	if (!journal_buckets)
 		goto err;
 
-	spin_lock(&j->lock);
+	if (c)
+		spin_lock(&c->journal.lock);
+
 	memcpy(new_buckets,	ja->buckets,	ja->nr * sizeof(u64));
 	memcpy(new_bucket_seq,	ja->bucket_seq,	ja->nr * sizeof(u64));
 	swap(new_buckets,	ja->buckets);
 	swap(new_bucket_seq,	ja->bucket_seq);
-	spin_unlock(&j->lock);
+
+	if (c)
+		spin_unlock(&c->journal.lock);
 
 	while (ja->nr < nr) {
-		struct open_bucket *ob;
-		size_t bucket;
-		int ob_idx;
+		struct open_bucket *ob = NULL;
+		long bucket;
 
-		ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, &cl);
-		if (ob_idx < 0) {
-			if (!closure_wait(&c->freelist_wait, &cl))
-				closure_sync(&cl);
-			continue;
+		if (new_fs) {
+			bucket = bch2_bucket_alloc_new_fs(ca);
+			if (bucket < 0) {
+				ret = -ENOSPC;
+				goto err;
+			}
+		} else {
+			int ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, cl);
+			if (ob_idx < 0) {
+				ret = cl ? -EAGAIN : -ENOSPC;
+				goto err;
+			}
+
+			ob = c->open_buckets + ob_idx;
+			bucket = sector_to_bucket(ca, ob->ptr.offset);
 		}
 
-		ob = c->open_buckets + ob_idx;
-		bucket = sector_to_bucket(ca, ob->ptr.offset);
+		if (c)
+			spin_lock(&c->journal.lock);
 
-		spin_lock(&j->lock);
 		__array_insert_item(ja->buckets,		ja->nr, ja->last_idx);
 		__array_insert_item(ja->bucket_seq,		ja->nr, ja->last_idx);
 		__array_insert_item(journal_buckets->buckets,	ja->nr, ja->last_idx);
@@ -1666,34 +1657,77 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 			ja->last_idx++;
 		}
 		ja->nr++;
-		spin_unlock(&j->lock);
 
-		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
-					  ca->mi.bucket_size,
-					  gc_phase(GC_PHASE_SB), 0);
+		if (c)
+			spin_unlock(&c->journal.lock);
 
-		bch2_open_bucket_put(c, ob);
+		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
+					  ca->mi.bucket_size,
+					  gc_phase(GC_PHASE_SB),
+					  new_fs
+					  ? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
+					  : 0);
+
+		if (!new_fs)
+			bch2_open_bucket_put(c, ob);
 	}
 
-	bch2_write_super(c);
-
 	ret = 0;
 err:
-	mutex_unlock(&c->sb_lock);
-
 	kfree(new_bucket_seq);
 	kfree(new_buckets);
-	bch2_disk_reservation_put(c, &disk_res);
 
-	if (!ret)
-		bch2_dev_allocator_add(c, ca);
+	return ret;
+}
+
+/*
+ * Allocate more journal space at runtime - not currently making use if it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+				unsigned nr)
+{
+	struct journal_device *ja = &ca->journal;
+	struct closure cl;
+	unsigned current_nr;
+	int ret;
+
+	closure_init_stack(&cl);
+
+	do {
+		struct disk_reservation disk_res = { 0, 0 };
+
+		closure_sync(&cl);
+
+		mutex_lock(&c->sb_lock);
+		current_nr = ja->nr;
+
+		/*
+		 * note: journal buckets aren't really counted as _sectors_ used yet, so
+		 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
+		 * when space used goes up without a reservation - but we do need the
+		 * reservation to ensure we'll actually be able to allocate:
+		 */
+
+		if (bch2_disk_reservation_get(c, &disk_res,
+				bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+			mutex_unlock(&c->sb_lock);
+			return -ENOSPC;
+		}
+
+		ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+
+		bch2_disk_reservation_put(c, &disk_res);
 
-	closure_sync(&cl);
+		if (ja->nr != current_nr)
+			bch2_write_super(c);
+		mutex_unlock(&c->sb_lock);
+	} while (ret == -EAGAIN);
 
 	return ret;
 }
 
-int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_dev *ca)
 {
 	unsigned nr;
 
@@ -1709,7 +1743,7 @@ int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
 		     min(1 << 10,
 			 (1 << 20) / ca->mi.bucket_size));
 
-	return bch2_set_nr_journal_buckets(c, ca, nr);
+	return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
 }
 
 /* Journalling */
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 46ae8f0d256d..cf5cc9ba008e 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -400,7 +400,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 ssize_t bch2_journal_print_debug(struct journal *, char *);
 ssize_t bch2_journal_print_pins(struct journal *, char *);
 
-int bch2_dev_journal_alloc(struct bch_fs *, struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *);
 
 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
 void bch2_fs_journal_stop(struct journal *);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 92cf630fcf8b..8b78ac3e4f5d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -813,21 +813,18 @@ const char *bch2_fs_start(struct bch_fs *c)
 		bch_notice(c, "initializing new filesystem");
 
 		set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
-		set_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
 
 		ret = bch2_initial_gc(c, &journal);
 		if (ret)
 			goto err;
 
 		err = "unable to allocate journal buckets";
-		for_each_rw_member(ca, c, i)
-			if (bch2_dev_journal_alloc(c, ca)) {
+		for_each_online_member(ca, c, i)
+			if (bch2_dev_journal_alloc(ca)) {
 				percpu_ref_put(&ca->io_ref);
 				goto err;
 			}
 
-		clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
-
 		for (i = 0; i < BTREE_ID_NR; i++)
 			bch2_btree_root_alloc(c, i);
 
@@ -1068,28 +1065,19 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
 	return 0;
 }
 
-static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
+					struct bch_member *member)
 {
-	struct bch_member *member;
-	struct bch_dev *ca = NULL;
-	int ret = 0;
-
-	pr_verbose_init(c->opts, "");
-
-	if (bch2_fs_init_fault("dev_alloc"))
-		goto err;
+	struct bch_dev *ca;
 
 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
 	if (!ca)
-		goto err;
+		return NULL;
 
 	kobject_init(&ca->kobj, &bch2_dev_ktype);
 	init_completion(&ca->ref_completion);
 	init_completion(&ca->io_ref_completion);
 
-	ca->dev_idx = dev_idx;
-	__set_bit(ca->dev_idx, ca->self.d);
-
 	init_rwsem(&ca->bucket_lock);
 
 	writepoint_init(&ca->copygc_write_point, BCH_DATA_USER);
@@ -1099,14 +1087,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 
 	INIT_WORK(&ca->io_error_work, bch2_io_error_work);
 
-	if (bch2_fs_init_fault("dev_alloc"))
-		goto err;
-
-	member = bch2_sb_get_members(c->disk_sb)->members + dev_idx;
-
 	ca->mi = bch2_mi_to_cpu(member);
 	ca->uuid = member->uuid;
-	scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
 
 	if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
 			    0, GFP_KERNEL) ||
@@ -1118,11 +1100,43 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 	    !(ca->io_done	= alloc_percpu(*ca->io_done)))
 		goto err;
 
+	return ca;
+err:
+	bch2_dev_free(ca);
+	return NULL;
+}
+
+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
+			    unsigned dev_idx)
+{
+	ca->dev_idx = dev_idx;
+	__set_bit(ca->dev_idx, ca->self.d);
+	scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
+
 	ca->fs = c;
 	rcu_assign_pointer(c->devs[ca->dev_idx], ca);
 
 	if (bch2_dev_sysfs_online(c, ca))
 		pr_warn("error creating sysfs objects");
+}
+
+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
+{
+	struct bch_member *member =
+		bch2_sb_get_members(c->disk_sb)->members + dev_idx;
+	struct bch_dev *ca = NULL;
+	int ret = 0;
+
+	pr_verbose_init(c->opts, "");
+
+	if (bch2_fs_init_fault("dev_alloc"))
+		goto err;
+
+	ca = __bch2_dev_alloc(c, member);
+	if (!ca)
+		goto err;
+
+	bch2_dev_attach(c, ca, dev_idx);
 out:
 	pr_verbose_init(c->opts, "ret %i", ret);
 	return ret;
@@ -1133,21 +1147,9 @@ err:
 	goto out;
 }
 
-static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 {
-	struct bch_dev *ca;
-	int ret;
-
-	lockdep_assert_held(&c->state_lock);
-
-	if (le64_to_cpu(sb->sb->seq) >
-	    le64_to_cpu(c->disk_sb->seq))
-		bch2_sb_to_fs(c, sb->sb);
-
-	BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
-	       !c->devs[sb->sb->dev_idx]);
-
-	ca = bch_dev_locked(c, sb->sb->dev_idx);
+	unsigned ret;
 
 	if (bch2_dev_is_online(ca)) {
 		bch_err(ca, "already have device online in slot %u",
@@ -1165,7 +1167,7 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
 
 	if (get_capacity(sb->bdev->bd_disk) <
 	    ca->mi.bucket_size * ca->mi.nbuckets) {
-		bch_err(c, "device too small");
+		bch_err(ca, "device too small");
 		return -EINVAL;
 	}
 
@@ -1173,35 +1175,50 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
 	if (ret)
 		return ret;
 
-	/*
-	 * Increase journal write timeout if flushes to this device are
-	 * expensive:
-	 */
-	if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) &&
-	    journal_flushes_device(ca))
-		c->journal.write_delay_ms =
-			max(c->journal.write_delay_ms, 1000U);
-
 	/* Commit: */
 	ca->disk_sb = *sb;
 	if (sb->mode & FMODE_EXCL)
 		ca->disk_sb.bdev->bd_holder = ca;
 	memset(sb, 0, sizeof(*sb));
 
+	if (ca->fs)
+		mutex_lock(&ca->fs->sb_lock);
+
+	bch2_mark_dev_superblock(ca->fs, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+
+	if (ca->fs)
+		mutex_unlock(&ca->fs->sb_lock);
+
+	percpu_ref_reinit(&ca->io_ref);
+
+	return 0;
+}
+
+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
+{
+	struct bch_dev *ca;
+	int ret;
+
+	lockdep_assert_held(&c->state_lock);
+
+	if (le64_to_cpu(sb->sb->seq) >
+	    le64_to_cpu(c->disk_sb->seq))
+		bch2_sb_to_fs(c, sb->sb);
+
+	BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
+	       !c->devs[sb->sb->dev_idx]);
+
+	ca = bch_dev_locked(c, sb->sb->dev_idx);
+
+	ret = __bch2_dev_attach_bdev(ca, sb);
+	if (ret)
+		return ret;
+
 	if (c->sb.nr_devices == 1)
 		bdevname(ca->disk_sb.bdev, c->name);
 	bdevname(ca->disk_sb.bdev, ca->name);
 
-	mutex_lock(&c->sb_lock);
-	bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
-	mutex_unlock(&c->sb_lock);
-
-	if (ca->mi.state == BCH_MEMBER_STATE_RW)
-		bch2_dev_allocator_add(c, ca);
-
 	rebalance_wakeup(c);
-
-	percpu_ref_reinit(&ca->io_ref);
 	return 0;
 }
 
@@ -1478,8 +1495,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	struct bch_sb_handle sb;
 	const char *err;
 	struct bch_dev *ca = NULL;
-	struct bch_sb_field_members *mi, *dev_mi;
-	struct bch_member saved_mi;
+	struct bch_sb_field_members *mi;
+	struct bch_member dev_mi;
 	unsigned dev_idx, nr_devices, u64s;
 	int ret;
 
@@ -1491,17 +1508,45 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	if (err)
 		return -EINVAL;
 
+	dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx];
+
 	err = bch2_dev_may_add(sb.sb, c);
 	if (err)
 		return -EINVAL;
 
+	ca = __bch2_dev_alloc(c, &dev_mi);
+	if (!ca) {
+		bch2_free_super(&sb);
+		return -ENOMEM;
+	}
+
+	ret = __bch2_dev_attach_bdev(ca, &sb);
+	if (ret) {
+		bch2_dev_free(ca);
+		return ret;
+	}
+
+	err = "journal alloc failed";
+	ret = bch2_dev_journal_alloc(ca);
+	if (ret)
+		goto err;
+
 	mutex_lock(&c->state_lock);
 	mutex_lock(&c->sb_lock);
 
-	/* Grab member info for new disk: */
-	dev_mi = bch2_sb_get_members(sb.sb);
-	saved_mi = dev_mi->members[sb.sb->dev_idx];
-	saved_mi.last_mount = cpu_to_le64(ktime_get_seconds());
+	err = "insufficient space in new superblock";
+	ret = bch2_sb_from_fs(c, ca);
+	if (ret)
+		goto err_unlock;
+
+	mi = bch2_sb_get_members(ca->disk_sb.sb);
+
+	if (!bch2_sb_resize_members(&ca->disk_sb,
+				le32_to_cpu(mi->field.u64s) +
+				sizeof(dev_mi) / sizeof(u64))) {
+		ret = -ENOSPC;
+		goto err_unlock;
+	}
 
 	if (dynamic_fault("bcachefs:add:no_slot"))
 		goto no_slot;
@@ -1519,64 +1564,47 @@ have_slot:
 	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
 	u64s = (sizeof(struct bch_sb_field_members) +
 		sizeof(struct bch_member) * nr_devices) / sizeof(u64);
-	err = "no space in superblock for member info";
-
-	dev_mi = bch2_sb_resize_members(&sb, u64s);
-	if (!dev_mi)
-		goto err_unlock;
 
+	err = "no space in superblock for member info";
+	ret = -ENOSPC;
 	mi = bch2_fs_sb_resize_members(c, u64s);
 	if (!mi)
 		goto err_unlock;
 
-	memcpy(dev_mi, mi, u64s * sizeof(u64));
-	dev_mi->members[dev_idx] = saved_mi;
+	/* success: */
 
-	sb.sb->uuid		= c->disk_sb->uuid;
-	sb.sb->dev_idx		= dev_idx;
-	sb.sb->nr_devices	= nr_devices;
-
-	/* commit new member info */
-	memcpy(mi, dev_mi, u64s * sizeof(u64));
+	mi->members[dev_idx] = dev_mi;
+	mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_seconds());
 	c->disk_sb->nr_devices	= nr_devices;
-	c->sb.nr_devices	= nr_devices;
+
+	ca->disk_sb.sb->dev_idx	= dev_idx;
+	bch2_dev_attach(c, ca, dev_idx);
 
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);
 
-	if (bch2_dev_alloc(c, dev_idx)) {
-		err = "cannot allocate memory";
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	if (__bch2_dev_online(c, &sb)) {
-		err = "bch2_dev_online() error";
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	ca = bch_dev_locked(c, dev_idx);
 	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
 		err = __bch2_dev_read_write(c, ca);
 		if (err)
-			goto err;
-
-		err = "journal alloc failed";
-		if (bch2_dev_journal_alloc(c, ca))
-			goto err;
+			goto err_late;
 	}
 
 	mutex_unlock(&c->state_lock);
 	return 0;
+
 err_unlock:
 	mutex_unlock(&c->sb_lock);
-err:
 	mutex_unlock(&c->state_lock);
+err:
+	if (ca)
+		bch2_dev_free(ca);
 	bch2_free_super(&sb);
-	bch_err(c, "Unable to add device: %s", err);
-	return ret ?: -EINVAL;
+	return ret;
+err_late:
+	bch_err(c, "Error going rw after adding device: %s", err);
+	return -EINVAL;
 }
 
 /* Hot add existing device to running filesystem: */
@@ -1603,8 +1631,8 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 	if (err)
 		goto err;
 
-	if (__bch2_dev_online(c, &sb)) {
-		err = "__bch2_dev_online() error";
+	if (bch2_dev_attach_bdev(c, &sb)) {
+		err = "bch2_dev_attach_bdev() error";
 		goto err;
 	}
 
@@ -1763,7 +1791,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 	err = "bch2_dev_online() error";
 	mutex_lock(&c->state_lock);
 	for (i = 0; i < nr_devices; i++)
-		if (__bch2_dev_online(c, &sb[i])) {
+		if (bch2_dev_attach_bdev(c, &sb[i])) {
 			mutex_unlock(&c->state_lock);
 			goto err_print;
 		}
@@ -1828,7 +1856,7 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
 
 	err = "bch2_dev_online() error";
 	mutex_lock(&c->sb_lock);
-	if (__bch2_dev_online(c, sb)) {
+	if (bch2_dev_attach_bdev(c, sb)) {
 		mutex_unlock(&c->sb_lock);
 		goto err;
 	}
```
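Two patterns in this patch are worth calling out for readers skimming the diff. First, `bch2_dev_add()` is reordered so that every step that can fail — allocating the `bch_dev`, attaching the block device, allocating journal buckets — happens before the new member is committed to the superblock; on failure the unwind is a plain `bch2_dev_free()` with the superblock untouched. A minimal, self-contained sketch of that ordering follows; all types and helpers below are hypothetical stand-ins, not bcachefs code — only the ordering mirrors the patch:

```c
/* Hypothetical sketch (not bcachefs code): do fallible work first,
 * publish to the superblock only once nothing can fail. */
#include <stdio.h>
#include <stdlib.h>

struct dev { int journal_ready; };

static struct dev *dev_alloc(void)
{
	return calloc(1, sizeof(struct dev));
}

static int dev_journal_alloc(struct dev *d)
{
	d->journal_ready = 1;
	return 0;		/* could fail with -ENOSPC */
}

static void sb_commit_member(struct dev *d)
{
	/* the point of no return: the member is now on disk */
	printf("member committed, journal_ready=%d\n", d->journal_ready);
}

int main(void)
{
	struct dev *d = dev_alloc();
	if (!d)
		return 1;

	/* fallible steps first ... */
	if (dev_journal_alloc(d)) {
		free(d);	/* plain unwind; superblock untouched */
		return 1;
	}

	/* ... commit to the superblock last */
	sb_commit_member(d);
	free(d);
	return 0;
}
```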
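Second, the helpers shared between the device-add path and the runtime path (`bch2_mark_dev_superblock()`, `bch2_dev_usage_update()`, `__bch2_set_nr_journal_buckets()`) now tolerate a NULL `bch_fs`, taking the fs-wide locks only when a filesystem object actually exists. A hypothetical userspace analogue of that guard pattern (again, the names here are stand-ins):

```c
/* Hypothetical analogue (not bcachefs code) of the "if (c)" guards this
 * patch threads through the marking helpers: the same function runs both
 * before a filesystem object exists and at runtime under the fs lock.
 * Build with: cc sketch.c -lpthread */
#include <pthread.h>
#include <stdio.h>

struct fs { pthread_mutex_t lock; };

static void mark_buckets(struct fs *fs, const char *dev)
{
	if (fs)
		pthread_mutex_lock(&fs->lock);

	/* ... accounting work that is safe either way ... */
	printf("marking buckets on %s (fs %s)\n",
	       dev, fs ? "locked" : "absent");

	if (fs)
		pthread_mutex_unlock(&fs->lock);
}

int main(void)
{
	struct fs fs = { .lock = PTHREAD_MUTEX_INITIALIZER };

	mark_buckets(NULL, "sdb");	/* device-add path: no fs yet */
	mark_buckets(&fs, "sdb");	/* runtime path: fs lock taken */
	return 0;
}
```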