author    | Kent Overstreet <kent.overstreet@gmail.com> | 2017-12-24 18:36:00 -0500
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-22 00:44:18 -0400
commit    | c37101792786a50468fe64b911a80953374cfc29 (patch)
tree      | 1b151988bf4361d373fd6912c916a5de1ef4e6d1
parent    | b444278c7ea416fa94c031cc20977824daa12a62 (diff)
bcachefs: allow RW devices to be removed
-rw-r--r-- | fs/bcachefs/extents.c       |  30
-rw-r--r-- | fs/bcachefs/extents.h       |   2
-rw-r--r-- | fs/bcachefs/journal.c       | 326
-rw-r--r-- | fs/bcachefs/journal.h       |   3
-rw-r--r-- | fs/bcachefs/journal_types.h |   5
-rw-r--r-- | fs/bcachefs/migrate.c       |  20
-rw-r--r-- | fs/bcachefs/super.c         |  33
7 files changed, 172 insertions, 247 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 51262d6f5afb..e055ee93f586 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -123,6 +123,22 @@ bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
 	return NULL;
 }
 
+bool bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
+{
+	struct bch_extent_ptr *ptr;
+	bool dropped = false;
+
+	extent_for_each_ptr_backwards(e, ptr)
+		if (ptr->dev == dev) {
+			__bch2_extent_drop_ptr(e, ptr);
+			dropped = true;
+		}
+
+	if (dropped)
+		bch2_extent_drop_redundant_crcs(e);
+	return dropped;
+}
+
 unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e)
 {
 	const struct bch_extent_ptr *ptr;
@@ -225,20 +241,6 @@ void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
 	bch2_extent_drop_redundant_crcs(e);
 }
 
-void bch2_extent_drop_ptr_idx(struct bkey_s_extent e, unsigned idx)
-{
-	struct bch_extent_ptr *ptr;
-	unsigned i = 0;
-
-	extent_for_each_ptr(e, ptr)
-		if (i++ == idx)
-			goto found;
-
-	BUG();
-found:
-	bch2_extent_drop_ptr(e, ptr);
-}
-
 static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
 				  struct bch_extent_crc_unpacked n)
 {
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index eb81b74a9696..aeae361d9e28 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -42,6 +42,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent);
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
+bool bch2_extent_drop_device(struct bkey_s_extent, unsigned);
 
 unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent);
 unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c);
@@ -432,7 +433,6 @@ void bch2_extent_drop_redundant_crcs(struct bkey_s_extent);
 
 void __bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
 void bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
-void bch2_extent_drop_ptr_idx(struct bkey_s_extent, unsigned);
 
 bool bch2_cut_front(struct bpos, struct bkey_i *);
 bool bch2_cut_back(struct bpos, struct bkey *);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 914bc4539523..07b4d67dc33b 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -2022,10 +2022,11 @@ static void journal_reclaim_work(struct work_struct *work)
 /**
  * journal_next_bucket - move on to the next journal bucket if possible
  */
-static int journal_write_alloc(struct journal *j, unsigned sectors)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+			       unsigned sectors)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
+	struct bkey_s_extent e;
 	struct bch_extent_ptr *ptr;
 	struct journal_device *ja;
 	struct bch_dev *ca;
@@ -2034,6 +2035,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 		READ_ONCE(c->opts.metadata_replicas);
 
 	spin_lock(&j->lock);
+	e = bkey_i_to_s_extent(&j->key);
 
 	/*
 	 * Drop any pointers to devices that have been removed, are no longer
@@ -2099,6 +2101,8 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 	rcu_read_unlock();
 
 	j->prev_buf_sectors = 0;
+
+	bkey_copy(&w->key, &j->key);
 	spin_unlock(&j->lock);
 
 	if (replicas < c->opts.metadata_replicas_required)
@@ -2174,13 +2178,26 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
 static void journal_write_done(struct closure *cl)
 {
 	struct journal *j = container_of(cl, struct journal, io);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *w = journal_prev_buf(j);
+	struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&w->key);
+
+	if (!bch2_extent_nr_ptrs(e)) {
+		bch_err(c, "unable to write journal to sufficient devices");
+		goto err;
+	}
 
+	if (bch2_check_mark_super(c, e, BCH_DATA_JOURNAL))
+		goto err;
+out:
 	__bch2_time_stats_update(j->write_time, j->write_start_time);
 
 	spin_lock(&j->lock);
 	j->last_seq_ondisk = le64_to_cpu(w->data->last_seq);
 
+	journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs =
+		bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
+
 	/*
 	 * Updating last_seq_ondisk may let journal_reclaim_work() discard more
 	 * buckets:
@@ -2203,31 +2220,6 @@ static void journal_write_done(struct closure *cl)
 	if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		mod_delayed_work(system_freezable_wq, &j->write_work, 0);
 	spin_unlock(&j->lock);
-}
-
-static void journal_write_error(struct closure *cl)
-{
-	struct journal *j = container_of(cl, struct journal, io);
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
-
-	while (j->replicas_failed) {
-		unsigned idx = __fls(j->replicas_failed);
-
-		bch2_extent_drop_ptr_idx(e, idx);
-		j->replicas_failed ^= 1 << idx;
-	}
-
-	if (!bch2_extent_nr_ptrs(e.c)) {
-		bch_err(c, "unable to write journal to sufficient devices");
-		goto err;
-	}
-
-	if (bch2_check_mark_super(c, e.c, BCH_DATA_JOURNAL))
-		goto err;
-
-out:
-	journal_write_done(cl);
 	return;
 err:
 	bch2_fatal_error(c);
@@ -2242,12 +2234,12 @@ static void journal_write_endio(struct bio *bio)
 
 	if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write") ||
 	    bch2_meta_write_fault("journal")) {
-		/* Was this a flush or an actual journal write? */
-		if (ca->journal.ptr_idx != U8_MAX) {
-			set_bit(ca->journal.ptr_idx, &j->replicas_failed);
-			set_closure_fn(&j->io, journal_write_error,
-				       system_highpri_wq);
-		}
+		struct journal_buf *w = journal_prev_buf(j);
+		unsigned long flags;
+
+		spin_lock_irqsave(&j->err_lock, flags);
+		bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx);
+		spin_unlock_irqrestore(&j->err_lock, flags);
 	}
 
 	closure_put(&j->io);
@@ -2263,7 +2255,7 @@ static void journal_write(struct closure *cl)
 	struct jset *jset;
 	struct bio *bio;
 	struct bch_extent_ptr *ptr;
-	unsigned i, sectors, bytes, ptr_idx = 0;
+	unsigned i, sectors, bytes;
 
 	journal_buf_realloc(j, w);
 	jset = w->data;
@@ -2310,7 +2302,7 @@ static void journal_write(struct closure *cl)
 	bytes = vstruct_bytes(w->data);
 	memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
 
-	if (journal_write_alloc(j, sectors)) {
+	if (journal_write_alloc(j, w, sectors)) {
 		bch2_journal_halt(j);
 		bch_err(c, "Unable to allocate journal write");
 		bch2_fatal_error(c);
@@ -2318,13 +2310,6 @@ static void journal_write(struct closure *cl)
 		return;
 	}
 
-	if (bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
-				  BCH_DATA_JOURNAL))
-		goto err;
-
-	journal_seq_pin(j, le64_to_cpu(jset->seq))->devs =
-		bch2_extent_devs(bkey_i_to_s_c_extent(&j->key));
-
 	/*
 	 * XXX: we really should just disable the entire journal in nochanges
 	 * mode
@@ -2332,7 +2317,7 @@ static void journal_write(struct closure *cl)
 	if (c->opts.nochanges)
 		goto no_io;
 
-	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
+	extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
 		ca = bch_dev_bkey_exists(c, ptr->dev);
 		if (!percpu_ref_tryget(&ca->io_ref)) {
 			/* XXX: fix this */
@@ -2343,7 +2328,6 @@ static void journal_write(struct closure *cl)
 		this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_JOURNAL],
 			     sectors);
 
-		ca->journal.ptr_idx = ptr_idx++;
 		bio = ca->journal.bio;
 		bio_reset(bio);
 		bio_set_dev(bio, ca->disk_sb.bdev);
@@ -2363,10 +2347,9 @@ static void journal_write(struct closure *cl)
 
 	for_each_rw_member(ca, c, i)
 		if (journal_flushes_device(ca) &&
-		    !bch2_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) {
+		    !bch2_extent_has_device(bkey_i_to_s_c_extent(&w->key), i)) {
 			percpu_ref_get(&ca->io_ref);
 
-			ca->journal.ptr_idx = U8_MAX;
 			bio = ca->journal.bio;
 			bio_reset(bio);
 			bio_set_dev(bio, ca->disk_sb.bdev);
@@ -2377,7 +2360,7 @@ static void journal_write(struct closure *cl)
 	}
 
 no_io:
-	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
+	extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr)
 		ptr->offset += sectors;
 
 	continue_at(cl, journal_write_done, system_highpri_wq);
@@ -2782,163 +2765,32 @@ int bch2_journal_flush_device(struct journal *j, unsigned dev_idx)
 	return ret;
 }
 
-ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
-{
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	union journal_res_state *s = &j->reservations;
-	struct bch_dev *ca;
-	unsigned iter;
-	ssize_t ret = 0;
-
-	rcu_read_lock();
-	spin_lock(&j->lock);
-
-	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-			 "active journal entries:\t%zu\n"
-			 "seq:\t\t\t%llu\n"
-			 "last_seq:\t\t%llu\n"
-			 "last_seq_ondisk:\t%llu\n"
-			 "reservation count:\t%u\n"
-			 "reservation offset:\t%u\n"
-			 "current entry u64s:\t%u\n"
-			 "io in flight:\t\t%i\n"
-			 "need write:\t\t%i\n"
-			 "dirty:\t\t\t%i\n"
-			 "replay done:\t\t%i\n",
-			 fifo_used(&j->pin),
-			 (u64) atomic64_read(&j->seq),
-			 last_seq(j),
-			 j->last_seq_ondisk,
-			 journal_state_count(*s, s->idx),
-			 s->cur_entry_offset,
-			 j->cur_entry_u64s,
-			 s->prev_buf_unwritten,
-			 test_bit(JOURNAL_NEED_WRITE, &j->flags),
-			 journal_entry_is_open(j),
-			 test_bit(JOURNAL_REPLAY_DONE, &j->flags));
-
-	for_each_member_device_rcu(ca, c, iter,
-				   &c->rw_devs[BCH_DATA_JOURNAL]) {
-		struct journal_device *ja = &ca->journal;
-
-		if (!ja->nr)
-			continue;
-
-		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-				 "dev %u:\n"
-				 "\tnr\t\t%u\n"
-				 "\tcur_idx\t\t%u (seq %llu)\n"
-				 "\tlast_idx\t%u (seq %llu)\n",
-				 iter, ja->nr,
-				 ja->cur_idx, ja->bucket_seq[ja->cur_idx],
-				 ja->last_idx, ja->bucket_seq[ja->last_idx]);
-	}
-
-	spin_unlock(&j->lock);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
-{
-	struct journal_entry_pin_list *pin_list;
-	struct journal_entry_pin *pin;
-	ssize_t ret = 0;
-	unsigned i;
-
-	spin_lock_irq(&j->pin_lock);
-	fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
-		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-				 "%llu: count %u\n",
-				 journal_pin_seq(j, pin_list),
-				 atomic_read(&pin_list->count));
-
-		list_for_each_entry(pin, &pin_list->list, list)
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-					 "\t%p %pf\n",
-					 pin, pin->flush);
-
-		if (!list_empty(&pin_list->flushed))
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-					 "flushed:\n");
-
-		list_for_each_entry(pin, &pin_list->flushed, list)
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-					 "\t%p %pf\n",
-					 pin, pin->flush);
-	}
-	spin_unlock_irq(&j->pin_lock);
-
-	return ret;
-}
+/* startup/shutdown: */
 
-static bool bch2_journal_writing_to_device(struct bch_dev *ca)
+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
 {
-	struct journal *j = &ca->fs->journal;
+	union journal_res_state state;
+	struct journal_buf *w;
 	bool ret;
 
 	spin_lock(&j->lock);
-	ret = bch2_extent_has_device(bkey_i_to_s_c_extent(&j->key),
-				     ca->dev_idx);
+	state = READ_ONCE(j->reservations);
+	w = j->buf + !state.idx;
+
+	ret = state.prev_buf_unwritten &&
+		bch2_extent_has_device(bkey_i_to_s_c_extent(&w->key), dev_idx);
 	spin_unlock(&j->lock);
 
 	return ret;
 }
 
-/*
- * This asumes that ca has already been marked read-only so that
- * journal_next_bucket won't pick buckets out of ca any more.
- * Hence, if the journal is not currently pointing to ca, there
- * will be no new writes to journal entries in ca after all the
- * pending ones have been flushed to disk.
- *
- * If the journal is being written to ca, write a new record, and
- * journal_next_bucket will notice that the device is no longer
- * writeable and pick a new set of devices to write to.
- */
-
-int bch2_journal_move(struct bch_dev *ca)
+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
 {
-	struct journal_device *ja = &ca->journal;
-	struct journal *j = &ca->fs->journal;
-	u64 seq_to_flush = 0;
-	unsigned i;
-	int ret;
-
-	if (bch2_journal_writing_to_device(ca)) {
-		/*
-		 * bch_journal_meta will write a record and we'll wait
-		 * for the write to complete.
-		 * Actually writing the journal (journal_write_locked)
-		 * will call journal_next_bucket which notices that the
-		 * device is no longer writeable, and picks a new one.
-		 */
-		bch2_journal_meta(j);
-		BUG_ON(bch2_journal_writing_to_device(ca));
-	}
-
-	for (i = 0; i < ja->nr; i++)
-		seq_to_flush = max(seq_to_flush, ja->bucket_seq[i]);
-
-	bch2_journal_flush_pins(j, seq_to_flush);
-
-	/*
-	 * Force a meta-data journal entry to be written so that
-	 * we have newer journal entries in devices other than ca,
-	 * and wait for the meta data write to complete.
-	 */
-	bch2_journal_meta(j);
-
-	/*
-	 * Verify that we no longer need any of the journal entries in
-	 * the device
-	 */
 	spin_lock(&j->lock);
-	ret = j->last_seq_ondisk > seq_to_flush ? 0 : -EIO;
+	bch2_extent_drop_device(bkey_i_to_s_extent(&j->key), ca->dev_idx);
 	spin_unlock(&j->lock);
 
-	return ret;
+	wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
 }
 
 void bch2_fs_journal_stop(struct journal *j)
@@ -3009,6 +2861,7 @@ int bch2_fs_journal_init(struct journal *j)
 
 	spin_lock_init(&j->lock);
 	spin_lock_init(&j->pin_lock);
+	spin_lock_init(&j->err_lock);
 	init_waitqueue_head(&j->wait);
 	INIT_DELAYED_WORK(&j->write_work, journal_write_work);
 	INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
@@ -3038,3 +2891,96 @@ int bch2_fs_journal_init(struct journal *j)
 
 	return 0;
 }
+
+/* debug: */
+
+ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	union journal_res_state *s = &j->reservations;
+	struct bch_dev *ca;
+	unsigned iter;
+	ssize_t ret = 0;
+
+	rcu_read_lock();
+	spin_lock(&j->lock);
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			 "active journal entries:\t%zu\n"
+			 "seq:\t\t\t%llu\n"
+			 "last_seq:\t\t%llu\n"
+			 "last_seq_ondisk:\t%llu\n"
+			 "reservation count:\t%u\n"
+			 "reservation offset:\t%u\n"
+			 "current entry u64s:\t%u\n"
+			 "io in flight:\t\t%i\n"
+			 "need write:\t\t%i\n"
+			 "dirty:\t\t\t%i\n"
+			 "replay done:\t\t%i\n",
+			 fifo_used(&j->pin),
+			 (u64) atomic64_read(&j->seq),
+			 last_seq(j),
+			 j->last_seq_ondisk,
+			 journal_state_count(*s, s->idx),
+			 s->cur_entry_offset,
+			 j->cur_entry_u64s,
+			 s->prev_buf_unwritten,
+			 test_bit(JOURNAL_NEED_WRITE, &j->flags),
+			 journal_entry_is_open(j),
+			 test_bit(JOURNAL_REPLAY_DONE, &j->flags));
+
+	for_each_member_device_rcu(ca, c, iter,
+				   &c->rw_devs[BCH_DATA_JOURNAL]) {
+		struct journal_device *ja = &ca->journal;
+
+		if (!ja->nr)
+			continue;
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				 "dev %u:\n"
+				 "\tnr\t\t%u\n"
+				 "\tcur_idx\t\t%u (seq %llu)\n"
+				 "\tlast_idx\t%u (seq %llu)\n",
+				 iter, ja->nr,
+				 ja->cur_idx, ja->bucket_seq[ja->cur_idx],
+				 ja->last_idx, ja->bucket_seq[ja->last_idx]);
+	}
+
+	spin_unlock(&j->lock);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
+{
+	struct journal_entry_pin_list *pin_list;
+	struct journal_entry_pin *pin;
+	ssize_t ret = 0;
+	unsigned i;
+
+	spin_lock_irq(&j->pin_lock);
+	fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				 "%llu: count %u\n",
+				 journal_pin_seq(j, pin_list),
+				 atomic_read(&pin_list->count));
+
+		list_for_each_entry(pin, &pin_list->list, list)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "\t%p %pf\n",
+					 pin, pin->flush);
+
+		if (!list_empty(&pin_list->flushed))
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "flushed:\n");
+
+		list_for_each_entry(pin, &pin_list->flushed, list)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "\t%p %pf\n",
+					 pin, pin->flush);
+	}
+	spin_unlock_irq(&j->pin_lock);
+
+	return ret;
+}
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 5f3ece089937..b3e6b2bc1f9b 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -398,8 +398,7 @@ static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
 		: 0;
 }
 
-int bch2_journal_move(struct bch_dev *);
-
+void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
 void bch2_fs_journal_stop(struct journal *);
 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 87f378a6ac4f..66923cf43a83 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -17,6 +17,8 @@ struct journal_res;
 struct journal_buf {
 	struct jset		*data;
 
+	BKEY_PADDED(key);
+
 	struct closure_waitlist	wait;
 
 	unsigned		size;
@@ -141,7 +143,6 @@ struct journal {
 	struct closure		io;
 	struct delayed_work	write_work;
 
-	unsigned long		replicas_failed;
 	/* Sequence number of most recent journal entry (last entry in @pin) */
 	atomic64_t		seq;
 
@@ -179,6 +180,7 @@ struct journal {
 	BKEY_PADDED(key);
 
 	struct write_point	wp;
+	spinlock_t		err_lock;
 
 	struct delayed_work	reclaim_work;
 	unsigned long		last_flushed;
@@ -230,7 +232,6 @@ struct journal_device {
 	/* Bio for journal reads/writes to this device */
 	struct bio		*bio;
 
-	u8			ptr_idx;
 	/* for bch_journal_read_device */
 	struct closure		read;
 
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index e11ee9532483..328316a10887 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -196,26 +196,13 @@ static int bch2_dev_metadata_migrate(struct bch_fs *c, struct bch_dev *ca,
 		return 0;
 
 	mutex_lock(&c->replicas_gc_lock);
-	bch2_replicas_gc_start(c,
-			       (1 << BCH_DATA_JOURNAL)|
-			       (1 << BCH_DATA_BTREE));
-
-	/* 1st, Move the btree nodes off the device */
+	bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
 
 	for (i = 0; i < BTREE_ID_NR; i++) {
 		ret = bch2_move_btree_off(c, ca, i);
 		if (ret)
 			goto err;
 	}
-
-	/* There are no prios/gens to move -- they are already in the device. */
-
-	/* 2nd. Move the journal off the device */
-
-	ret = bch2_journal_move(ca);
-	if (ret)
-		goto err;
-
 err:
 	bch2_replicas_gc_end(c, ret);
 	mutex_unlock(&c->replicas_gc_lock);
@@ -231,15 +218,12 @@ int bch2_dev_data_migrate(struct bch_fs *c, struct bch_dev *ca, int flags)
 static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
 			 unsigned dev_idx, int flags, bool metadata)
 {
-	struct bch_extent_ptr *ptr;
 	unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
 	unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
 	unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
 	unsigned nr_good;
 
-	extent_for_each_ptr_backwards(e, ptr)
-		if (ptr->dev == dev_idx)
-			bch2_extent_drop_ptr(e, ptr);
+	bch2_extent_drop_device(e, dev_idx);
 
 	nr_good = bch2_extent_nr_good_ptrs(c, e.c);
 	if ((!nr_good && !(flags & lost)) ||
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 60a2d83ed8c1..61acff4c873f 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -88,7 +88,7 @@ static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
 
 static void bch2_dev_free(struct bch_dev *);
 static int bch2_dev_alloc(struct bch_fs *, unsigned);
-static int bch2_dev_sysfs_online(struct bch_dev *);
+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
 static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
 
 struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev)
@@ -649,7 +649,7 @@ static const char *__bch2_fs_online(struct bch_fs *c)
 
 	err = "error creating sysfs objects";
 	__for_each_member_device(ca, c, i, NULL)
-		if (bch2_dev_sysfs_online(ca))
+		if (bch2_dev_sysfs_online(c, ca))
 			goto err;
 
 	list_add(&c->list, &bch_fs_list);
@@ -991,9 +991,8 @@ static void bch2_dev_free(struct bch_dev *ca)
 	kobject_put(&ca->kobj);
 }
 
-static void __bch2_dev_offline(struct bch_dev *ca)
+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
 {
-	struct bch_fs *c = ca->fs;
 
 	lockdep_assert_held(&c->state_lock);
 
@@ -1032,9 +1031,8 @@ static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
 	complete(&ca->io_ref_completion);
 }
 
-static int bch2_dev_sysfs_online(struct bch_dev *ca)
+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
 {
-	struct bch_fs *c = ca->fs;
 	int ret;
 
 	if (!c->kobj.state_in_sysfs)
@@ -1149,7 +1147,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 	ca->fs = c;
 	rcu_assign_pointer(c->devs[ca->dev_idx], ca);
 
-	if (bch2_dev_sysfs_online(ca))
+	if (bch2_dev_sysfs_online(c, ca))
 		pr_warn("error creating sysfs objects");
 
 	return 0;
@@ -1202,9 +1200,6 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
 		bdevname(ca->disk_sb.bdev, c->name);
 	bdevname(ca->disk_sb.bdev, ca->name);
 
-	if (bch2_dev_sysfs_online(ca))
-		pr_warn("error creating sysfs objects");
-
 	bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
 
 	if (ca->mi.state == BCH_MEMBER_STATE_RW)
@@ -1311,12 +1306,11 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
 	bch2_copygc_stop(ca);
 
 	/*
-	 * This stops new data writes (e.g. to existing open data
-	 * buckets) and then waits for all existing writes to
-	 * complete.
+	 * The allocator thread itself allocates btree nodes, so stop it first:
 	 */
 	bch2_dev_allocator_stop(ca);
 	bch2_dev_allocator_remove(c, ca);
+	bch2_dev_journal_stop(&c->journal, ca);
 }
 
 static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
@@ -1393,16 +1387,13 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 
 	percpu_ref_put(&ca->ref); /* XXX */
 
-	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
-		bch_err(ca, "Cannot remove RW device");
-		goto err;
-	}
-
 	if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
 		bch_err(ca, "Cannot remove without losing data");
		goto err;
 	}
 
+	__bch2_dev_read_only(c, ca);
+
 	/*
 	 * XXX: verify that dev_idx is really not in use anymore, anywhere
 	 *
@@ -1452,7 +1443,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 		goto err;
 	}
 
-	__bch2_dev_offline(ca);
+	__bch2_dev_offline(c, ca);
 
 	mutex_lock(&c->sb_lock);
 	rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
@@ -1477,6 +1468,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 	mutex_unlock(&c->state_lock);
 	return 0;
 err:
+	if (ca->mi.state == BCH_MEMBER_STATE_RW)
+		__bch2_dev_read_write(c, ca);
 	mutex_unlock(&c->state_lock);
 	return ret;
 }
@@ -1645,7 +1638,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
 		return -EINVAL;
 	}
 
-	__bch2_dev_offline(ca);
+	__bch2_dev_offline(c, ca);
 
 	mutex_unlock(&c->state_lock);
 	return 0;
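A minimal sketch of the journal error-handling pattern this commit moves to (illustrative only, not part of the patch; the wrapper name handle_journal_write_error below is hypothetical, while bch2_extent_drop_device(), journal_prev_buf(), and j->err_lock are the identifiers the patch itself introduces or uses):

```c
/*
 * Illustrative sketch: mirrors what journal_write_endio() now does on an I/O
 * error -- drop the failing device from the in-flight journal buffer's key,
 * under err_lock, instead of recording a pointer index for a separate
 * journal_write_error() pass (that path is removed by this commit).
 */
static void handle_journal_write_error(struct journal *j, struct bch_dev *ca)
{
	struct journal_buf *w = journal_prev_buf(j);	/* buffer being written */
	unsigned long flags;

	spin_lock_irqsave(&j->err_lock, flags);
	/* drops every pointer to ca, plus any now-redundant CRC entries */
	bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx);
	spin_unlock_irqrestore(&j->err_lock, flags);

	/*
	 * journal_write_done() later fails the write only if w->key is left
	 * with no pointers at all, i.e. every replica write failed.
	 */
}
```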