author     Kent Overstreet <kent.overstreet@gmail.com>  2018-02-08 12:51:20 -0500
committer  Kent Overstreet <kent.overstreet@gmail.com>  2018-02-12 01:25:37 -0500
commit     3ee023bb950638d332379ffd42b7feb4f0b69aa9 (patch)
tree       515e498ff299a544f31e8f088bf4ec99393c5879
parent     3ae8314dc86919e4cc68b6ea031b988badeb79c9 (diff)
bcachefs: refactor data move path
Prep work for scrubbing
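
The heart of the refactor is visible in the move.h hunk below: the old boolean move predicate, plus the ad-hoc (btree_insert_flags, move_device) arguments threaded through bch2_move_data(), are replaced by a predicate that returns an enum data_cmd and fills in a struct data_opts, so one generic move path can serve rereplication, device evacuation, promotion, copygc, and tiering, with DATA_SCRUB reserved for the upcoming scrub support. Here is a minimal standalone sketch of that dispatch shape; the toy_extent type and the move_one() driver are illustrative stand-ins, not bcachefs API:

#include <stdio.h>

/* Miniature model of the commands a move predicate can return
 * (mirrors enum data_cmd introduced in fs/bcachefs/move.h): */
enum data_cmd {
	DATA_SKIP,		/* leave the extent alone */
	DATA_SCRUB,		/* planned: verify, don't rewrite */
	DATA_ADD_REPLICAS,	/* write additional copies */
	DATA_REWRITE,		/* drop + rewrite the copy on one device */
	DATA_PROMOTE,		/* write a cached copy to a faster device */
};

struct data_opts {
	unsigned	rewrite_dev;
	int		btree_insert_flags;
};

/* Toy stand-in for a bcachefs extent (illustrative only): */
struct toy_extent {
	unsigned	nr_good_ptrs;	/* live replicas */
};

typedef enum data_cmd (*move_pred_fn)(void *arg, const struct toy_extent *e,
				      struct data_opts *data_opts);

/* Modeled on rereplicate_pred below: ask for more copies when under target. */
static enum data_cmd rereplicate_pred(void *arg, const struct toy_extent *e,
				      struct data_opts *data_opts)
{
	unsigned want = *(unsigned *) arg;

	if (!e->nr_good_ptrs || e->nr_good_ptrs >= want)
		return DATA_SKIP;

	data_opts->btree_insert_flags = 0;
	return DATA_ADD_REPLICAS;
}

/* Modeled on the dispatch switch added to bch2_move_data(): */
static void move_one(move_pred_fn pred, void *arg, const struct toy_extent *e)
{
	struct data_opts data_opts = { 0 };

	switch (pred(arg, e, &data_opts)) {
	case DATA_SKIP:
		puts("skip");
		break;
	case DATA_ADD_REPLICAS:
		printf("add replicas (flags %d)\n", data_opts.btree_insert_flags);
		break;
	case DATA_REWRITE:
		printf("rewrite copy on dev %u\n", data_opts.rewrite_dev);
		break;
	default:
		puts("unhandled command");
	}
}

int main(void)
{
	unsigned replicas_want = 2;
	struct toy_extent e = { .nr_good_ptrs = 1 };

	move_one(rereplicate_pred, &replicas_want, &e);	/* -> add replicas */
	return 0;
}

The in-tree predicates (rereplicate_pred, migrate_pred, copygc_pred, tiering_pred) all follow exactly this pattern: decide, fill in data_opts, return a command.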
-rw-r--r--	fs/bcachefs/extents.c	|   7
-rw-r--r--	fs/bcachefs/extents.h	|   3
-rw-r--r--	fs/bcachefs/io.c	|  39
-rw-r--r--	fs/bcachefs/keylist.h	|  11
-rw-r--r--	fs/bcachefs/move.c	| 263
-rw-r--r--	fs/bcachefs/move.h	|  34
-rw-r--r--	fs/bcachefs/movinggc.c	|  22
-rw-r--r--	fs/bcachefs/opts.h	|   4
-rw-r--r--	fs/bcachefs/tier.c	|  22
9 files changed, 283 insertions, 122 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index e9ed165ff6dd..ce1f8ba23035 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -2013,17 +2013,18 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 }
 
 void bch2_extent_mark_replicas_cached(struct bch_fs *c,
-				      struct bkey_s_extent e)
+				      struct bkey_s_extent e,
+				      unsigned nr_desired_replicas)
 {
 	struct bch_extent_ptr *ptr;
 	unsigned tier = 0, nr_cached = 0;
 	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
 	bool have_higher_tier;
 
-	if (nr_good <= c->opts.data_replicas)
+	if (nr_good <= nr_desired_replicas)
 		return;
 
-	nr_cached = nr_good - c->opts.data_replicas;
+	nr_cached = nr_good - nr_desired_replicas;
 
 	do {
 		have_higher_tier = false;
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index a0cdfbfe3d98..75579273fae8 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -38,7 +38,8 @@ bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 
-void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent);
+void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
+				      unsigned);
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 163a94f1ce67..77bd7b00e3c7 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -209,17 +209,6 @@ static void bch2_write_done(struct closure *cl)
 	closure_return(cl);
 }
 
-static u64 keylist_sectors(struct keylist *keys)
-{
-	struct bkey_i *k;
-	u64 ret = 0;
-
-	for_each_keylist_key(keys, k)
-		ret += k->k.size;
-
-	return ret;
-}
-
 int bch2_write_index_default(struct bch_write_op *op)
 {
 	struct keylist *keys = &op->insert_keys;
@@ -909,18 +898,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
 	rbio->promote = NULL;
 
-	bch2_write_op_init(&op->write.op, c);
-	op->write.op.csum_type = bch2_data_checksum_type(c, rbio->opts.data_checksum);
-	op->write.op.compression_type =
-		bch2_compression_opt_to_type(rbio->opts.compression);
-
-	op->write.move_dev	= -1;
-	op->write.op.devs	= c->fastest_devs;
-	op->write.op.write_point = writepoint_hashed((unsigned long) current);
-	op->write.op.flags	|= BCH_WRITE_ALLOC_NOWAIT;
-	op->write.op.flags	|= BCH_WRITE_CACHED;
-
-	bch2_migrate_write_init(&op->write, rbio);
+	bch2_migrate_read_done(&op->write, rbio);
 
 	closure_init(cl, NULL);
 	closure_call(&op->write.op.cl, bch2_write, c->wq, cl);
@@ -931,13 +909,16 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
  * XXX: multiple promotes can race with each other, wastefully. Keep a list of
  * outstanding promotes?
  */
-static struct promote_op *promote_alloc(struct bch_read_bio *rbio)
+static struct promote_op *promote_alloc(struct bch_read_bio *rbio,
+					struct bkey_s_c k)
 {
+	struct bch_fs *c = rbio->c;
 	struct promote_op *op;
 	struct bio *bio;
 	/* data might have to be decompressed in the write path: */
 	unsigned pages = DIV_ROUND_UP(rbio->pick.crc.uncompressed_size,
 				      PAGE_SECTORS);
+	int ret;
 
 	BUG_ON(!rbio->bounce);
 	BUG_ON(pages < rbio->bio.bi_vcnt);
@@ -953,6 +934,14 @@ static struct promote_op *promote_alloc(struct bch_read_bio *rbio)
 	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
 	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 
+	ret = bch2_migrate_write_init(c, &op->write, c->fastest_devs,
+				      writepoint_hashed((unsigned long) current),
+				      rbio->opts,
+				      DATA_PROMOTE,
+				      (struct data_opts) { 0 },
+				      k);
+	BUG_ON(ret);
+
 	return op;
 }
 
@@ -1406,7 +1395,7 @@ noclone:
 	rbio->pick		= *pick;
 	rbio->pos		= pos;
 	rbio->version		= e.k->version;
-	rbio->promote		= promote ? promote_alloc(rbio) : NULL;
+	rbio->promote		= promote ? promote_alloc(rbio, e.s_c) : NULL;
 	INIT_WORK(&rbio->work, NULL);
 
 	bio_set_dev(&rbio->bio, pick->ca->disk_sb.bdev);
diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h
index b7c8a861e537..a8c8883ba071 100644
--- a/fs/bcachefs/keylist.h
+++ b/fs/bcachefs/keylist.h
@@ -58,6 +58,17 @@ static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
 #define keylist_single(k)					\
 	((struct keylist) { .keys = k, .top = bkey_next(k) })
 
+static inline u64 keylist_sectors(struct keylist *keys)
+{
+	struct bkey_i *k;
+	u64 ret = 0;
+
+	for_each_keylist_key(keys, k)
+		ret += k->k.size;
+
+	return ret;
+}
+
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_verify_keylist_sorted(struct keylist *);
 #else
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 54641c791fce..72cd4c713a72 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -58,6 +58,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 		BKEY_PADDED(k) _new, _insert;
 		struct bch_extent_ptr *ptr;
 		struct bch_extent_crc_unpacked crc;
+		unsigned nr_dirty;
 		bool did_work = false;
 
 		if (btree_iter_err(k)) {
@@ -71,6 +72,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 					    m->ptr, m->offset))
 			goto nomatch;
 
+		if (m->data_cmd == DATA_REWRITE &&
+		    !bch2_extent_has_device(bkey_s_c_to_extent(k),
+					    m->data_opts.rewrite_dev))
+			goto nomatch;
+
 		bkey_reassemble(&_insert.k, k);
 		insert = bkey_i_to_extent(&_insert.k);
@@ -81,11 +87,12 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 		bch2_cut_back(new->k.p, &insert->k);
 		bch2_cut_back(insert->k.p, &new->k);
 
-		if (m->move_dev >= 0 &&
-		    (ptr = (struct bch_extent_ptr *)
-		     bch2_extent_has_device(extent_i_to_s_c(insert),
-					    m->move_dev)))
+		if (m->data_cmd == DATA_REWRITE) {
+			ptr = (struct bch_extent_ptr *)
+				bch2_extent_has_device(extent_i_to_s_c(insert),
+						       m->data_opts.rewrite_dev);
 			bch2_extent_drop_ptr(extent_i_to_s(insert), ptr);
+		}
 
 		extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) {
 			if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) {
@@ -108,7 +115,32 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 		bch2_extent_narrow_crcs(insert,
 				(struct bch_extent_crc_unpacked) { 0 });
 		bch2_extent_normalize(c, extent_i_to_s(insert).s);
-		bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert));
+		bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert),
+						 c->opts.data_replicas);
+
+		/*
+		 * It's possible we race, and for whatever reason the extent now
+		 * has fewer replicas than when we last looked at it - meaning
+		 * we need to get a disk reservation here:
+		 */
+		nr_dirty = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i));
+		if (m->nr_ptrs_reserved < nr_dirty) {
+			unsigned sectors = (nr_dirty - m->nr_ptrs_reserved) *
+				keylist_sectors(keys);
+
+			/*
+			 * can't call bch2_disk_reservation_add() with btree
+			 * locks held, at least not without a song and dance
+			 */
+			bch2_btree_iter_unlock(&iter);
+
+			ret = bch2_disk_reservation_add(c, &op->res, sectors, 0);
+			if (ret)
+				goto out;
+
+			m->nr_ptrs_reserved = nr_dirty;
+			goto next;
+		}
 
 		ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
 					      extent_i_to_s_c(insert).s_c);
@@ -119,7 +151,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 				NULL, op_journal_seq(op),
 				BTREE_INSERT_ATOMIC|
 				BTREE_INSERT_NOFAIL|
-				m->btree_insert_flags,
+				m->data_opts.btree_insert_flags,
 				BTREE_INSERT_ENTRY(&iter, &insert->k_i));
 		if (!ret)
 			atomic_long_inc(&c->extent_migrate_done);
@@ -150,8 +182,7 @@ out:
 	return ret;
 }
 
-void bch2_migrate_write_init(struct migrate_write *m,
-			     struct bch_read_bio *rbio)
+void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
 {
 	/* write bio must own pages: */
 	BUG_ON(!m->op.wbio.bio.bi_vcnt);
@@ -162,16 +193,39 @@ void bch2_migrate_write_init(struct migrate_write *m,
 	m->op.pos	= rbio->pos;
 	m->op.version	= rbio->version;
 	m->op.crc	= rbio->pick.crc;
+	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
 
 	if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) {
 		m->op.nonce	= m->op.crc.nonce + m->op.crc.offset;
 		m->op.csum_type = m->op.crc.csum_type;
 	}
 
-	if (m->move_dev >= 0)
-		bch2_dev_list_drop_dev(&m->op.devs_have, m->move_dev);
+	if (m->data_cmd == DATA_REWRITE)
+		bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+}
+
+int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
+			    struct bch_devs_mask *devs,
+			    struct write_point_specifier wp,
+			    struct bch_io_opts io_opts,
+			    enum data_cmd data_cmd,
+			    struct data_opts data_opts,
+			    struct bkey_s_c k)
+{
+	int ret;
 
-	if (m->btree_insert_flags & BTREE_INSERT_USE_RESERVE)
+	m->data_cmd	= data_cmd;
+	m->data_opts	= data_opts;
+	m->nr_ptrs_reserved = bch2_extent_nr_dirty_ptrs(k);
+
+	bch2_write_op_init(&m->op, c);
+	m->op.csum_type = bch2_data_checksum_type(c, io_opts.data_checksum);
+	m->op.compression_type =
+		bch2_compression_opt_to_type(io_opts.compression);
+	m->op.devs	= devs;
+	m->op.write_point = wp;
+
+	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
 		m->op.alloc_reserve = RESERVE_MOVINGGC;
 
 	m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
@@ -180,10 +234,35 @@ void bch2_migrate_write_init(struct migrate_write *m,
 		       BCH_WRITE_DATA_ENCODED|
 		       BCH_WRITE_NOMARK_REPLICAS;
 
-	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
 	m->op.nr_replicas	= 1;
 	m->op.nr_replicas_required = 1;
 	m->op.index_update_fn	= bch2_migrate_index_update;
+
+	switch (data_cmd) {
+	case DATA_ADD_REPLICAS:
+		if (m->nr_ptrs_reserved < c->opts.data_replicas) {
+			m->op.nr_replicas = c->opts.data_replicas - m->nr_ptrs_reserved;
+
+			ret = bch2_disk_reservation_get(c, &m->op.res,
+							k.k->size,
+							m->op.nr_replicas, 0);
+			if (ret)
+				return ret;
+
+			m->nr_ptrs_reserved = c->opts.data_replicas;
+		}
+		break;
+	case DATA_REWRITE:
+		break;
+	case DATA_PROMOTE:
+		m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
+		m->op.flags |= BCH_WRITE_CACHED;
+		break;
+	default:
+		BUG();
+	}
+
+	return 0;
 }
 
 static void move_free(struct closure *cl)
@@ -210,7 +289,7 @@ static void move_write(struct closure *cl)
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 
 	if (likely(!io->rbio.bio.bi_status)) {
-		bch2_migrate_write_init(&io->write, &io->rbio);
+		bch2_migrate_read_done(&io->write, &io->rbio);
 		closure_call(&io->write.op.cl, bch2_write, NULL, cl);
 	}
 
@@ -238,19 +317,19 @@ static void move_read_endio(struct bio *bio)
 }
 
 static int bch2_move_extent(struct bch_fs *c,
-			    struct moving_context *ctxt,
-			    struct bch_devs_mask *devs,
-			    struct write_point_specifier wp,
-			    int btree_insert_flags,
-			    int move_device,
-			    struct bch_io_opts opts,
-			    struct bkey_s_c_extent e)
+			    struct moving_context *ctxt,
+			    struct bch_devs_mask *devs,
+			    struct write_point_specifier wp,
+			    struct bch_io_opts io_opts,
+			    struct bkey_s_c_extent e,
+			    enum data_cmd data_cmd,
+			    struct data_opts data_opts)
 {
 	struct extent_pick_ptr pick;
 	struct moving_io *io;
 	const struct bch_extent_ptr *ptr;
 	struct bch_extent_crc_unpacked crc;
-	unsigned sectors = e.k->size, pages, nr_good;
+	unsigned sectors = e.k->size, pages;
 	int ret = -ENOMEM;
 
 	bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
@@ -279,7 +358,7 @@ static int bch2_move_extent(struct bch_fs *c,
 	if (bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
 		goto err_free;
 
-	io->rbio.opts = opts;
+	io->rbio.opts = io_opts;
 	bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
 	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 	io->rbio.bio.bi_iter.bi_size = sectors << 9;
@@ -288,27 +367,10 @@ static int bch2_move_extent(struct bch_fs *c,
 	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(e.k);
 	io->rbio.bio.bi_end_io		= move_read_endio;
 
-	io->write.btree_insert_flags = btree_insert_flags;
-	io->write.move_dev	= move_device;
-
-	bch2_write_op_init(&io->write.op, c);
-	io->write.op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
-	io->write.op.compression_type =
-		bch2_compression_opt_to_type(opts.compression);
-	io->write.op.devs	= devs;
-	io->write.op.write_point = wp;
-
-	if (move_device < 0 &&
-	    ((nr_good = bch2_extent_nr_good_ptrs(c, e)) <
-	     c->opts.data_replicas)) {
-		io->write.op.nr_replicas = c->opts.data_replicas - nr_good;
-
-		ret = bch2_disk_reservation_get(c, &io->write.op.res,
-						e.k->size,
-						io->write.op.nr_replicas, 0);
-		if (ret)
-			goto err_free_pages;
-	}
+	ret = bch2_migrate_write_init(c, &io->write, devs, wp,
+				      io_opts, data_cmd, data_opts, e.s_c);
+	if (ret)
+		goto err_free_pages;
 
 	atomic64_inc(&ctxt->stats->keys_moved);
 	atomic64_add(e.k->size, &ctxt->stats->sectors_moved);
@@ -369,8 +431,6 @@ int bch2_move_data(struct bch_fs *c,
 		   unsigned sectors_in_flight,
 		   struct bch_devs_mask *devs,
 		   struct write_point_specifier wp,
-		   int btree_insert_flags,
-		   int move_device,
 		   struct bpos start,
 		   struct bpos end,
 		   move_pred_fn pred, void *arg,
@@ -378,12 +438,14 @@ int bch2_move_data(struct bch_fs *c,
 {
 	bool kthread = (current->flags & PF_KTHREAD) != 0;
 	struct moving_context ctxt = { .stats = stats };
-	struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts);
+	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 	BKEY_PADDED(k) tmp;
 	struct bkey_s_c k;
 	struct bkey_s_c_extent e;
+	struct data_opts data_opts;
+	enum data_cmd data_cmd;
 	u64 cur_inum = U64_MAX;
-	int ret = 0;
+	int ret = 0, ret2;
 
 	closure_init_stack(&ctxt.cl);
 	INIT_LIST_HEAD(&ctxt.reads);
@@ -430,28 +492,44 @@ peek:
 			/* don't hold btree locks while looking up inode: */
 			bch2_btree_iter_unlock(&stats->iter);
 
-			opts = bch2_opts_to_inode_opts(c->opts);
+			io_opts = bch2_opts_to_inode_opts(c->opts);
 			if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
-				bch2_io_opts_apply(&opts, bch2_inode_opts_get(&inode));
+				bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
 			cur_inum = k.k->p.inode;
 			goto peek;
 		}
 
-		if (!pred(arg, e))
+		switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e,
+					 &io_opts, &data_opts))) {
+		case DATA_SKIP:
 			goto next;
+		case DATA_SCRUB:
+			BUG();
+		case DATA_ADD_REPLICAS:
+		case DATA_REWRITE:
+		case DATA_PROMOTE:
+			break;
+		default:
+			BUG();
+		}
 
 		/* unlock before doing IO: */
 		bkey_reassemble(&tmp.k, k);
 		k = bkey_i_to_s_c(&tmp.k);
 		bch2_btree_iter_unlock(&stats->iter);
 
-		if (bch2_move_extent(c, &ctxt, devs, wp,
-				     btree_insert_flags,
-				     move_device, opts,
-				     bkey_s_c_to_extent(k))) {
-			/* memory allocation failure, wait for some IO to finish */
-			bch2_move_ctxt_wait_for_io(&ctxt);
-			continue;
+		ret2 = bch2_move_extent(c, &ctxt, devs, wp, io_opts,
+					bkey_s_c_to_extent(k),
+					data_cmd, data_opts);
+		if (ret2) {
+			if (ret2 == -ENOMEM) {
+				/* memory allocation failure, wait for some IO to finish */
+				bch2_move_ctxt_wait_for_io(&ctxt);
+				continue;
+			}
+
+			/* XXX signal failure */
+			goto next;
 		}
 
 		if (rate)
@@ -534,18 +612,35 @@ static int bch2_move_btree(struct bch_fs *c,
 			   void *arg,
 			   struct bch_move_stats *stats)
 {
+	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 	struct btree *b;
 	unsigned id;
+	struct data_opts data_opts;
+	enum data_cmd cmd;
 	int ret = 0;
 
 	stats->data_type = BCH_DATA_BTREE;
 
 	for (id = 0; id < BTREE_ID_NR; id++) {
 		for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-			if (pred(arg, bkey_i_to_s_c_extent(&b->key)))
-				ret = bch2_btree_node_rewrite(c, &stats->iter,
-						b->data->keys.seq, 0) ?: ret;
+			switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE,
+					    bkey_i_to_s_c_extent(&b->key),
+					    &io_opts,
+					    &data_opts))) {
+			case DATA_SKIP:
+				goto next;
+			case DATA_SCRUB:
+				BUG();
+			case DATA_ADD_REPLICAS:
+			case DATA_REWRITE:
+				break;
+			default:
+				BUG();
+			}
+			ret = bch2_btree_node_rewrite(c, &stats->iter,
+					b->data->keys.seq, 0) ?: ret;
+next:
 			bch2_btree_iter_cond_resched(&stats->iter);
 		}
 
@@ -556,32 +651,48 @@ static int bch2_move_btree(struct bch_fs *c,
 }
 
 #if 0
-static bool scrub_data_pred(void *arg, struct bkey_s_c_extent e)
+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
+				enum bkey_type type,
+				struct bkey_s_c_extent e,
+				struct bch_io_opts *io_opts,
+				struct data_opts *data_opts)
 {
+	return DATA_SCRUB;
 }
 #endif
 
-static bool rereplicate_metadata_pred(void *arg, struct bkey_s_c_extent e)
+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
+				      enum bkey_type type,
+				      struct bkey_s_c_extent e,
+				      struct bch_io_opts *io_opts,
+				      struct data_opts *data_opts)
 {
-	struct bch_fs *c = arg;
 	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e);
+	unsigned replicas = type == BKEY_TYPE_BTREE
+		? c->opts.metadata_replicas
+		: c->opts.data_replicas;
 
-	return nr_good && nr_good < c->opts.metadata_replicas;
-}
+	if (!nr_good || nr_good >= replicas)
+		return DATA_SKIP;
 
-static bool rereplicate_data_pred(void *arg, struct bkey_s_c_extent e)
-{
-	struct bch_fs *c = arg;
-	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e);
-
-	return nr_good && nr_good < c->opts.data_replicas;
+	data_opts->btree_insert_flags = 0;
+	return DATA_ADD_REPLICAS;
 }
 
-static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
+				  enum bkey_type type,
+				  struct bkey_s_c_extent e,
+				  struct bch_io_opts *io_opts,
+				  struct data_opts *data_opts)
 {
 	struct bch_ioctl_data *op = arg;
 
-	return bch2_extent_has_device(e, op->migrate.dev);
+	if (!bch2_extent_has_device(e, op->migrate.dev))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags	= 0;
+	data_opts->rewrite_dev		= op->migrate.dev;
+	return DATA_REWRITE;
 }
 
 int bch2_data_job(struct bch_fs *c,
@@ -595,16 +706,15 @@ int bch2_data_job(struct bch_fs *c,
 		stats->data_type = BCH_DATA_JOURNAL;
 		ret = bch2_journal_flush_device(&c->journal, -1);
 
-		ret = bch2_move_btree(c, rereplicate_metadata_pred, c, stats) ?: ret;
+		ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_gc_btree_replicas(c) ?: ret;
 
 		ret = bch2_move_data(c, NULL,
 				     SECTORS_IN_FLIGHT_PER_DEVICE,
 				     NULL,
 				     writepoint_hashed((unsigned long) current),
-				     0, -1,
 				     op.start,
 				     op.end,
-				     rereplicate_data_pred, c, stats) ?: ret;
+				     rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_gc_data_replicas(c) ?: ret;
 		break;
 	case BCH_DATA_OP_MIGRATE:
@@ -620,7 +730,6 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_move_data(c, NULL,
 				     SECTORS_IN_FLIGHT_PER_DEVICE,
 				     NULL,
 				     writepoint_hashed((unsigned long) current),
-				     0, -1,
 				     op.start,
 				     op.end,
 				     migrate_pred, &op, stats) ?: ret;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 07aa5669524c..819e5d9f0a24 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -8,23 +8,47 @@
 struct bch_read_bio;
 struct moving_context;
 
+enum data_cmd {
+	DATA_SKIP,
+	DATA_SCRUB,
+	DATA_ADD_REPLICAS,
+	DATA_REWRITE,
+	DATA_PROMOTE,
+};
+
+struct data_opts {
+	unsigned	rewrite_dev;
+	int		btree_insert_flags;
+};
+
 struct migrate_write {
+	enum data_cmd	data_cmd;
+	struct data_opts data_opts;
+
+	unsigned	nr_ptrs_reserved;
+
 	struct moving_context *ctxt;
 
 	/* what we read: */
 	struct bch_extent_ptr ptr;
 	u64		offset;
 
-	int		move_dev;
-	int		btree_insert_flags;
 	struct bch_write_op op;
 };
 
-void bch2_migrate_write_init(struct migrate_write *, struct bch_read_bio *);
+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *);
+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
+			    struct bch_devs_mask *,
+			    struct write_point_specifier,
+			    struct bch_io_opts,
+			    enum data_cmd, struct data_opts,
+			    struct bkey_s_c);
 
 #define SECTORS_IN_FLIGHT_PER_DEVICE	2048
 
-typedef bool (*move_pred_fn)(void *, struct bkey_s_c_extent);
+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
+				enum bkey_type, struct bkey_s_c_extent,
+				struct bch_io_opts *, struct data_opts *);
 
 struct bch_move_stats {
 	enum bch_data_type data_type;
@@ -39,7 +63,7 @@ struct bch_move_stats {
 int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
 		   unsigned, struct bch_devs_mask *,
 		   struct write_point_specifier,
-		   int, int, struct bpos, struct bpos,
+		   struct bpos, struct bpos,
 		   move_pred_fn, void *,
 		   struct bch_move_stats *);
 
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 515d5001aec8..c306a89f8401 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -61,9 +61,9 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 	return (l->offset > r->offset) - (l->offset < r->offset);
 }
 
-static bool copygc_pred(void *arg, struct bkey_s_c_extent e)
+static bool __copygc_pred(struct bch_dev *ca,
+			  struct bkey_s_c_extent e)
 {
-	struct bch_dev *ca = arg;
 	copygc_heap *h = &ca->copygc_heap;
 	const struct bch_extent_ptr *ptr =
 		bch2_extent_has_device(e, ca->dev_idx);
@@ -83,6 +83,22 @@ static bool __copygc_pred(struct bch_dev *ca,
 	return false;
 }
 
+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
+				 enum bkey_type type,
+				 struct bkey_s_c_extent e,
+				 struct bch_io_opts *io_opts,
+				 struct data_opts *data_opts)
+{
+	struct bch_dev *ca = arg;
+
+	if (!__copygc_pred(ca, e))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE,
+	data_opts->rewrite_dev = ca->dev_idx;
+	return DATA_REWRITE;
+}
+
 static bool have_copygc_reserve(struct bch_dev *ca)
 {
 	bool ret;
@@ -165,8 +181,6 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 			SECTORS_IN_FLIGHT_PER_DEVICE,
 			&ca->self,
 			writepoint_ptr(&ca->copygc_write_point),
-			BTREE_INSERT_USE_RESERVE,
-			ca->dev_idx,
 			POS_MIN, POS_MAX,
 			copygc_pred, ca,
 			&move_stats);
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index e722d1af388c..8a3ac66b948c 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -73,10 +73,10 @@ enum opt_type {
 	BCH_OPT(errors,			u8,	OPT_RUNTIME,		\
 		OPT_STR(bch2_error_actions),				\
 		BCH_SB_ERROR_ACTION,	BCH_ON_ERROR_RO)		\
-	BCH_OPT(metadata_replicas,	u8,	OPT_MOUNT,		\
+	BCH_OPT(metadata_replicas,	u8,	OPT_RUNTIME,		\
 		OPT_UINT(1, BCH_REPLICAS_MAX),				\
 		BCH_SB_META_REPLICAS_WANT, 1)				\
-	BCH_OPT(data_replicas,		u8,	OPT_MOUNT,		\
+	BCH_OPT(data_replicas,		u8,	OPT_RUNTIME,		\
 		OPT_UINT(1, BCH_REPLICAS_MAX),				\
 		BCH_SB_DATA_REPLICAS_WANT, 1)				\
 	BCH_OPT(metadata_replicas_required, u8,	OPT_MOUNT,		\
diff --git a/fs/bcachefs/tier.c b/fs/bcachefs/tier.c
index c4625c80bbf8..775c2e2be686 100644
--- a/fs/bcachefs/tier.c
+++ b/fs/bcachefs/tier.c
@@ -14,10 +14,9 @@
 #include <linux/kthread.h>
 #include <trace/events/bcachefs.h>
 
-static bool tiering_pred(void *arg, struct bkey_s_c_extent e)
+static bool __tiering_pred(struct bch_fs *c, struct bch_tier *tier,
+			   struct bkey_s_c_extent e)
 {
-	struct bch_tier *tier = arg;
-	struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
 	const struct bch_extent_ptr *ptr;
 	unsigned replicas = 0;
 
@@ -33,6 +32,21 @@ static bool __tiering_pred(struct bch_fs *c, struct bch_tier *tier,
 	return replicas < c->opts.data_replicas;
 }
 
+static enum data_cmd tiering_pred(struct bch_fs *c, void *arg,
+				  enum bkey_type type,
+				  struct bkey_s_c_extent e,
+				  struct bch_io_opts *io_opts,
+				  struct data_opts *data_opts)
+{
+	struct bch_tier *tier = arg;
+
+	if (!__tiering_pred(c, tier, e))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = 0;
+	return DATA_ADD_REPLICAS;
+}
+
 static int bch2_tiering_thread(void *arg)
 {
 	struct bch_tier *tier = arg;
@@ -90,8 +104,6 @@ static int bch2_tiering_thread(void *arg)
 				SECTORS_IN_FLIGHT_PER_DEVICE * nr_devices,
 				&tier->devs,
 				writepoint_ptr(&tier->wp),
-				0,
-				-1,
 				POS_MIN, POS_MAX,
 				tiering_pred, tier,
 				&move_stats);
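
A note on the trickiest hunk above, in bch2_migrate_index_update(): between reading an extent and reinserting it, another thread may have dropped replicas, so at index-update time the extent can have more dirty pointers than the move reserved space for. The new code re-counts dirty pointers and, if short, drops btree locks, tops up the disk reservation, and retries. The following toy model sketches just that top-up arithmetic; toy_reservation, disk_reservation_add(), and index_update() are hypothetical stand-ins, not the bcachefs reservation API:

#include <stdio.h>

struct toy_reservation {
	unsigned sectors;	/* sectors reserved so far */
};

/* Stand-in for bch2_disk_reservation_add(); pretend it always succeeds. */
static int disk_reservation_add(struct toy_reservation *res, unsigned sectors)
{
	res->sectors += sectors;
	return 0;
}

/* Model of the re-reservation check: if the key now has more dirty
 * pointers than we reserved for, reserve the difference and retry. */
static int index_update(struct toy_reservation *res,
			unsigned *nr_ptrs_reserved,
			unsigned nr_dirty, unsigned key_sectors)
{
	if (*nr_ptrs_reserved < nr_dirty) {
		unsigned sectors = (nr_dirty - *nr_ptrs_reserved) * key_sectors;
		int ret = disk_reservation_add(res, sectors);

		if (ret)
			return ret;

		*nr_ptrs_reserved = nr_dirty;
		/* real code: jumps to the next iteration with locks reacquired */
	}
	return 0;
}

int main(void)
{
	struct toy_reservation res = { 0 };
	unsigned reserved = 1;

	/* reserved for 1 pointer, but the extent now has 3 dirty pointers: */
	index_update(&res, &reserved, 3, 128);
	printf("reserved %u sectors for %u ptrs\n", res.sectors, reserved);
	return 0;
}

The real function must take the reservation with btree locks dropped (hence the "song and dance" comment in the hunk), which is why it retries the whole index update rather than reserving inline.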