Diffstat (limited to 'fs')
-rw-r--r--	fs/bcachefs/data_update.c	| 217
-rw-r--r--	fs/bcachefs/data_update.h	|  40
-rw-r--r--	fs/bcachefs/extents.c		|  69
-rw-r--r--	fs/bcachefs/extents.h		|   5
-rw-r--r--	fs/bcachefs/io.c		|  13
-rw-r--r--	fs/bcachefs/move.c		| 160
-rw-r--r--	fs/bcachefs/move.h		|   8
-rw-r--r--	fs/bcachefs/movinggc.c		|   8
-rw-r--r--	fs/bcachefs/rebalance.c		|  84
9 files changed, 286 insertions, 318 deletions
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 4b0a866ab1fc..2d7c650ced55 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -90,6 +90,16 @@ next:
 	return ret;
 }
 
+static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
+{
+	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+	struct bch_extent_ptr *ptr;
+
+	bkey_for_each_ptr(ptrs, ptr)
+		if (ptr->dev == dev)
+			ptr->cached = true;
+}
+
 static int bch2_data_update_index_update(struct bch_write_op *op)
 {
 	struct bch_fs *c = op->c;
@@ -114,6 +124,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
 
 	while (1) {
 		struct bkey_s_c k;
+		struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
 		struct bkey_i *insert;
 		struct bkey_i_extent *new;
 		const union bch_extent_entry *entry;
@@ -122,6 +133,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
 		bool did_work = false;
 		bool should_check_enospc;
 		s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+		unsigned i;
 
 		bch2_trans_begin(&trans);
 
@@ -132,8 +144,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
 
 		new = bkey_i_to_extent(bch2_keylist_front(keys));
 
-		if (bversion_cmp(k.k->version, new->k.version) ||
-		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
+		if (!bch2_extents_match(k, old))
 			goto nomatch;
 
 		bkey_reassemble(_insert.k, k);
@@ -147,20 +158,39 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
 		bch2_cut_back(new->k.p, insert);
 		bch2_cut_back(insert->k.p, &new->k_i);
 
-		if (m->data_cmd == DATA_REWRITE) {
-			struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
-				bch2_bkey_has_device(bkey_i_to_s_c(insert),
-						     m->data_opts.rewrite_dev);
-			if (!old_ptr)
-				goto nomatch;
-
-			if (old_ptr->cached)
-				extent_for_each_ptr(extent_i_to_s(new), new_ptr)
-					new_ptr->cached = true;
-
-			__bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+		/*
+		 * @old: extent that we read from
+		 * @insert: key that we're going to update, initialized from
+		 * extent currently in btree - same as @old unless we raced with
+		 * other updates
+		 * @new: extent with new pointers that we'll be adding to @insert
+		 *
+		 * First, drop rewrite_ptrs from @new:
+		 */
+		i = 0;
+		bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
+			if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+			    bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) {
+				/*
+				 * If we're going to be adding a pointer to the
+				 * same device, we have to drop the old one -
+				 * otherwise, we can just mark it cached:
+				 */
+				if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev))
+					bch2_bkey_drop_device(bkey_i_to_s(insert), p.ptr.dev);
+				else
+					bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev);
+			}
+			i++;
 		}
 
+		/*
+		 * hack: we may have dropped all the existing pointers but we're
+		 * about to add our new pointers:
+		 */
+		insert->k.type = k.k->type;
+
+		/* Add new ptrs: */
 		extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
 			if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
 				/*
@@ -178,12 +208,8 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
 		if (!did_work)
 			goto nomatch;
 
-		bch2_bkey_narrow_crcs(insert,
-				(struct bch_extent_crc_unpacked) { 0 });
+		bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
 		bch2_extent_normalize(c, bkey_i_to_s(insert));
-		bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
-					       op->opts.background_target,
-					       op->opts.data_replicas);
 
 		ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
 						 &should_check_enospc,
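To make the rewrite decision in the hunk above easier to follow in isolation, here is a minimal userspace sketch of the same rule. struct ptr, apply_rewrite_mask() and the example devices are illustrative stand-ins, not the kernel types: for each pointer index i set in rewrite_ptrs, the existing pointer is dropped outright if the new extent already carries a pointer on the same device, and demoted to a cached pointer otherwise.

#include <stdbool.h>
#include <stdio.h>

struct ptr {
	unsigned	dev;
	bool		cached;
	bool		present;
};

static void apply_rewrite_mask(struct ptr *old_ptrs, unsigned nr_old,
			       const struct ptr *new_ptrs, unsigned nr_new,
			       unsigned rewrite_ptrs)
{
	for (unsigned i = 0; i < nr_old; i++) {
		bool new_has_dev = false;

		if (!((1U << i) & rewrite_ptrs))
			continue;

		for (unsigned j = 0; j < nr_new; j++)
			if (new_ptrs[j].dev == old_ptrs[i].dev)
				new_has_dev = true;

		if (new_has_dev)
			old_ptrs[i].present = false;	/* drop the old pointer */
		else
			old_ptrs[i].cached = true;	/* keep it, but as a cached copy */
	}
}

int main(void)
{
	struct ptr old[2]  = { { .dev = 0, .present = true },
			       { .dev = 1, .present = true } };
	struct ptr newp[1] = { { .dev = 1 } };

	apply_rewrite_mask(old, 2, newp, 1, 0x3);
	printf("ptr0: cached=%d present=%d\n", old[0].cached, old[0].present); /* 1 1 */
	printf("ptr1: cached=%d present=%d\n", old[1].cached, old[1].present); /* 0 0 */
	return 0;
}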
@@ -250,134 +276,101 @@ out:
 	return ret;
 }
 
-void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio)
+void bch2_data_update_read_done(struct data_update *m,
+				struct bch_extent_crc_unpacked crc,
+				struct closure *cl)
 {
 	/* write bio must own pages: */
 	BUG_ON(!m->op.wbio.bio.bi_vcnt);
 
-	m->ptr		= rbio->pick.ptr;
-	m->offset	= rbio->data_pos.offset - rbio->pick.crc.offset;
-	m->op.devs_have	= rbio->devs_have;
-	m->op.pos	= rbio->data_pos;
-	m->op.version	= rbio->version;
-	m->op.crc	= rbio->pick.crc;
-	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
+	m->op.crc = crc;
+	m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
+
+	closure_call(&m->op.cl, bch2_write, NULL, cl);
+}
+
+void bch2_data_update_exit(struct data_update *update)
+{
+	struct bch_fs *c = update->op.c;
 
-	if (m->data_cmd == DATA_REWRITE)
-		bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+	bch2_bkey_buf_exit(&update->k, c);
+	bch2_disk_reservation_put(c, &update->op.res);
+	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
 int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 			  struct write_point_specifier wp,
 			  struct bch_io_opts io_opts,
-			  enum data_cmd data_cmd,
-			  struct data_opts data_opts,
+			  struct data_update_opts data_opts,
 			  enum btree_id btree_id,
 			  struct bkey_s_c k)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
-	struct bch_extent_crc_unpacked crc;
 	struct extent_ptr_decoded p;
+	unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
 	int ret;
 
+	bch2_bkey_buf_init(&m->k);
+	bch2_bkey_buf_reassemble(&m->k, c, k);
 	m->btree_id	= btree_id;
-	m->data_cmd	= data_cmd;
 	m->data_opts	= data_opts;
-	m->nr_ptrs_reserved = 0;
 
 	bch2_write_op_init(&m->op, c, io_opts);
-
-	if (!bch2_bkey_is_incompressible(k))
-		m->op.compression_type =
-			bch2_compression_opt_to_type[io_opts.background_compression ?:
-						     io_opts.compression];
-	else
-		m->op.incompressible = true;
-
+	m->op.pos	= bkey_start_pos(k.k);
+	m->op.version	= k.k->version;
 	m->op.target	= data_opts.target,
 	m->op.write_point = wp;
-
-	/*
-	 * op->csum_type is normally initialized from the fs/file's current
-	 * options - but if an extent is encrypted, we require that it stays
-	 * encrypted:
-	 */
-	bkey_for_each_crc(k.k, ptrs, crc, entry)
-		if (bch2_csum_type_is_encryption(crc.csum_type)) {
-			m->op.nonce	= crc.nonce + crc.offset;
-			m->op.csum_type = crc.csum_type;
-			break;
-		}
-
-	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
-		m->op.alloc_reserve = RESERVE_movinggc;
-	} else {
-		/* XXX: this should probably be passed in */
-		m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
-	}
-
-	m->op.flags |= BCH_WRITE_PAGES_STABLE|
+	m->op.flags	|= BCH_WRITE_PAGES_STABLE|
 		BCH_WRITE_PAGES_OWNED|
 		BCH_WRITE_DATA_ENCODED|
-		BCH_WRITE_FROM_INTERNAL;
-
-	m->op.nr_replicas	= data_opts.nr_replicas;
-	m->op.nr_replicas_required = data_opts.nr_replicas;
+		BCH_WRITE_FROM_INTERNAL|
+		m->data_opts.write_flags;
+	m->op.compression_type =
+		bch2_compression_opt_to_type[io_opts.background_compression ?:
					     io_opts.compression];
+	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
+		m->op.alloc_reserve = RESERVE_movinggc;
 	m->op.index_update_fn = bch2_data_update_index_update;
 
-	switch (data_cmd) {
-	case DATA_ADD_REPLICAS: {
-		/*
-		 * DATA_ADD_REPLICAS is used for moving data to a different
-		 * device in the background, and due to compression the new copy
-		 * might take up more space than the old copy:
-		 */
-#if 0
-		int nr = (int) io_opts.data_replicas -
-			bch2_bkey_nr_ptrs_allocated(k);
-#endif
-		int nr = (int) io_opts.data_replicas;
+	i = 0;
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		if (p.ptr.cached)
+			m->data_opts.rewrite_ptrs &= ~(1U << i);
 
-		if (nr > 0) {
-			m->op.nr_replicas = m->nr_ptrs_reserved = nr;
+		if (!((1U << i) & m->data_opts.rewrite_ptrs))
+			bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
 
-			ret = bch2_disk_reservation_get(c, &m->op.res,
-					k.k->size, m->op.nr_replicas, 0);
-			if (ret)
-				return ret;
-		}
-		break;
-	}
-	case DATA_REWRITE: {
-		unsigned compressed_sectors = 0;
+		if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+		    crc_is_compressed(p.crc))
+			reserve_sectors += k.k->size;
 
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (p.ptr.dev == data_opts.rewrite_dev) {
-				if (p.ptr.cached)
-					m->op.flags |= BCH_WRITE_CACHED;
+		/*
+		 * op->csum_type is normally initialized from the fs/file's
+		 * current options - but if an extent is encrypted, we require
+		 * that it stays encrypted:
+		 */
+		if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
+			m->op.nonce	= p.crc.nonce + p.crc.offset;
+			m->op.csum_type = p.crc.csum_type;
+		}
 
-				if (!p.ptr.cached &&
-				    crc_is_compressed(p.crc))
-					compressed_sectors += p.crc.compressed_size;
-			}
+		if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+			m->op.incompressible = true;
 
-		if (compressed_sectors) {
-			ret = bch2_disk_reservation_add(c, &m->op.res,
-					k.k->size * m->op.nr_replicas,
-					BCH_DISK_RESERVATION_NOFAIL);
-			if (ret)
-				return ret;
-		}
-		break;
+		i++;
 	}
-	case DATA_PROMOTE:
-		m->op.flags	|= BCH_WRITE_ALLOC_NOWAIT;
-		m->op.flags	|= BCH_WRITE_CACHED;
-		break;
-	default:
-		BUG();
+
+	if (reserve_sectors) {
+		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+				m->data_opts.extra_replicas
+				? 0
+				: BCH_DISK_RESERVATION_NOFAIL);
+		if (ret)
+			return ret;
 	}
 
+	m->op.nr_replicas = m->op.nr_replicas_required =
+		hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
 	return 0;
 }
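The sizing logic at the end of bch2_data_update_init() can be restated in a few lines. The standalone sketch below (update_nr_replicas() and update_reserve_sectors() are names local to this sketch) recomputes what the patch computes with hweight32() and reserve_sectors: one new replica per rewritten pointer plus any extras, and a reservation covering the extras plus a full-size copy for every compressed pointer being rewritten, since the new copy may not compress as well.

#include <stdint.h>
#include <stdio.h>

static unsigned update_nr_replicas(uint32_t rewrite_ptrs, unsigned extra_replicas)
{
	/* __builtin_popcount() plays the role of the kernel's hweight32() */
	return __builtin_popcount(rewrite_ptrs) + extra_replicas;
}

static uint64_t update_reserve_sectors(uint64_t extent_sectors,
				       uint32_t rewrite_ptrs,
				       uint32_t compressed_ptrs,
				       unsigned extra_replicas)
{
	uint64_t reserve = extent_sectors * extra_replicas;

	/* one full-size reservation per compressed pointer we rewrite: */
	reserve += extent_sectors *
		__builtin_popcount(rewrite_ptrs & compressed_ptrs);
	return reserve;
}

int main(void)
{
	/* rewrite ptrs 0 and 2; ptr 2 is compressed; no extra replicas: */
	printf("nr_replicas = %u\n", update_nr_replicas(0x5, 0));	/* 2 */
	printf("reserve     = %llu\n", (unsigned long long)
	       update_reserve_sectors(128, 0x5, 0x4, 0));		/* 128 */
	return 0;
}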
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 81388a442a95..e64505453a55 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -3,44 +3,36 @@
 #ifndef _BCACHEFS_DATA_UPDATE_H
 #define _BCACHEFS_DATA_UPDATE_H
 
+#include "bkey_buf.h"
 #include "io_types.h"
 
-enum data_cmd {
-	DATA_SKIP,
-	DATA_SCRUB,
-	DATA_ADD_REPLICAS,
-	DATA_REWRITE,
-	DATA_PROMOTE,
-};
+struct moving_context;
 
-struct data_opts {
+struct data_update_opts {
+	unsigned	rewrite_ptrs;
 	u16		target;
-	u8		rewrite_dev;
-	u8		nr_replicas;
-	int		btree_insert_flags;
+	u8		extra_replicas;
+	unsigned	btree_insert_flags;
+	unsigned	write_flags;
 };
 
 struct data_update {
+	/* extent being updated: */
 	enum btree_id		btree_id;
-	enum data_cmd		data_cmd;
-	struct data_opts	data_opts;
-
-	unsigned		nr_ptrs_reserved;
-
+	struct bkey_buf		k;
+	struct data_update_opts	data_opts;
 	struct moving_context	*ctxt;
-
-	/* what we read: */
-	struct bch_extent_ptr	ptr;
-	u64			offset;
-
 	struct bch_write_op	op;
 };
 
-void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *);
+void bch2_data_update_read_done(struct data_update *,
+				struct bch_extent_crc_unpacked,
+				struct closure *);
+
+void bch2_data_update_exit(struct data_update *);
 
 int bch2_data_update_init(struct bch_fs *, struct data_update *,
 			  struct write_point_specifier,
-			  struct bch_io_opts,
-			  enum data_cmd, struct data_opts,
+			  struct bch_io_opts, struct data_update_opts,
 			  enum btree_id, struct bkey_s_c);
 
 #endif /* _BCACHEFS_DATA_UPDATE_H */
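With enum data_cmd gone, every policy the old commands encoded is expressed by filling in struct data_update_opts. A hedged illustration, using a local mirror of the struct rather than the kernel header, showing how the promote and migrate cases elsewhere in this patch map onto the new fields:

#include <stdint.h>

/* local mirror of the new options struct, for illustration only: */
struct data_update_opts {
	unsigned	rewrite_ptrs;		/* bitmask: which pointers to rewrite */
	uint16_t	target;			/* allocation target for new copies */
	uint8_t		extra_replicas;		/* replicas to add beyond rewrites */
	unsigned	btree_insert_flags;
	unsigned	write_flags;
};

int main(void)
{
	/* promote-style update: one extra cached copy, nothing rewritten */
	struct data_update_opts promote = {
		.extra_replicas	= 1,
	};
	/* migrate-style update: rewrite pointer 0 in place */
	struct data_update_opts migrate = {
		.rewrite_ptrs	= 1U << 0,
	};
	(void)promote;
	(void)migrate;
	return 0;
}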
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 73d756a63572..a368ede65091 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -688,37 +688,6 @@ unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
 	return durability;
 }
 
-void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
-				    unsigned target,
-				    unsigned nr_desired_replicas)
-{
-	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-	union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
-	int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
-
-	if (target && extra > 0)
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-			int n = bch2_extent_ptr_durability(c, p);
-
-			if (n && n <= extra &&
-			    !bch2_dev_in_target(c, p.ptr.dev, target)) {
-				entry->ptr.cached = true;
-				extra -= n;
-			}
-		}
-
-	if (extra > 0)
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-			int n = bch2_extent_ptr_durability(c, p);
-
-			if (n && n <= extra) {
-				entry->ptr.cached = true;
-				extra -= n;
-			}
-		}
-}
-
 void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
 {
 	union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
@@ -940,6 +909,44 @@ bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
 }
 
 /*
+ * Returns true if two extents refer to the same data:
+ */
+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
+{
+	struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
+	struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+	const union bch_extent_entry *entry1, *entry2;
+	struct extent_ptr_decoded p1, p2;
+
+	bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
+		bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+			if (p1.ptr.dev	== p2.ptr.dev &&
+			    p1.ptr.gen	== p2.ptr.gen &&
+			    (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+			    (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+				return true;
+
+	return false;
+}
+
+bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
+			 struct bkey_s_c k2)
+{
+	struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+	const union bch_extent_entry *entry2;
+	struct extent_ptr_decoded p2;
+
+	bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+		if (p1.ptr.dev	== p2.ptr.dev &&
+		    p1.ptr.gen	== p2.ptr.gen &&
+		    (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+		    (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+			return true;
+
+	return false;
+}
+
+/*
  * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
  *
  * Returns true if @k should be dropped entirely
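Both new helpers normalize each pointer to a physical sector before comparing, so two keys still match after one of them has been trimmed. A small standalone model of that arithmetic (toy_ptr and ptrs_match() are stand-ins for the kernel types):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_ptr {
	unsigned	dev;
	unsigned	gen;
	uint64_t	offset;		/* physical offset on the device */
};

/*
 * Two pointers reference the same data iff same device, same generation,
 * and same physical sector once each extent's start offset within its
 * checksum region is accounted for:
 */
static bool ptrs_match(struct toy_ptr p1, uint64_t crc_offset1, uint64_t start1,
		       struct toy_ptr p2, uint64_t crc_offset2, uint64_t start2)
{
	return p1.dev == p2.dev &&
	       p1.gen == p2.gen &&
	       (int64_t)(p1.offset + crc_offset1 - start1) ==
	       (int64_t)(p2.offset + crc_offset2 - start2);
}

int main(void)
{
	struct toy_ptr a = { .dev = 0, .gen = 3, .offset = 1024 };
	struct toy_ptr b = { .dev = 0, .gen = 3, .offset = 1024 };

	/* same data even though the second key was trimmed 8 sectors in: */
	printf("%d\n", ptrs_match(a, 0, 100, b, 8, 108));	/* prints 1 */
	return 0;
}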
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 4f41f0fd6cb1..27371dcfb81f 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -577,9 +577,6 @@ unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
 unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
 unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
 
-void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
-				    unsigned, unsigned);
-
 void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
 void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
 void bch2_extent_ptr_decoded_append(struct bkey_i *,
@@ -612,6 +609,8 @@ bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
 bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
 			   struct bch_extent_ptr, u64);
+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
+bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 7756d4b36ccf..52c0f9300ae3 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1381,13 +1381,12 @@ static void promote_done(struct closure *cl)
 	bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
 			       op->start_time);
 
-	bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
+	bch2_data_update_exit(&op->write);
 	promote_free(c, op);
 }
 
 static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 {
-	struct bch_fs *c = rbio->c;
 	struct closure *cl = &op->cl;
 	struct bio *bio = &op->write.op.wbio.bio;
 
@@ -1401,10 +1400,8 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
 
-	bch2_data_update_read_done(&op->write, rbio);
-
 	closure_init(cl, NULL);
-	closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, cl);
+	bch2_data_update_read_done(&op->write, rbio->pick.crc, cl);
 	closure_return_with_destructor(cl, promote_done);
 }
 
@@ -1463,10 +1460,10 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
 
 	ret = bch2_data_update_init(c, &op->write,
 			writepoint_hashed((unsigned long) current),
 			opts,
-			DATA_PROMOTE,
-			(struct data_opts) {
+			(struct data_update_opts) {
 				.target		= opts.promote_target,
-				.nr_replicas	= 1,
+				.extra_replicas	= 1,
+				.write_flags	= BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
 			},
 			btree_id, k);
 	BUG_ON(ret);
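A small illustration of the promote change: behaviour previously implied by DATA_PROMOTE is now carried as ordinary write flags in data_update_opts.write_flags. The flag values below are stand-ins for this sketch; the real BCH_WRITE_* constants live in the kernel headers.

#include <stdio.h>

#define BCH_WRITE_ALLOC_NOWAIT	(1U << 0)	/* don't block on allocation */
#define BCH_WRITE_CACHED	(1U << 1)	/* new copy is a cached copy */

int main(void)
{
	/* the promote path composes its policy as plain flags: */
	unsigned promote_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;

	printf("promote write_flags = 0x%x\n", promote_flags);
	return 0;
}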
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index bc507c59df33..6f9a21be7906 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -44,17 +44,9 @@ static void move_free(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 	struct moving_context *ctxt = io->write.ctxt;
-	struct bvec_iter_all iter;
-	struct bio_vec *bv;
-
-	bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
-
-	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
-		if (bv->bv_page)
-			__free_page(bv->bv_page);
 
+	bch2_data_update_exit(&io->write);
 	wake_up(&ctxt->wait);
-
 	kfree(io);
 }
 
@@ -75,10 +67,9 @@ static void move_write(struct closure *cl)
 		return;
 	}
 
-	bch2_data_update_read_done(&io->write, &io->rbio);
-
 	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
-	closure_call(&io->write.op.cl, bch2_write, NULL, cl);
+
+	bch2_data_update_read_done(&io->write, io->rbio.pick.crc, cl);
 	continue_at(cl, move_write_done, NULL);
 }
 
@@ -141,8 +132,7 @@ static int bch2_move_extent(struct btree_trans *trans,
 			    struct bch_io_opts io_opts,
 			    enum btree_id btree_id,
 			    struct bkey_s_c k,
-			    enum data_cmd data_cmd,
-			    struct data_opts data_opts)
+			    struct data_update_opts data_opts)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -186,10 +176,12 @@ static int bch2_move_extent(struct btree_trans *trans,
 	io->rbio.bio.bi_end_io		= move_read_endio;
 
 	ret = bch2_data_update_init(c, &io->write, wp, io_opts,
-				    data_cmd, data_opts, btree_id, k);
+				    data_opts, btree_id, k);
 	if (ret)
 		goto err_free_pages;
 
+	io->write.ctxt = ctxt;
+
 	atomic64_inc(&ctxt->stats->keys_moved);
 	atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
 	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
@@ -329,8 +321,7 @@ static int __bch2_move_data(struct bch_fs *c,
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	struct data_opts data_opts;
-	enum data_cmd data_cmd;
+	struct data_update_opts data_opts;
 	u64 cur_inum = U64_MAX;
 	int ret = 0, ret2;
 
@@ -373,18 +364,9 @@ static int __bch2_move_data(struct bch_fs *c,
 		if (ret)
 			continue;
 
-		switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
-		case DATA_SKIP:
+		memset(&data_opts, 0, sizeof(data_opts));
+		if (!pred(c, arg, k, &io_opts, &data_opts))
 			goto next;
-		case DATA_SCRUB:
-			BUG();
-		case DATA_ADD_REPLICAS:
-		case DATA_REWRITE:
-		case DATA_PROMOTE:
-			break;
-		default:
-			BUG();
-		}
 
 		/*
 		 * The iterator gets unlocked by __bch2_read_extent - need to
@@ -393,8 +375,8 @@ static int __bch2_move_data(struct bch_fs *c,
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
 
-		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
-					data_cmd, data_opts);
+		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts,
					btree_id, k, data_opts);
 		if (ret2) {
 			if (ret2 == -EINTR)
 				continue;
@@ -532,8 +514,7 @@ int bch2_evacuate_bucket(struct bch_fs *c,
 			 struct bpos bucket, int gen,
 			 struct bch_ratelimit *rate,
 			 struct write_point_specifier wp,
-			 enum data_cmd data_cmd,
-			 struct data_opts *data_opts,
+			 struct data_update_opts *data_opts,
 			 struct bch_move_stats *stats)
 {
 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
@@ -567,7 +548,9 @@ int bch2_evacuate_bucket(struct bch_fs *c,
 			break;
 
 		if (!bp.level) {
+			const struct bch_extent_ptr *ptr;
 			struct bkey_s_c k;
+			unsigned i = 0;
 
 			k = bch2_backpointer_get_key(&trans, &iter,
 						bucket, bp_offset, bp);
@@ -588,10 +571,16 @@ int bch2_evacuate_bucket(struct bch_fs *c,
 				continue;
 
 			data_opts->target	= io_opts.background_target;
-			data_opts->rewrite_dev	= bucket.inode;
+			data_opts->rewrite_ptrs = 0;
 
-			ret = bch2_move_extent(&trans, &ctxt, wp, io_opts, bp.btree_id, k,
-					       data_cmd, *data_opts);
+			bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
+				if (ptr->dev == bucket.inode)
+					data_opts->rewrite_ptrs |= 1U << i;
+				i++;
+			}
+
+			ret = bch2_move_extent(&trans, &ctxt, wp, io_opts,
+					       bp.btree_id, k, *data_opts);
 			if (ret == -EINTR)
 				continue;
 			if (ret == -ENOMEM) {
@@ -658,9 +647,9 @@ err:
 	return ret;
 }
 
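The evacuate path now names the pointers to move by index rather than by a single device field. A toy version of the mask construction in bch2_evacuate_bucket() above (rewrite_mask_for_dev() is a name local to this sketch): walk the extent's pointers in order and set bit i for every pointer on the device being evacuated.

#include <stdint.h>
#include <stdio.h>

static uint32_t rewrite_mask_for_dev(const unsigned *ptr_devs, unsigned nr_ptrs,
				     unsigned evacuate_dev)
{
	uint32_t mask = 0;

	for (unsigned i = 0; i < nr_ptrs; i++)
		if (ptr_devs[i] == evacuate_dev)
			mask |= 1U << i;
	return mask;
}

int main(void)
{
	/* an extent with pointers on devices 2, 0, 2; evacuating device 2: */
	unsigned devs[] = { 2, 0, 2 };

	printf("mask = 0x%x\n", rewrite_mask_for_dev(devs, 3, 2));	/* 0x5 */
	return 0;
}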
-typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
-					 struct btree *, struct bch_io_opts *,
-					 struct data_opts *);
+typedef bool (*move_btree_pred)(struct bch_fs *, void *,
+				struct btree *, struct bch_io_opts *,
+				struct data_update_opts *);
 
 static int bch2_move_btree(struct bch_fs *c,
 			   enum btree_id start_btree_id, struct bpos start_pos,
@@ -674,8 +663,7 @@ static int bch2_move_btree(struct bch_fs *c,
 	struct btree_iter iter;
 	struct btree *b;
 	enum btree_id id;
-	struct data_opts data_opts;
-	enum data_cmd cmd;
+	struct data_update_opts data_opts;
 	int ret = 0;
 
 	bch2_trans_init(&trans, c, 0, 0);
@@ -704,17 +692,8 @@ retry:
 
 		stats->pos = iter.pos;
 
-		switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
-		case DATA_SKIP:
+		if (!pred(c, arg, b, &io_opts, &data_opts))
 			goto next;
-		case DATA_SCRUB:
-			BUG();
-		case DATA_ADD_REPLICAS:
-		case DATA_REWRITE:
-			break;
-		default:
-			BUG();
-		}
 
 		ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
 		if (ret == -EINTR)
@@ -744,20 +723,10 @@ next:
 	return ret;
 }
 
-#if 0
-static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
-				struct bkey_s_c k,
-				struct bch_io_opts *io_opts,
-				struct data_opts *data_opts)
-{
-	return DATA_SCRUB;
-}
-#endif
-
-static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
-				      struct bkey_s_c k,
-				      struct bch_io_opts *io_opts,
-				      struct data_opts *data_opts)
+static bool rereplicate_pred(struct bch_fs *c, void *arg,
+			     struct bkey_s_c k,
+			     struct bch_io_opts *io_opts,
+			     struct data_update_opts *data_opts)
 {
 	unsigned nr_good = bch2_bkey_durability(c, k);
 	unsigned replicas = bkey_is_btree_ptr(k.k)
@@ -765,43 +734,50 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
 		: io_opts->data_replicas;
 
 	if (!nr_good || nr_good >= replicas)
-		return DATA_SKIP;
+		return false;
 
 	data_opts->target		= 0;
-	data_opts->nr_replicas		= 1;
+	data_opts->extra_replicas	= replicas - nr_good;
 	data_opts->btree_insert_flags	= 0;
-	return DATA_ADD_REPLICAS;
+	return true;
 }
 
-static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
-				  struct bkey_s_c k,
-				  struct bch_io_opts *io_opts,
-				  struct data_opts *data_opts)
+static bool migrate_pred(struct bch_fs *c, void *arg,
+			 struct bkey_s_c k,
+			 struct bch_io_opts *io_opts,
+			 struct data_update_opts *data_opts)
 {
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
 	struct bch_ioctl_data *op = arg;
+	unsigned i = 0;
 
-	if (!bch2_bkey_has_device(k, op->migrate.dev))
-		return DATA_SKIP;
-
+	data_opts->rewrite_ptrs		= 0;
 	data_opts->target		= 0;
-	data_opts->nr_replicas		= 1;
+	data_opts->extra_replicas	= 0;
 	data_opts->btree_insert_flags	= 0;
-	data_opts->rewrite_dev		= op->migrate.dev;
-	return DATA_REWRITE;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		if (ptr->dev == op->migrate.dev)
+			data_opts->rewrite_ptrs |= 1U << i;
+		i++;
+	}
+
+	return data_opts->rewrite_ptrs != 0;
 }
 
-static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
-					    struct btree *b,
-					    struct bch_io_opts *io_opts,
-					    struct data_opts *data_opts)
+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
+				   struct btree *b,
+				   struct bch_io_opts *io_opts,
+				   struct data_update_opts *data_opts)
 {
 	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
 }
 
-static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
-					struct btree *b,
-					struct bch_io_opts *io_opts,
-					struct data_opts *data_opts)
+static bool migrate_btree_pred(struct bch_fs *c, void *arg,
			       struct btree *b,
			       struct bch_io_opts *io_opts,
			       struct data_update_opts *data_opts)
 {
 	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
 }
@@ -830,21 +806,21 @@ static bool bformat_needs_redo(struct bkey_format *f)
 	return false;
 }
 
-static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
-					    struct btree *b,
-					    struct bch_io_opts *io_opts,
-					    struct data_opts *data_opts)
+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+				   struct btree *b,
+				   struct bch_io_opts *io_opts,
+				   struct data_update_opts *data_opts)
 {
 	if (b->version_ondisk != c->sb.version ||
 	    btree_node_need_rewrite(b) ||
 	    bformat_needs_redo(&b->format)) {
 		data_opts->target		= 0;
-		data_opts->nr_replicas		= 1;
+		data_opts->extra_replicas	= 0;
 		data_opts->btree_insert_flags	= 0;
-		return DATA_REWRITE;
+		return true;
 	}
 
-	return DATA_SKIP;
+	return false;
 }
 
 int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 8a0500450d52..27fee75c3626 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -24,9 +24,8 @@ struct moving_context {
 	wait_queue_head_t	wait;
 };
 
-typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
-				      struct bkey_s_c,
-				      struct bch_io_opts *, struct data_opts *);
+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
+			     struct bch_io_opts *, struct data_update_opts *);
 
 int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
 
@@ -41,8 +40,7 @@ int bch2_move_data(struct bch_fs *,
 int bch2_evacuate_bucket(struct bch_fs *,
 			 struct bpos, int,
 			 struct bch_ratelimit *,
 			 struct write_point_specifier,
-			 enum data_cmd,
-			 struct data_opts *,
+			 struct data_update_opts *,
 			 struct bch_move_stats *);
 
 int bch2_data_job(struct bch_fs *, struct bch_move_stats *,
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index fb629129d794..47e3f6333757 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -95,9 +95,8 @@ static int bch2_copygc(struct bch_fs *c)
 	struct bch_dev *ca;
 	unsigned dev_idx;
 	size_t heap_size = 0;
-	struct data_opts data_opts = {
-		.nr_replicas		= 1,
-		.btree_insert_flags	= BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
+	struct data_update_opts data_opts = {
+		.btree_insert_flags	= BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
 	};
 	int ret;
 
@@ -154,8 +153,7 @@ static int bch2_copygc(struct bch_fs *c)
 		ret = bch2_evacuate_bucket(c, POS(e.dev, e.bucket), e.gen,
 					   NULL,
 					   writepoint_ptr(&c->copygc_write_point),
-					   DATA_REWRITE, &data_opts,
-					   &move_stats);
+					   &data_opts, &move_stats);
 		if (ret < 0)
 			bch_err(c, "error %i from bch2_move_data() in copygc", ret);
 		if (ret)
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 23cc46e95021..a0e778349188 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -22,62 +22,70 @@
  * returns -1 if it should not be moved, or
  * device of pointer that should be moved, if known, or INT_MAX if unknown
  */
-static int __bch2_rebalance_pred(struct bch_fs *c,
-				 struct bkey_s_c k,
-				 struct bch_io_opts *io_opts)
+static bool rebalance_pred(struct bch_fs *c, void *arg,
+			   struct bkey_s_c k,
+			   struct bch_io_opts *io_opts,
+			   struct data_update_opts *data_opts)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
+	unsigned i;
+
+	data_opts->rewrite_ptrs		= 0;
+	data_opts->target		= io_opts->background_target;
+	data_opts->extra_replicas	= 0;
+	data_opts->btree_insert_flags	= 0;
 
 	if (io_opts->background_compression &&
-	    !bch2_bkey_is_incompressible(k))
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+	    !bch2_bkey_is_incompressible(k)) {
+		const union bch_extent_entry *entry;
+		struct extent_ptr_decoded p;
+
+		i = 0;
+		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 			if (!p.ptr.cached &&
 			    p.crc.compression_type !=
 			    bch2_compression_opt_to_type[io_opts->background_compression])
-				return p.ptr.dev;
+				data_opts->rewrite_ptrs |= 1U << i;
+			i++;
+		}
+	}
 
-	if (io_opts->background_target)
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (!p.ptr.cached &&
-			    !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
-				return p.ptr.dev;
+	if (io_opts->background_target) {
+		const struct bch_extent_ptr *ptr;
 
-	return -1;
+		i = 0;
+		bkey_for_each_ptr(ptrs, ptr) {
+			if (!ptr->cached &&
+			    !bch2_dev_in_target(c, ptr->dev, io_opts->background_target))
+				data_opts->rewrite_ptrs |= 1U << i;
+			i++;
+		}
+	}
+
+	return data_opts->rewrite_ptrs != 0;
 }
 
 void bch2_rebalance_add_key(struct bch_fs *c,
 			    struct bkey_s_c k,
 			    struct bch_io_opts *io_opts)
 {
-	atomic64_t *counter;
-	int dev;
+	struct data_update_opts update_opts = { 0 };
+	struct bkey_ptrs_c ptrs;
+	const struct bch_extent_ptr *ptr;
+	unsigned i;
 
-	dev = __bch2_rebalance_pred(c, k, io_opts);
-	if (dev < 0)
+	if (!rebalance_pred(c, NULL, k, io_opts, &update_opts))
 		return;
 
-	counter = dev < INT_MAX
-		? &bch_dev_bkey_exists(c, dev)->rebalance_work
-		: &c->rebalance.work_unknown_dev;
-
-	if (atomic64_add_return(k.k->size, counter) == k.k->size)
-		rebalance_wakeup(c);
-}
-
-static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
-				    struct bkey_s_c k,
-				    struct bch_io_opts *io_opts,
-				    struct data_opts *data_opts)
-{
-	if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
-		data_opts->target	= io_opts->background_target;
-		data_opts->nr_replicas	= 1;
-		data_opts->btree_insert_flags = 0;
-		return DATA_ADD_REPLICAS;
-	} else {
-		return DATA_SKIP;
+	i = 0;
+	ptrs = bch2_bkey_ptrs_c(k);
+	bkey_for_each_ptr(ptrs, ptr) {
+		if ((1U << i) & update_opts.rewrite_ptrs)
+			if (atomic64_add_return(k.k->size,
+					&bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) ==
+			    k.k->size)
+				rebalance_wakeup(c);
+		i++;
 	}
 }
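The rebalance accounting walks pointers and mask bits in lockstep; the membership test is a bitwise & against the mask, and a device's counter crossing from zero to nonzero is what wakes the rebalance thread. A standalone model of that accounting (NR_DEVS and the example extent are stand-ins for this sketch):

#include <stdint.h>
#include <stdio.h>

#define NR_DEVS 4

int main(void)
{
	uint64_t rebalance_work[NR_DEVS] = { 0 };
	unsigned ptr_devs[] = { 1, 3 };		/* the extent's pointers' devices */
	uint32_t rewrite_ptrs = 0x2;		/* only pointer 1 needs moving */
	uint64_t extent_sectors = 64;

	for (unsigned i = 0; i < 2; i++) {
		if (!((1U << i) & rewrite_ptrs))	/* bitwise &, not && */
			continue;

		uint64_t old = rebalance_work[ptr_devs[i]];

		rebalance_work[ptr_devs[i]] += extent_sectors;
		if (!old)	/* counter went zero -> nonzero: wake the thread */
			printf("wake rebalance for dev %u\n", ptr_devs[i]);
	}
	return 0;
}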