diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-03-14 19:01:14 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-04-17 15:42:47 -0400 |
commit | 691486a9e112fe3d0c8d7d7be5ee453f944f70d8 (patch) | |
tree | ed34a634616d50b6ddbe14322b438ef10759de12 | |
parent | 3ed9997296283dc343a3d45fd8d8002f040ee443 (diff) |
bcachefs: Add code to scan for/rewrite old btree nodes
This adds a new data job type to scan for btree nodes in the old extent
format, and rewrite them.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/bcachefs_ioctl.h | 17 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 131 | ||||
-rw-r--r-- | fs/bcachefs/move.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/rebalance.c | 3 |
8 files changed, 132 insertions, 38 deletions
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index f1cb5d405129..f679fc2151bc 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state { }; enum bch_data_ops { - BCH_DATA_OP_SCRUB = 0, - BCH_DATA_OP_REREPLICATE = 1, - BCH_DATA_OP_MIGRATE = 2, - BCH_DATA_OP_NR = 3, + BCH_DATA_OP_SCRUB = 0, + BCH_DATA_OP_REREPLICATE = 1, + BCH_DATA_OP_MIGRATE = 2, + BCH_DATA_OP_REWRITE_OLD_NODES = 3, + BCH_DATA_OP_NR = 4, }; /* @@ -187,11 +188,13 @@ enum bch_data_ops { * job. The file descriptor is O_CLOEXEC. */ struct bch_ioctl_data { - __u32 op; + __u16 op; + __u8 start_btree; + __u8 end_btree; __u32 flags; - struct bpos start; - struct bpos end; + struct bpos start_pos; + struct bpos end_pos; union { struct { diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 8a4fbdf47d23..a84a473101dc 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned u64s; int ret, retry_read = 0, write = READ; + b->version_ondisk = U16_MAX; + iter = mempool_alloc(&c->fill_iter, GFP_NOIO); sort_iter_init(iter, b); iter->size = (btree_blocks(c) + 1) * 2; @@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sectors = vstruct_sectors(bne, c->block_bits); } + b->version_ondisk = min(b->version_ondisk, + le16_to_cpu(i->version)); + ret = validate_bset(c, ca, b, i, sectors, READ, have_retry); if (ret) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e57ed9d86ee3..03894e923037 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -76,6 +76,7 @@ struct btree { u16 written; u8 nsets; u8 nr_key_bits; + u16 version_ondisk; struct bkey_format format; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index dd1b8f6ef9b0..275dcabbbdd6 100644 --- a/fs/bcachefs/btree_update_interior.c +++ 
b/fs/bcachefs/btree_update_interior.c @@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev bch2_bset_init_first(b, &b->data->keys); b->c.level = level; b->c.btree_id = as->btree_id; + b->version_ondisk = c->sb.version; memset(&b->nr, 0, sizeof(b->nr)); b->data->magic = cpu_to_le64(bset_magic(c)); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index b4c315cf68df..602072b3d4d2 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -532,7 +532,7 @@ static int __bch2_move_data(struct bch_fs *c, stats->data_type = BCH_DATA_user; stats->btree_id = btree_id; - stats->pos = POS_MIN; + stats->pos = start; iter = bch2_trans_get_iter(&trans, btree_id, start, BTREE_ITER_PREFETCH); @@ -647,14 +647,15 @@ out: } int bch2_move_data(struct bch_fs *c, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, struct bch_ratelimit *rate, struct write_point_specifier wp, - struct bpos start, - struct bpos end, move_pred_fn pred, void *arg, struct bch_move_stats *stats) { struct moving_context ctxt = { .stats = stats }; + enum btree_id id; int ret; closure_init_stack(&ctxt.cl); @@ -663,10 +664,23 @@ int bch2_move_data(struct bch_fs *c, stats->data_type = BCH_DATA_user; - ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, - pred, arg, stats, BTREE_ID_EXTENTS) ?: - __bch2_move_data(c, &ctxt, rate, wp, start, end, - pred, arg, stats, BTREE_ID_REFLINK); + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); + id++) { + stats->btree_id = id; + + if (id != BTREE_ID_EXTENTS && + id != BTREE_ID_REFLINK) + continue; + + ret = __bch2_move_data(c, &ctxt, rate, wp, + id == start_btree_id ? start_pos : POS_MIN, + id == end_btree_id ? 
end_pos : POS_MAX, + pred, arg, stats, id); + if (ret) + break; + } + move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); closure_sync(&ctxt.cl); @@ -680,16 +694,22 @@ int bch2_move_data(struct bch_fs *c, return ret; } +typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *, + struct btree *, struct bch_io_opts *, + struct data_opts *); + static int bch2_move_btree(struct bch_fs *c, - move_pred_fn pred, - void *arg, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, + move_btree_pred pred, void *arg, struct bch_move_stats *stats) { + bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_trans trans; struct btree_iter *iter; struct btree *b; - unsigned id; + enum btree_id id; struct data_opts data_opts; enum data_cmd cmd; int ret = 0; @@ -698,16 +718,24 @@ static int bch2_move_btree(struct bch_fs *c, stats->data_type = BCH_DATA_btree; - for (id = 0; id < BTREE_ID_NR; id++) { + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); + id++) { stats->btree_id = id; - for_each_btree_node(&trans, iter, id, POS_MIN, + for_each_btree_node(&trans, iter, id, + id == start_btree_id ? 
start_pos : POS_MIN, BTREE_ITER_PREFETCH, b) { + if (kthread && kthread_should_stop()) + goto out; + + if ((cmp_int(id, end_btree_id) ?: + bkey_cmp(b->key.k.p, end_pos)) > 0) + break; + stats->pos = iter->pos; - switch ((cmd = pred(c, arg, - bkey_i_to_s_c(&b->key), - &io_opts, &data_opts))) { + switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -727,7 +755,7 @@ next: ret = bch2_trans_iter_free(&trans, iter) ?: ret; } - +out: bch2_trans_exit(&trans); return ret; @@ -786,6 +814,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, return DATA_REWRITE; } +static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + +static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + +static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + if (b->version_ondisk != c->sb.version || + btree_node_need_rewrite(b)) { + data_opts->target = 0; + data_opts->nr_replicas = 1; + data_opts->btree_insert_flags = 0; + return DATA_REWRITE; + } + + return DATA_SKIP; +} + int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) @@ -797,17 +857,20 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + rereplicate_btree_pred, c, stats) ?: ret; closure_wait_event(&c->btree_interior_update_wait, 
!bch2_btree_interior_updates_nr_pending(c)); ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, NULL, - writepoint_hashed((unsigned long) current), - op.start, - op.end, + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + NULL, writepoint_hashed((unsigned long) current), rereplicate_pred, c, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; @@ -818,16 +881,32 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + migrate_btree_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, NULL, - writepoint_hashed((unsigned long) current), - op.start, - op.end, + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + NULL, writepoint_hashed((unsigned long) current), migrate_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; + case BCH_DATA_OP_REWRITE_OLD_NODES: + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + rewrite_old_nodes_pred, &op, stats) ?: ret; + + if (!ret) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = c->disk_sb.sb->version; + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + break; default: ret = -EINVAL; } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index b04bc669226d..403ca695c875 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, struct bch_io_opts *, struct data_opts *); -int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, +int bch2_move_data(struct bch_fs *, + enum btree_id, struct bpos, + enum btree_id, struct bpos, + struct bch_ratelimit *, struct write_point_specifier, - struct bpos, struct bpos, move_pred_fn, 
void *, struct bch_move_stats *); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 0b1faee5094c..03668e481f7a 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c) sizeof(h->data[0]), bucket_offset_cmp, NULL); - ret = bch2_move_data(c, &c->copygc_pd.rate, + ret = bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, + &c->copygc_pd.rate, writepoint_ptr(&c->copygc_write_point), - POS_MIN, POS_MAX, copygc_pred, NULL, &move_stats); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index e101c7f23910..9b5078ba6028 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg) rebalance_work_reset(c); bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, /* ratelimiting disabled for now */ NULL, /* &r->pd.rate, */ writepoint_ptr(&c->rebalance_write_point), - POS_MIN, POS_MAX, rebalance_pred, NULL, &r->move_stats); } |