author     Kent Overstreet <kent.overstreet@linux.dev>   2024-12-28 19:59:55 -0500
committer  Kent Overstreet <kent.overstreet@linux.dev>   2025-01-19 21:43:14 -0500
commit     cb4af57eb5a0046c43df9141f8f0cc7aed9f49c0
tree       3d574f0c67cc569651ebdb10a406295284dc1681
parent     6ef9eab6f449eafe3b6c6a467500bae280a7c8be
bcachefs: Scrub
Add a new data op to walk all data and metadata in a filesystem,
checking if it can be read successfully, and on error repairing from
another copy if possible.
- New helper: bch2_dev_idx_is_online(), so that we can bail out and
report to userspace when we're unable to scrub because the device is
offline
- data_update_opts, which controls the data move path, now understands
scrub: data is only read, not written. The read path is responsible
for rewriting on read error, as with other reads.
- scrub_pred skips data extents that don't have checksums
- bch_ioctl_data has a new scrub member, which has a data_types field
for data types to check - i.e. all data types, or only metadata.
- Add new entries to bch_move_stats so that we can report numbers for
corrected and uncorrected errors
- Add a new enum to bch_ioctl_data_event for explicitly reporting
completion and return code (i.e. device offline)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
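
A minimal userspace sketch (not from this commit) of driving the new op, for
orientation: it assumes the BCH_IOCTL_DATA request from bcachefs_ioctl.h, an
already-open bcachefs ioctl fd (fs_fd, a placeholder name), and the data-job
convention visible in chardev.c below, where the ioctl starts a job thread and
progress events are read back from the descriptor it returns; error handling
is trimmed.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "bcachefs_ioctl.h"	/* struct bch_ioctl_data, BCH_IOCTL_DATA */

static int scrub_device(int fs_fd, __u32 dev)
{
	struct bch_ioctl_data op = {
		.op			= BCH_DATA_OP_scrub,
		.scrub.dev		= dev,
		.scrub.data_types	= ~0U,	/* scrub all data types */
	};
	int ret = 0;

	/* the ioctl kicks off a data job; progress is read from the fd */
	int job_fd = ioctl(fs_fd, BCH_IOCTL_DATA, &op);
	if (job_fd < 0)
		return -errno;

	struct bch_ioctl_data_event e;
	while (read(job_fd, &e, sizeof(e)) == sizeof(e)) {
		printf("%llu/%llu sectors, corrected %llu, uncorrected %llu\n",
		       (unsigned long long) e.p.sectors_done,
		       (unsigned long long) e.p.sectors_total,
		       (unsigned long long) e.p.sectors_error_corrected,
		       (unsigned long long) e.p.sectors_error_uncorrected);

		if (e.ret == BCH_IOCTL_DATA_EVENT_RET_device_offline) {
			ret = -ENODEV;
			break;
		}
		if (e.ret == BCH_IOCTL_DATA_EVENT_RET_done)
			break;
		sleep(1);
	}

	close(job_fd);
	return ret;
}

Reading events from a job fd rather than blocking in the ioctl is the existing
data-job convention, which is why completion has to be signalled in-band via
the new ret field instead of through the ioctl's return value.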
-rw-r--r--  fs/bcachefs/bcachefs_ioctl.h |  14
-rw-r--r--  fs/bcachefs/chardev.c        |  33
-rw-r--r--  fs/bcachefs/data_update.h    |   3
-rw-r--r--  fs/bcachefs/io_read.c        |   1
-rw-r--r--  fs/bcachefs/io_read.h        |   1
-rw-r--r--  fs/bcachefs/move.c           | 131
-rw-r--r--  fs/bcachefs/move_types.h     |   5
-rw-r--r--  fs/bcachefs/sb-members.h     |  12
8 files changed, 171 insertions, 29 deletions
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
index 3c23bdf788ce..f176f1928725 100644
--- a/fs/bcachefs/bcachefs_ioctl.h
+++ b/fs/bcachefs/bcachefs_ioctl.h
@@ -215,6 +215,10 @@ struct bch_ioctl_data {
 	union {
 	struct {
 		__u32		dev;
+		__u32		data_types;
+	}			scrub;
+	struct {
+		__u32		dev;
 		__u32		pad;
 	}			migrate;
 	struct {
@@ -237,11 +241,19 @@ struct bch_ioctl_data_progress {
 
 	__u64			sectors_done;
 	__u64			sectors_total;
+	__u64			sectors_error_corrected;
+	__u64			sectors_error_uncorrected;
 } __packed __aligned(8);
 
+enum bch_ioctl_data_event_ret {
+	BCH_IOCTL_DATA_EVENT_RET_done		= 1,
+	BCH_IOCTL_DATA_EVENT_RET_device_offline	= 2,
+};
+
 struct bch_ioctl_data_event {
 	__u8			type;
-	__u8			pad[7];
+	__u8			ret;
+	__u8			pad[6];
 	union {
 	struct bch_ioctl_data_progress p;
 	__u64			pad2[15];
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index f374a3988622..0eb320747a9e 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -312,7 +312,10 @@ static int bch2_data_thread(void *arg)
 	struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
 
 	ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
-	ctx->stats.done = true;
+	if (ctx->thr.ret == -BCH_ERR_device_offline)
+		ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
+	else
+		ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
 
 	return 0;
 }
@@ -331,14 +334,30 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
 	struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
 	struct bch_fs *c = ctx->c;
 	struct bch_ioctl_data_event e = {
-		.type			= BCH_DATA_EVENT_PROGRESS,
-		.p.data_type		= ctx->stats.done ? U8_MAX : ctx->stats.data_type,
-		.p.btree_id		= ctx->stats.pos.btree,
-		.p.pos			= ctx->stats.pos.pos,
-		.p.sectors_done		= atomic64_read(&ctx->stats.sectors_seen),
-		.p.sectors_total	= bch2_fs_usage_read_short(c).used,
+		.type				= BCH_DATA_EVENT_PROGRESS,
+		.ret				= ctx->stats.ret,
+		.p.data_type			= ctx->stats.data_type,
+		.p.btree_id			= ctx->stats.pos.btree,
+		.p.pos				= ctx->stats.pos.pos,
+		.p.sectors_done			= atomic64_read(&ctx->stats.sectors_seen),
+		.p.sectors_error_corrected	= atomic64_read(&ctx->stats.sectors_error_corrected),
+		.p.sectors_error_uncorrected	= atomic64_read(&ctx->stats.sectors_error_uncorrected),
 	};
 
+	if (ctx->arg.op == BCH_DATA_OP_scrub) {
+		struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
+		if (ca) {
+			struct bch_dev_usage u;
+			bch2_dev_usage_read_fast(ca, &u);
+			for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
+				if (ctx->arg.scrub.data_types & BIT(i))
+					e.p.sectors_total += u.d[i].sectors;
+			bch2_dev_put(ca);
+		}
+	} else {
+		e.p.sectors_total = bch2_fs_usage_read_short(c).used;
+	}
+
 	if (len < sizeof(e))
 		return -EINVAL;
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index b604eba77d9d..64e03e59485c 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -16,6 +16,9 @@ struct data_update_opts {
 	u8			extra_replicas;
 	unsigned		btree_insert_flags;
 	unsigned		write_flags;
+
+	int			read_dev;
+	bool			scrub;
 };
 
 void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index e57b8583b271..f596de77c0eb 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -488,6 +488,7 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
 			    blk_status_t error)
 {
 	rbio->retry = retry;
+	rbio->saw_error = true;
 
 	if (rbio->flags & BCH_READ_in_retry)
 		return;
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index 5142f2818b33..73275da5d2c4 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -41,6 +41,7 @@ struct bch_read_bio {
 			have_ioref:1,
 			narrow_crcs:1,
 			hole:1,
+			saw_error:1,
 			retry:2,
 			context:2;
 };
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index dd7b70c7ee7d..bd9d7c1515fe 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -109,7 +109,20 @@ static void move_write_done(struct bch_write_op *op)
 
 static void move_write(struct moving_io *io)
 {
-	if (unlikely(io->write.rbio.bio.bi_status || io->write.rbio.hole)) {
+	struct moving_context *ctxt = io->write.ctxt;
+
+	if (ctxt->stats) {
+		if (io->write.rbio.bio.bi_status)
+			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+				     &ctxt->stats->sectors_error_uncorrected);
+		else if (io->write.rbio.saw_error)
+			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+				     &ctxt->stats->sectors_error_corrected);
+	}
+
+	if (unlikely(io->write.rbio.bio.bi_status ||
+		     io->write.rbio.hole ||
+		     io->write.data_opts.scrub)) {
 		move_free(io);
 		return;
 	}
@@ -263,7 +276,8 @@ int bch2_move_extent(struct moving_context *ctxt,
 	bch2_data_update_opts_normalize(k, &data_opts);
 
 	if (!data_opts.rewrite_ptrs &&
-	    !data_opts.extra_replicas) {
+	    !data_opts.extra_replicas &&
+	    !data_opts.scrub) {
 		if (data_opts.kill_ptrs)
 			return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
 		return 0;
@@ -284,15 +298,21 @@ int bch2_move_extent(struct moving_context *ctxt,
 	io->read_sectors	= k.k->size;
 	io->write_sectors	= k.k->size;
 
-	ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
-				    io_opts, data_opts, iter->btree_id, k);
-	if (ret)
-		goto err_free;
+	if (!data_opts.scrub) {
+		ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
+					    io_opts, data_opts, iter->btree_id, k);
+		if (ret)
+			goto err_free;
 
-	io->write.rbio.bio.bi_end_io = move_read_endio;
-	bio_set_prio(&io->write.rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+		io->write.rbio.bio.bi_end_io = move_read_endio;
+		bio_set_prio(&io->write.rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
-	io->write.op.end_io = move_write_done;
+		io->write.op.end_io = move_write_done;
+	} else {
+		bch2_bkey_buf_init(&io->write.k);
+		io->write.op.c = c;
+		io->write.data_opts = data_opts;
+	}
 
 	if (ctxt->rate)
 		bch2_ratelimit_increment(ctxt->rate, k.k->size);
@@ -324,11 +344,14 @@ int bch2_move_extent(struct moving_context *ctxt,
 	 * ctxt when doing wakeup
 	 */
 	closure_get(&ctxt->cl);
-	bch2_read_extent(trans, &io->write.rbio,
-			 bkey_start_pos(k.k),
-			 iter->btree_id, k, 0,
-			 BCH_READ_data_update|
-			 BCH_READ_last_fragment);
+	__bch2_read_extent(trans, &io->write.rbio,
+			   io->write.rbio.bio.bi_iter,
+			   bkey_start_pos(k.k),
+			   iter->btree_id, k, 0,
+			   NULL,
+			   BCH_READ_data_update|
+			   BCH_READ_last_fragment,
+			   data_opts.scrub ? data_opts.read_dev : -1);
 	return 0;
 err_free:
 	kfree(io);
@@ -669,6 +692,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
 			  unsigned dev,
 			  u64 bucket_start,
 			  u64 bucket_end,
+			  unsigned data_types,
 			  move_pred_fn pred, void *arg)
 {
 	struct btree_trans *trans = ctxt->trans;
@@ -739,6 +763,9 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
 		if (ctxt->stats)
 			ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
 
+		if (!(data_types & BIT(bp.v->data_type)))
+			goto next;
+
 		k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
 		ret = bkey_err(k);
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -762,17 +789,25 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
 			goto next;
 		}
 
+		if (data_opts.scrub &&
+		    !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
+			bch2_trans_iter_exit(trans, &iter);
+			ret = -BCH_ERR_device_offline;
+			break;
+		}
+
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
 
 		/* move_extent will drop locks */
-		unsigned sectors = !bp.v->level
-			? bp.v->bucket_len
-			: btree_ptr_sectors_written(k);
+		unsigned sectors = bp.v->bucket_len;
 
-		ret = !bp.v->level
-			? bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts)
-			: bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
+		if (!bp.v->level)
+			ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
+		else if (!data_opts.scrub)
+			ret = bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
+		else
+			ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);
 
 		bch2_trans_iter_exit(trans, &iter);
@@ -801,6 +836,30 @@ err:
 	return ret;
 }
 
+static int bch2_move_data_phys(struct bch_fs *c,
+			       unsigned dev,
+			       u64 start,
+			       u64 end,
+			       unsigned data_types,
+			       struct bch_ratelimit *rate,
+			       struct bch_move_stats *stats,
+			       struct write_point_specifier wp,
+			       bool wait_on_copygc,
+			       move_pred_fn pred, void *arg)
+{
+	struct moving_context ctxt;
+
+	bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
+
+	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
+	ctxt.stats->phys = true;
+
+	int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
+	bch2_moving_ctxt_exit(&ctxt);
+
+	return ret;
+}
+
 struct evacuate_bucket_arg {
 	struct bpos		bucket;
 	int			gen;
@@ -836,6 +895,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
 				    bucket.inode,
 				    bucket.offset,
 				    bucket.offset + 1,
+				    ~0,
 				    evacuate_bucket_pred, &arg);
 }
 
@@ -1077,6 +1137,30 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
 	return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
 }
 
+static bool scrub_pred(struct bch_fs *c, void *_arg,
+		       struct bkey_s_c k,
+		       struct bch_io_opts *io_opts,
+		       struct data_update_opts *data_opts)
+{
+	struct bch_ioctl_data *arg = _arg;
+
+	if (k.k->type != KEY_TYPE_btree_ptr_v2) {
+		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+		const union bch_extent_entry *entry;
+		struct extent_ptr_decoded p;
+		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+			if (p.ptr.dev == arg->migrate.dev) {
+				if (!p.crc.csum_type)
+					return false;
+				break;
+			}
+	}
+
+	data_opts->scrub	= true;
+	data_opts->read_dev	= arg->migrate.dev;
+	return true;
+}
+
 int bch2_data_job(struct bch_fs *c,
 		  struct bch_move_stats *stats,
 		  struct bch_ioctl_data op)
@@ -1092,6 +1176,13 @@ int bch2_data_job(struct bch_fs *c,
 
 	switch (op.op) {
 	case BCH_DATA_OP_scrub:
+		ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
+					  op.scrub.data_types,
+					  NULL,
+					  stats,
+					  writepoint_hashed((unsigned long) current),
+					  false,
+					  scrub_pred, &op) ?: ret;
 		break;
 	case BCH_DATA_OP_rereplicate:
diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h
index 15d1f7f3d1dc..82e473ed48d2 100644
--- a/fs/bcachefs/move_types.h
+++ b/fs/bcachefs/move_types.h
@@ -3,11 +3,12 @@
 #define _BCACHEFS_MOVE_TYPES_H
 
 #include "bbpos_types.h"
+#include "bcachefs_ioctl.h"
 
 struct bch_move_stats {
 	char			name[32];
 	bool			phys;
-	bool			done;
+	enum bch_ioctl_data_event_ret	ret;
 
 	union {
 	struct {
@@ -25,6 +26,8 @@ struct bch_move_stats {
 	atomic64_t		sectors_seen;
 	atomic64_t		sectors_moved;
 	atomic64_t		sectors_raced;
+	atomic64_t		sectors_error_corrected;
+	atomic64_t		sectors_error_uncorrected;
 };
 
 struct move_bucket_key {
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 762083b564ee..b29b6c6c21dd 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -23,6 +23,18 @@ static inline bool bch2_dev_is_online(struct bch_dev *ca)
 	return !percpu_ref_is_zero(&ca->io_ref);
 }
 
+static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
+
+static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
+{
+	rcu_read_lock();
+	struct bch_dev *ca = bch2_dev_rcu(c, dev);
+	bool ret = ca && bch2_dev_is_online(ca);
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static inline bool bch2_dev_is_readable(struct bch_dev *ca)
 {
 	return bch2_dev_is_online(ca) &&
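
On the data_types field: it is a bitmask over enum bch_data_type, and two of
the hunks above give it meaning: bch2_data_job_read() sums per-type sector
counts into sectors_total only for set bits, and __bch2_move_data_phys()
skips backpointers whose data type is not selected. A metadata-only scrub is
therefore just a narrower mask; a hedged userspace fragment in the same vein
as the sketch above (1U << BCH_DATA_btree standing in for the kernel's BIT(),
dev a placeholder):

	/* scrub only btree (metadata); BCH_DATA_btree is the btree data type */
	struct bch_ioctl_data op = {
		.op			= BCH_DATA_OP_scrub,
		.scrub.dev		= dev,
		.scrub.data_types	= 1U << BCH_DATA_btree,
	};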