author     Kent Overstreet <kent.overstreet@linux.dev>    2024-12-28 19:59:55 -0500
committer  Kent Overstreet <kent.overstreet@linux.dev>    2025-01-19 21:43:14 -0500
commit     cb4af57eb5a0046c43df9141f8f0cc7aed9f49c0 (patch)
tree       3d574f0c67cc569651ebdb10a406295284dc1681
parent     6ef9eab6f449eafe3b6c6a467500bae280a7c8be (diff)
bcachefs: Scrub
Add a new data op to walk all data and metadata in a filesystem, checking
if it can be read successfully, and on error repairing from another copy if
possible.

- New helper: bch2_dev_idx_is_online(), so that we can bail out and report
  to userspace when we're unable to scrub because the device is offline

- data_update_opts, which controls the data move path, now understands
  scrub: data is only read, not written. The read path is responsible for
  rewriting on read error, as with other reads.

- scrub_pred skips data extents that don't have checksums

- bch_ioctl_data has a new scrub member, which has a data_types field for
  the data types to check - i.e. all data types, or only metadata

- Add new entries to bch_move_stats so that we can report numbers for
  corrected and uncorrected errors

- Add a new enum to bch_ioctl_data_event for explicitly reporting
  completion and return code (i.e. device offline)

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--   fs/bcachefs/bcachefs_ioctl.h    14
-rw-r--r--   fs/bcachefs/chardev.c           33
-rw-r--r--   fs/bcachefs/data_update.h        3
-rw-r--r--   fs/bcachefs/io_read.c            1
-rw-r--r--   fs/bcachefs/io_read.h            1
-rw-r--r--   fs/bcachefs/move.c             131
-rw-r--r--   fs/bcachefs/move_types.h         5
-rw-r--r--   fs/bcachefs/sb-members.h        12
8 files changed, 171 insertions, 29 deletions
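
For illustration, scrub is driven through the same data-job ioctl as the other
data ops. The sketch below is hypothetical userspace code: only the
bch_ioctl_data layout, BCH_DATA_OP_scrub and the scrub.dev/scrub.data_types
fields come from this patch; the BCH_IOCTL_DATA call returning a progress file
descriptor is assumed from the existing chardev data-job interface.

/*
 * Hypothetical userspace sketch: kick off a scrub of one device.
 * Assumes bch_ioctl_data / BCH_IOCTL_DATA are available from the
 * bcachefs ioctl header (include path not shown).
 */
#include <string.h>
#include <sys/ioctl.h>

static int start_scrub(int fs_fd, __u32 dev, __u32 data_types)
{
	struct bch_ioctl_data op;

	memset(&op, 0, sizeof(op));
	op.op               = BCH_DATA_OP_scrub;
	op.scrub.dev        = dev;
	op.scrub.data_types = data_types;	/* e.g. ~0U for all data types */

	/* on success, returns a file descriptor reporting progress events */
	return ioctl(fs_fd, BCH_IOCTL_DATA, &op);
}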
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
index 3c23bdf788ce..f176f1928725 100644
--- a/fs/bcachefs/bcachefs_ioctl.h
+++ b/fs/bcachefs/bcachefs_ioctl.h
@@ -215,6 +215,10 @@ struct bch_ioctl_data {
union {
struct {
__u32 dev;
+ __u32 data_types;
+ } scrub;
+ struct {
+ __u32 dev;
__u32 pad;
} migrate;
struct {
@@ -237,11 +241,19 @@ struct bch_ioctl_data_progress {
__u64 sectors_done;
__u64 sectors_total;
+ __u64 sectors_error_corrected;
+ __u64 sectors_error_uncorrected;
} __packed __aligned(8);
+enum bch_ioctl_data_event_ret {
+ BCH_IOCTL_DATA_EVENT_RET_done = 1,
+ BCH_IOCTL_DATA_EVENT_RET_device_offline = 2,
+};
+
struct bch_ioctl_data_event {
__u8 type;
- __u8 pad[7];
+ __u8 ret;
+ __u8 pad[6];
union {
struct bch_ioctl_data_progress p;
__u64 pad2[15];
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index f374a3988622..0eb320747a9e 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -312,7 +312,10 @@ static int bch2_data_thread(void *arg)
struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
- ctx->stats.done = true;
+ if (ctx->thr.ret == -BCH_ERR_device_offline)
+ ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
+ else
+ ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
return 0;
}
@@ -331,14 +334,30 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
struct bch_fs *c = ctx->c;
struct bch_ioctl_data_event e = {
- .type = BCH_DATA_EVENT_PROGRESS,
- .p.data_type = ctx->stats.done ? U8_MAX : ctx->stats.data_type,
- .p.btree_id = ctx->stats.pos.btree,
- .p.pos = ctx->stats.pos.pos,
- .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
- .p.sectors_total = bch2_fs_usage_read_short(c).used,
+ .type = BCH_DATA_EVENT_PROGRESS,
+ .ret = ctx->stats.ret,
+ .p.data_type = ctx->stats.data_type,
+ .p.btree_id = ctx->stats.pos.btree,
+ .p.pos = ctx->stats.pos.pos,
+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
+ .p.sectors_error_corrected = atomic64_read(&ctx->stats.sectors_error_corrected),
+ .p.sectors_error_uncorrected = atomic64_read(&ctx->stats.sectors_error_uncorrected),
};
+ if (ctx->arg.op == BCH_DATA_OP_scrub) {
+ struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
+ if (ca) {
+ struct bch_dev_usage u;
+ bch2_dev_usage_read_fast(ca, &u);
+ for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
+ if (ctx->arg.scrub.data_types & BIT(i))
+ e.p.sectors_total += u.d[i].sectors;
+ bch2_dev_put(ca);
+ }
+ } else {
+ e.p.sectors_total = bch2_fs_usage_read_short(c).used;
+ }
+
if (len < sizeof(e))
return -EINVAL;
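
The read side above is what a userspace progress loop would consume. A rough,
hypothetical sketch follows: only struct bch_ioctl_data_event, its new ret
field and the error-sector counters come from this patch; the
read-a-snapshot-per-read() behaviour and polling strategy are assumptions
based on the existing chardev code.

/*
 * Hypothetical sketch: poll the progress fd returned by the data ioctl.
 * Each read() is assumed to return a snapshot bch_ioctl_data_event;
 * ret stays 0 while the job is still running.
 */
#include <stdio.h>
#include <unistd.h>

static int watch_scrub(int progress_fd)
{
	struct bch_ioctl_data_event e;

	while (read(progress_fd, &e, sizeof(e)) == (ssize_t) sizeof(e)) {
		printf("%llu/%llu sectors, %llu corrected, %llu uncorrected\n",
		       (unsigned long long) e.p.sectors_done,
		       (unsigned long long) e.p.sectors_total,
		       (unsigned long long) e.p.sectors_error_corrected,
		       (unsigned long long) e.p.sectors_error_uncorrected);

		if (e.ret == BCH_IOCTL_DATA_EVENT_RET_device_offline)
			return -1;
		if (e.ret == BCH_IOCTL_DATA_EVENT_RET_done)
			return 0;

		sleep(1);	/* re-read for a fresh snapshot */
	}
	return -1;
}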
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index b604eba77d9d..64e03e59485c 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -16,6 +16,9 @@ struct data_update_opts {
u8 extra_replicas;
unsigned btree_insert_flags;
unsigned write_flags;
+
+ int read_dev;
+ bool scrub;
};
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index e57b8583b271..f596de77c0eb 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -488,6 +488,7 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
blk_status_t error)
{
rbio->retry = retry;
+ rbio->saw_error = true;
if (rbio->flags & BCH_READ_in_retry)
return;
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index 5142f2818b33..73275da5d2c4 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -41,6 +41,7 @@ struct bch_read_bio {
have_ioref:1,
narrow_crcs:1,
hole:1,
+ saw_error:1,
retry:2,
context:2;
};
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index dd7b70c7ee7d..bd9d7c1515fe 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -109,7 +109,20 @@ static void move_write_done(struct bch_write_op *op)
static void move_write(struct moving_io *io)
{
- if (unlikely(io->write.rbio.bio.bi_status || io->write.rbio.hole)) {
+ struct moving_context *ctxt = io->write.ctxt;
+
+ if (ctxt->stats) {
+ if (io->write.rbio.bio.bi_status)
+ atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+ &ctxt->stats->sectors_error_uncorrected);
+ else if (io->write.rbio.saw_error)
+ atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+ &ctxt->stats->sectors_error_corrected);
+ }
+
+ if (unlikely(io->write.rbio.bio.bi_status ||
+ io->write.rbio.hole ||
+ io->write.data_opts.scrub)) {
move_free(io);
return;
}
@@ -263,7 +276,8 @@ int bch2_move_extent(struct moving_context *ctxt,
bch2_data_update_opts_normalize(k, &data_opts);
if (!data_opts.rewrite_ptrs &&
- !data_opts.extra_replicas) {
+ !data_opts.extra_replicas &&
+ !data_opts.scrub) {
if (data_opts.kill_ptrs)
return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
return 0;
@@ -284,15 +298,21 @@ int bch2_move_extent(struct moving_context *ctxt,
io->read_sectors = k.k->size;
io->write_sectors = k.k->size;
- ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
- io_opts, data_opts, iter->btree_id, k);
- if (ret)
- goto err_free;
+ if (!data_opts.scrub) {
+ ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
+ io_opts, data_opts, iter->btree_id, k);
+ if (ret)
+ goto err_free;
- io->write.rbio.bio.bi_end_io = move_read_endio;
- bio_set_prio(&io->write.rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ io->write.rbio.bio.bi_end_io = move_read_endio;
+ bio_set_prio(&io->write.rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
- io->write.op.end_io = move_write_done;
+ io->write.op.end_io = move_write_done;
+ } else {
+ bch2_bkey_buf_init(&io->write.k);
+ io->write.op.c = c;
+ io->write.data_opts = data_opts;
+ }
if (ctxt->rate)
bch2_ratelimit_increment(ctxt->rate, k.k->size);
@@ -324,11 +344,14 @@ int bch2_move_extent(struct moving_context *ctxt,
* ctxt when doing wakeup
*/
closure_get(&ctxt->cl);
- bch2_read_extent(trans, &io->write.rbio,
- bkey_start_pos(k.k),
- iter->btree_id, k, 0,
- BCH_READ_data_update|
- BCH_READ_last_fragment);
+ __bch2_read_extent(trans, &io->write.rbio,
+ io->write.rbio.bio.bi_iter,
+ bkey_start_pos(k.k),
+ iter->btree_id, k, 0,
+ NULL,
+ BCH_READ_data_update|
+ BCH_READ_last_fragment,
+ data_opts.scrub ? data_opts.read_dev : -1);
return 0;
err_free:
kfree(io);
@@ -669,6 +692,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
unsigned dev,
u64 bucket_start,
u64 bucket_end,
+ unsigned data_types,
move_pred_fn pred, void *arg)
{
struct btree_trans *trans = ctxt->trans;
@@ -739,6 +763,9 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
if (ctxt->stats)
ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
+ if (!(data_types & BIT(bp.v->data_type)))
+ goto next;
+
k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -762,17 +789,25 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
goto next;
}
+ if (data_opts.scrub &&
+ !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
+ bch2_trans_iter_exit(trans, &iter);
+ ret = -BCH_ERR_device_offline;
+ break;
+ }
+
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
/* move_extent will drop locks */
- unsigned sectors = !bp.v->level
- ? bp.v->bucket_len
- : btree_ptr_sectors_written(k);
+ unsigned sectors = bp.v->bucket_len;
- ret = !bp.v->level
- ? bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts)
- : bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
+ if (!bp.v->level)
+ ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
+ else if (!data_opts.scrub)
+ ret = bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
+ else
+ ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);
bch2_trans_iter_exit(trans, &iter);
@@ -801,6 +836,30 @@ err:
return ret;
}
+static int bch2_move_data_phys(struct bch_fs *c,
+ unsigned dev,
+ u64 start,
+ u64 end,
+ unsigned data_types,
+ struct bch_ratelimit *rate,
+ struct bch_move_stats *stats,
+ struct write_point_specifier wp,
+ bool wait_on_copygc,
+ move_pred_fn pred, void *arg)
+{
+ struct moving_context ctxt;
+
+ bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
+
+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
+ ctxt.stats->phys = true;
+
+ int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
+ bch2_moving_ctxt_exit(&ctxt);
+
+ return ret;
+}
+
struct evacuate_bucket_arg {
struct bpos bucket;
int gen;
@@ -836,6 +895,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
bucket.inode,
bucket.offset,
bucket.offset + 1,
+ ~0,
evacuate_bucket_pred, &arg);
}
@@ -1077,6 +1137,30 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
+static bool scrub_pred(struct bch_fs *c, void *_arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ struct bch_ioctl_data *arg = _arg;
+
+ if (k.k->type != KEY_TYPE_btree_ptr_v2) {
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (p.ptr.dev == arg->migrate.dev) {
+ if (!p.crc.csum_type)
+ return false;
+ break;
+ }
+ }
+
+ data_opts->scrub = true;
+ data_opts->read_dev = arg->migrate.dev;
+ return true;
+}
+
int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
struct bch_ioctl_data op)
@@ -1092,6 +1176,13 @@ int bch2_data_job(struct bch_fs *c,
switch (op.op) {
case BCH_DATA_OP_scrub:
+ ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
+ op.scrub.data_types,
+ NULL,
+ stats,
+ writepoint_hashed((unsigned long) current),
+ false,
+ scrub_pred, &op) ?: ret;
break;
case BCH_DATA_OP_rereplicate:
diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h
index 15d1f7f3d1dc..82e473ed48d2 100644
--- a/fs/bcachefs/move_types.h
+++ b/fs/bcachefs/move_types.h
@@ -3,11 +3,12 @@
#define _BCACHEFS_MOVE_TYPES_H
#include "bbpos_types.h"
+#include "bcachefs_ioctl.h"
struct bch_move_stats {
char name[32];
bool phys;
- bool done;
+ enum bch_ioctl_data_event_ret ret;
union {
struct {
@@ -25,6 +26,8 @@ struct bch_move_stats {
atomic64_t sectors_seen;
atomic64_t sectors_moved;
atomic64_t sectors_raced;
+ atomic64_t sectors_error_corrected;
+ atomic64_t sectors_error_uncorrected;
};
struct move_bucket_key {
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 762083b564ee..b29b6c6c21dd 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -23,6 +23,18 @@ static inline bool bch2_dev_is_online(struct bch_dev *ca)
return !percpu_ref_is_zero(&ca->io_ref);
}
+static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
+
+static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
+{
+ rcu_read_lock();
+ struct bch_dev *ca = bch2_dev_rcu(c, dev);
+ bool ret = ca && bch2_dev_is_online(ca);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static inline bool bch2_dev_is_readable(struct bch_dev *ca)
{
return bch2_dev_is_online(ca) &&