summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-12-30 12:34:22 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2024-12-30 17:04:00 -0500
commit6f1429b4eb5bcd1941c5cfb16d21ad3d938764cf (patch)
treea4f5894a95d6805f3cab8f05bf72a36ad11d1c5b
parent7210d5c9111a3d05b0aa4c298739362914c886a5 (diff)
Update bcachefs sources to f4b9a91fce73 bcachefs: Scrub
-rw-r--r--.bcachefs_revision2
-rw-r--r--bch_bindgen/build.rs1
-rw-r--r--libbcachefs/backpointers.c34
-rw-r--r--libbcachefs/bcachefs.h3
-rw-r--r--libbcachefs/bcachefs_ioctl.h14
-rw-r--r--libbcachefs/btree_cache.c16
-rw-r--r--libbcachefs/btree_cache.h2
-rw-r--r--libbcachefs/btree_io.c189
-rw-r--r--libbcachefs/btree_io.h3
-rw-r--r--libbcachefs/btree_iter.c4
-rw-r--r--libbcachefs/btree_update_interior.c20
-rw-r--r--libbcachefs/btree_update_interior.h4
-rw-r--r--libbcachefs/chardev.c33
-rw-r--r--libbcachefs/data_update.h3
-rw-r--r--libbcachefs/debug.c2
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/extents.c9
-rw-r--r--libbcachefs/extents.h2
-rw-r--r--libbcachefs/io_read.c68
-rw-r--r--libbcachefs/io_read.h5
-rw-r--r--libbcachefs/move.c299
-rw-r--r--libbcachefs/move_types.h18
-rw-r--r--libbcachefs/printbuf.h15
-rw-r--r--libbcachefs/sb-members.h12
-rw-r--r--libbcachefs/trace.h52
25 files changed, 583 insertions, 228 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 5d86ae9c..ae0826de 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-864591728963d416c49e502bfee56a283eda31a5
+f4b9a91fce7373d163343e5ca3a9b483d113706d
diff --git a/bch_bindgen/build.rs b/bch_bindgen/build.rs
index 4a3755a1..e2aeebb3 100644
--- a/bch_bindgen/build.rs
+++ b/bch_bindgen/build.rs
@@ -63,7 +63,6 @@ fn main() {
.allowlist_function("printbuf.*")
.blocklist_type("rhash_lock_head")
.blocklist_type("srcu_struct")
- .blocklist_type("bch_ioctl_data.*")
.allowlist_var("BCH_.*")
.allowlist_var("KEY_SPEC_.*")
.allowlist_var("Fix753_.*")
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index ebeb6a5f..655be233 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -244,27 +244,31 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
return bkey_s_c_null;
- if (likely(!bp.v->level)) {
- bch2_trans_node_iter_init(trans, iter,
- bp.v->btree_id,
- bp.v->pos,
- 0, 0,
- iter_flags);
- struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
- if (bkey_err(k)) {
- bch2_trans_iter_exit(trans, iter);
- return k;
- }
+ bch2_trans_node_iter_init(trans, iter,
+ bp.v->btree_id,
+ bp.v->pos,
+ 0,
+ bp.v->level,
+ iter_flags);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+ if (bkey_err(k)) {
+ bch2_trans_iter_exit(trans, iter);
+ return k;
+ }
- if (k.k &&
- extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
- return k;
+ if (k.k &&
+ extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
+ return k;
- bch2_trans_iter_exit(trans, iter);
+ bch2_trans_iter_exit(trans, iter);
+
+ if (!bp.v->level) {
int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} else {
struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
+ if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
+ return bkey_s_c_null;
if (IS_ERR_OR_NULL(b))
return ((struct bkey_s_c) { .k = ERR_CAST(b) });
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 161cf2f0..13acfbf3 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -687,7 +687,8 @@ struct btree_trans_buf {
x(gc_gens) \
x(snapshot_delete_pagecache) \
x(sysfs) \
- x(btree_write_buffer)
+ x(btree_write_buffer) \
+ x(btree_node_scrub)
enum bch_write_ref {
#define x(n) BCH_WRITE_REF_##n,
diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h
index 3c23bdf7..f176f192 100644
--- a/libbcachefs/bcachefs_ioctl.h
+++ b/libbcachefs/bcachefs_ioctl.h
@@ -215,6 +215,10 @@ struct bch_ioctl_data {
union {
struct {
__u32 dev;
+ __u32 data_types;
+ } scrub;
+ struct {
+ __u32 dev;
__u32 pad;
} migrate;
struct {
@@ -237,11 +241,19 @@ struct bch_ioctl_data_progress {
__u64 sectors_done;
__u64 sectors_total;
+ __u64 sectors_error_corrected;
+ __u64 sectors_error_uncorrected;
} __packed __aligned(8);
+enum bch_ioctl_data_event_ret {
+ BCH_IOCTL_DATA_EVENT_RET_done = 1,
+ BCH_IOCTL_DATA_EVENT_RET_device_offline = 2,
+};
+
struct bch_ioctl_data_event {
__u8 type;
- __u8 pad[7];
+ __u8 ret;
+ __u8 pad[6];
union {
struct bch_ioctl_data_progress p;
__u64 pad2[15];
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index b00c6a20..672ca2c1 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -1402,18 +1402,24 @@ void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsi
prt_printf(out, " level=%u", level);
}
-void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
+void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
+ enum btree_id btree, unsigned level, struct bkey_s_c k)
{
- bch2_btree_id_to_text(out, b->c.btree_id);
- prt_printf(out, " level %u/", b->c.level);
- struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id);
+ bch2_btree_id_to_text(out, btree);
+ prt_printf(out, " level %u/", level);
+ struct btree_root *r = bch2_btree_id_root(c, btree);
if (r)
prt_printf(out, "%u", r->level);
else
prt_printf(out, "(unknown)");
prt_printf(out, "\n ");
- bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+ bch2_bkey_val_to_text(out, c, k);
+}
+
+void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
+{
+ __bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key));
}
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index dcc34fe4..ca3c1b14 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -147,6 +147,8 @@ const char *bch2_btree_id_str(enum btree_id); /* avoid */
void bch2_btree_id_to_text(struct printbuf *, enum btree_id);
void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned);
+void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *,
+ enum btree_id, unsigned, struct bkey_s_c);
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index e371e60e..13ab827d 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "bkey_methods.h"
#include "bkey_sort.h"
#include "btree_cache.h"
@@ -1352,7 +1353,7 @@ start:
can_retry = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
- &failed, &rb->pick) > 0;
+ &failed, &rb->pick, -1) > 0;
if (!bio->bi_status &&
!bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
@@ -1697,7 +1698,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
return;
ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
- NULL, &pick);
+ NULL, &pick, -1);
if (ret <= 0) {
struct printbuf buf = PRINTBUF;
@@ -1811,6 +1812,190 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
}
+struct btree_node_scrub {
+ struct bch_fs *c;
+ struct bch_dev *ca;
+ void *buf;
+ bool used_mempool;
+ unsigned written;
+
+ enum btree_id btree;
+ unsigned level;
+ struct bkey_buf key;
+ __le64 seq;
+
+ struct work_struct work;
+ struct bio bio;
+};
+
+static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written,
+ struct printbuf *err)
+{
+ unsigned written = 0;
+
+ if (le64_to_cpu(data->magic) != bset_magic(c)) {
+ prt_printf(err, "bad magic: want %llx, got %llx",
+ bset_magic(c), le64_to_cpu(data->magic));
+ return false;
+ }
+
+ while (written < (ptr_written ?: btree_sectors(c))) {
+ struct btree_node_entry *bne;
+ struct bset *i;
+ bool first = !written;
+
+ if (first) {
+ bne = NULL;
+ i = &data->keys;
+ } else {
+ bne = (void *) data + (written << 9);
+ i = &bne->keys;
+
+ if (!ptr_written && i->seq != data->keys.seq)
+ break;
+ }
+
+ struct nonce nonce = btree_nonce(i, written << 9);
+ bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));
+
+ if (first) {
+ if (good_csum_type) {
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data);
+ if (bch2_crc_cmp(data->csum, csum)) {
+ bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum);
+ return false;
+ }
+ }
+
+ written += vstruct_sectors(data, c->block_bits);
+ } else {
+ if (good_csum_type) {
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ if (bch2_crc_cmp(bne->csum, csum)) {
+ bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum);
+ return false;
+ }
+ }
+
+ written += vstruct_sectors(bne, c->block_bits);
+ }
+ }
+
+ return true;
+}
+
+static void btree_node_scrub_work(struct work_struct *work)
+{
+ struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work);
+ struct bch_fs *c = scrub->c;
+ struct printbuf err = PRINTBUF;
+
+ __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level,
+ bkey_i_to_s_c(scrub->key.k));
+ prt_newline(&err);
+
+ if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) {
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, scrub->btree,
+ scrub->key.k->k.p, 0, scrub->level - 1, 0);
+
+ struct btree *b;
+ int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
+ if (ret)
+ goto err;
+
+ if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) {
+ bch_err(c, "error validating btree node during scrub on %s at btree %s",
+ scrub->ca->name, err.buf);
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_begin(trans);
+ bch2_trans_put(trans);
+ }
+
+ printbuf_exit(&err);
+ bch2_bkey_buf_exit(&scrub->key, c);;
+ btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
+ percpu_ref_put(&scrub->ca->io_ref);
+ kfree(scrub);
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+}
+
+static void btree_node_scrub_endio(struct bio *bio)
+{
+ struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio);
+
+ queue_work(scrub->c->btree_read_complete_wq, &scrub->work);
+}
+
+int bch2_btree_node_scrub(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c k, unsigned dev)
+{
+ if (k.k->type != KEY_TYPE_btree_ptr_v2)
+ return 0;
+
+ struct bch_fs *c = trans->c;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
+ return -BCH_ERR_erofs_no_writes;
+
+ struct extent_ptr_decoded pick;
+ int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev);
+ if (ret <= 0)
+ goto err;
+
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+ if (!ca) {
+ ret = -BCH_ERR_device_offline;
+ goto err;
+ }
+
+ bool used_mempool = false;
+ void *buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool);
+
+ unsigned vecs = buf_pages(buf, c->opts.btree_node_size);
+
+ struct btree_node_scrub *scrub =
+ kzalloc(sizeof(*scrub) + sizeof(struct bio_vec) * vecs, GFP_KERNEL);
+ if (!scrub) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ scrub->c = c;
+ scrub->ca = ca;
+ scrub->buf = buf;
+ scrub->used_mempool = used_mempool;
+ scrub->written = btree_ptr_sectors_written(k);
+
+ scrub->btree = btree;
+ scrub->level = level;
+ bch2_bkey_buf_init(&scrub->key);
+ bch2_bkey_buf_reassemble(&scrub->key, c, k);
+ scrub->seq = bkey_s_c_to_btree_ptr_v2(k).v->seq;
+
+ INIT_WORK(&scrub->work, btree_node_scrub_work);
+
+ bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs, vecs, REQ_OP_READ);
+ bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size);
+ scrub->bio.bi_iter.bi_sector = pick.ptr.offset;
+ scrub->bio.bi_end_io = btree_node_scrub_endio;
+ submit_bio(&scrub->bio);
+ return 0;
+err_free:
+ btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
+ percpu_ref_put(&ca->io_ref);
+err:
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+ return ret;
+}
+
static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
struct btree_write *w)
{
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index 6f9e4a6d..75ead381 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -132,6 +132,9 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);
+int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, unsigned);
+
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
enum btree_write_flags {
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index c291d495..367231ab 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2770,6 +2770,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
goto out_no_locked;
}
+ struct btree_path *path = btree_iter_path(trans, iter);
+ if (unlikely(!btree_path_node(path, path->level)))
+ return bkey_s_c_null;
+
if ((iter->flags & BTREE_ITER_cached) ||
!(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) {
k = bkey_s_c_null;
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index f4aeadbe..ab111fec 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -2189,6 +2189,26 @@ err:
goto out;
}
+int bch2_btree_node_rewrite_key(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bpos pos, unsigned flags)
+{
+ BUG_ON(!level);
+
+ /* Traverse one depth lower to get a pointer to the node itself: */
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0);
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, flags);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
struct async_btree_rewrite {
struct bch_fs *c;
struct work_struct work;
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index 7930ffea..fa5a88f9 100644
--- a/libbcachefs/btree_update_interior.h
+++ b/libbcachefs/btree_update_interior.h
@@ -169,7 +169,11 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
struct btree *, unsigned);
+int bch2_btree_node_rewrite_key(struct btree_trans *,
+ enum btree_id, unsigned,
+ struct bpos, unsigned);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
+
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
struct btree *, struct bkey_i *,
unsigned, bool);
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index 46e9e321..0eb32074 100644
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -312,7 +312,10 @@ static int bch2_data_thread(void *arg)
struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
- ctx->stats.data_type = U8_MAX;
+ if (ctx->thr.ret == -BCH_ERR_device_offline)
+ ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
+ else
+ ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
return 0;
}
@@ -331,14 +334,30 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
struct bch_fs *c = ctx->c;
struct bch_ioctl_data_event e = {
- .type = BCH_DATA_EVENT_PROGRESS,
- .p.data_type = ctx->stats.data_type,
- .p.btree_id = ctx->stats.pos.btree,
- .p.pos = ctx->stats.pos.pos,
- .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
- .p.sectors_total = bch2_fs_usage_read_short(c).used,
+ .type = BCH_DATA_EVENT_PROGRESS,
+ .ret = ctx->stats.ret,
+ .p.data_type = ctx->stats.data_type,
+ .p.btree_id = ctx->stats.pos.btree,
+ .p.pos = ctx->stats.pos.pos,
+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
+ .p.sectors_error_corrected = atomic64_read(&ctx->stats.sectors_error_corrected),
+ .p.sectors_error_uncorrected = atomic64_read(&ctx->stats.sectors_error_uncorrected),
};
+ if (ctx->arg.op == BCH_DATA_OP_scrub) {
+ struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
+ if (ca) {
+ struct bch_dev_usage u;
+ bch2_dev_usage_read_fast(ca, &u);
+ for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
+ if (ctx->arg.scrub.data_types & BIT(i))
+ e.p.sectors_total += u.d[i].sectors;
+ bch2_dev_put(ca);
+ }
+ } else {
+ e.p.sectors_total = bch2_fs_usage_read_short(c).used;
+ }
+
if (len < sizeof(e))
return -EINVAL;
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index e4b50723..144b935c 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -15,6 +15,9 @@ struct data_update_opts {
u8 extra_replicas;
unsigned btree_insert_flags;
unsigned write_flags;
+
+ int read_dev;
+ bool scrub;
};
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index b5de52a5..3722c2ac 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -189,7 +189,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
unsigned offset = 0;
int ret;
- if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick, -1) <= 0) {
prt_printf(out, "error getting device to read from: invalid device\n");
return;
}
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 4590cd0c..cae4fc3e 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -269,6 +269,7 @@
x(EIO, invalidate_stripe_to_dev) \
x(EIO, no_encryption_key) \
x(EIO, insufficient_journal_devices) \
+ x(EIO, device_offline) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 05d5f71a..78a51d96 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -114,8 +114,9 @@ static inline bool ptr_better(struct bch_fs *c,
* other devices, it will still pick a pointer from avoid.
*/
int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
- struct bch_io_failures *failed,
- struct extent_ptr_decoded *pick)
+ struct bch_io_failures *failed,
+ struct extent_ptr_decoded *pick,
+ int dev)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -137,6 +138,10 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
break;
}
+ /* Are we being asked to read from a specific device? */
+ if (dev >= 0 && p.ptr.dev != dev)
+ continue;
+
/*
* If there are any dirty pointers it's an error if we can't
* read:
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 620b284a..8fae6b23 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -404,7 +404,7 @@ void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
- struct extent_ptr_decoded *);
+ struct extent_ptr_decoded *, int);
/* KEY_TYPE_btree_ptr: */
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index 34a3569d..87fe5997 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -444,7 +444,7 @@ retry:
ret = __bch2_read_extent(trans, rbio, bvec_iter,
rbio->read_pos,
rbio->data_btree,
- k, 0, failed, flags);
+ k, 0, failed, flags, -1);
if (ret == READ_RETRY)
goto retry;
if (ret)
@@ -499,6 +499,7 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
blk_status_t error)
{
rbio->retry = retry;
+ rbio->saw_error = true;
if (rbio->flags & BCH_READ_IN_RETRY)
return;
@@ -740,7 +741,7 @@ static void __bch2_read_endio(struct work_struct *work)
bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
}
-
+nodecode:
if (rbio->promote) {
/*
* Re encrypt data we decrypted, so it's consistent with
@@ -753,7 +754,7 @@ static void __bch2_read_endio(struct work_struct *work)
promote_start(rbio->promote, rbio);
rbio->promote = NULL;
}
-nodecode:
+
if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
rbio = bch2_rbio_free(rbio);
bch2_rbio_done(rbio);
@@ -878,7 +879,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
unsigned offset_into_extent,
- struct bch_io_failures *failed, unsigned flags)
+ struct bch_io_failures *failed, unsigned flags, int dev)
{
struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
@@ -888,6 +889,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bpos data_pos = bkey_start_pos(k.k);
int pick_ret;
+ //BUG_ON(failed && failed->nr);
+
if (bkey_extent_is_inline_data(k.k)) {
unsigned bytes = min_t(unsigned, iter.bi_size,
bkey_inline_data_bytes(k.k));
@@ -900,7 +903,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
goto out_read_done;
}
retry_pick:
- pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
/* hole or reservation - just zero fill: */
if (!pick_ret)
@@ -952,7 +955,30 @@ retry_pick:
*/
bch2_trans_unlock(trans);
- if (flags & BCH_READ_NODECODE) {
+ if (!(flags & BCH_READ_NODECODE)) {
+ if (!(flags & BCH_READ_LAST_FRAGMENT) ||
+ bio_flagged(&orig->bio, BIO_CHAIN))
+ flags |= BCH_READ_MUST_CLONE;
+
+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
+ bch2_can_narrow_extent_crcs(k, pick.crc);
+
+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
+ flags |= BCH_READ_MUST_BOUNCE;
+
+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+ if (crc_is_compressed(pick.crc) ||
+ (pick.crc.csum_type != BCH_CSUM_none &&
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+ (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+ (flags & BCH_READ_USER_MAPPED)) ||
+ (flags & BCH_READ_MUST_BOUNCE)))) {
+ read_full = true;
+ bounce = true;
+ }
+ } else {
+ read_full = true;
/*
* can happen if we retry, and the extent we were going to read
* has been merged in the meantime:
@@ -964,32 +990,10 @@ retry_pick:
}
iter.bi_size = pick.crc.compressed_size << 9;
- goto get_bio;
}
- if (!(flags & BCH_READ_LAST_FRAGMENT) ||
- bio_flagged(&orig->bio, BIO_CHAIN))
- flags |= BCH_READ_MUST_CLONE;
-
- narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
- bch2_can_narrow_extent_crcs(k, pick.crc);
-
- if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
- flags |= BCH_READ_MUST_BOUNCE;
-
- EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-
- if (crc_is_compressed(pick.crc) ||
- (pick.crc.csum_type != BCH_CSUM_none &&
- (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
- (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
- (flags & BCH_READ_USER_MAPPED)) ||
- (flags & BCH_READ_MUST_BOUNCE)))) {
- read_full = true;
- bounce = true;
- }
-
- if (orig->opts.promote_target)// || failed)
+ if ((orig->opts.promote_target && !(flags & BCH_READ_NODECODE)) ||
+ (failed && failed->nr))
promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
&rbio, &bounce, &read_full, failed);
@@ -1010,7 +1014,7 @@ retry_pick:
pick.crc.offset = 0;
pick.crc.live_size = bvec_iter_sectors(iter);
}
-get_bio:
+
if (rbio) {
/*
* promote already allocated bounce rbio:
@@ -1261,7 +1265,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
data_btree, k,
- offset_into_extent, failed, flags);
+ offset_into_extent, failed, flags, -1);
if (ret)
goto err;
diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h
index a82e8a94..b62fcee7 100644
--- a/libbcachefs/io_read.h
+++ b/libbcachefs/io_read.h
@@ -41,6 +41,7 @@ struct bch_read_bio {
have_ioref:1,
narrow_crcs:1,
hole:1,
+ saw_error:1,
retry:2,
context:2;
};
@@ -124,7 +125,7 @@ enum bch_read_flags {
int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
struct bvec_iter, struct bpos, enum btree_id,
struct bkey_s_c, unsigned,
- struct bch_io_failures *, unsigned);
+ struct bch_io_failures *, unsigned, int);
static inline void bch2_read_extent(struct btree_trans *trans,
struct bch_read_bio *rbio, struct bpos read_pos,
@@ -132,7 +133,7 @@ static inline void bch2_read_extent(struct btree_trans *trans,
unsigned offset_into_extent, unsigned flags)
{
__bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
- data_btree, k, offset_into_extent, NULL, flags);
+ data_btree, k, offset_into_extent, NULL, flags, -1);
}
void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index c493ea62..cc852492 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -114,7 +114,20 @@ static void move_write_done(struct bch_write_op *op)
static void move_write(struct moving_io *io)
{
- if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
+ struct moving_context *ctxt = io->write.ctxt;
+
+ if (ctxt->stats) {
+ if (io->rbio.bio.bi_status)
+ atomic64_add(io->rbio.bvec_iter.bi_size >> 9,
+ &ctxt->stats->sectors_error_uncorrected);
+ else if (io->rbio.saw_error)
+ atomic64_add(io->rbio.bvec_iter.bi_size >> 9,
+ &ctxt->stats->sectors_error_corrected);
+ }
+
+ if (unlikely(io->rbio.bio.bi_status ||
+ io->rbio.hole ||
+ io->write.data_opts.scrub)) {
move_free(io);
return;
}
@@ -273,7 +286,8 @@ int bch2_move_extent(struct moving_context *ctxt,
bch2_data_update_opts_normalize(k, &data_opts);
if (!data_opts.rewrite_ptrs &&
- !data_opts.extra_replicas) {
+ !data_opts.extra_replicas &&
+ !data_opts.scrub) {
if (data_opts.kill_ptrs)
return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
return 0;
@@ -319,12 +333,18 @@ int bch2_move_extent(struct moving_context *ctxt,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
- ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
- io_opts, data_opts, iter->btree_id, k);
- if (ret)
- goto err_free_pages;
+ if (!data_opts.scrub) {
+ ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
+ io_opts, data_opts, iter->btree_id, k);
+ if (ret)
+ goto err_free_pages;
- io->write.op.end_io = move_write_done;
+ io->write.op.end_io = move_write_done;
+ } else {
+ bch2_bkey_buf_init(&io->write.k);
+ io->write.op.c = c;
+ io->write.data_opts = data_opts;
+ }
if (ctxt->rate)
bch2_ratelimit_increment(ctxt->rate, k.k->size);
@@ -356,11 +376,14 @@ int bch2_move_extent(struct moving_context *ctxt,
* ctxt when doing wakeup
*/
closure_get(&ctxt->cl);
- bch2_read_extent(trans, &io->rbio,
- bkey_start_pos(k.k),
- iter->btree_id, k, 0,
- BCH_READ_NODECODE|
- BCH_READ_LAST_FRAGMENT);
+ __bch2_read_extent(trans, &io->rbio,
+ io->rbio.bio.bi_iter,
+ bkey_start_pos(k.k),
+ iter->btree_id, k, 0,
+ NULL,
+ BCH_READ_NODECODE|
+ BCH_READ_LAST_FRAGMENT,
+ data_opts.scrub ? data_opts.read_dev : -1);
return 0;
err_free_pages:
bio_free_pages(&io->write.op.wbio.bio);
@@ -689,21 +712,22 @@ int bch2_move_data(struct bch_fs *c,
bool wait_on_copygc,
move_pred_fn pred, void *arg)
{
-
struct moving_context ctxt;
- int ret;
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
- ret = __bch2_move_data(&ctxt, start, end, pred, arg);
+ int ret = __bch2_move_data(&ctxt, start, end, pred, arg);
bch2_moving_ctxt_exit(&ctxt);
return ret;
}
-int bch2_evacuate_bucket(struct moving_context *ctxt,
- struct move_bucket_in_flight *bucket_in_flight,
- struct bpos bucket, int gen,
- struct data_update_opts _data_opts)
+static int __bch2_move_data_phys(struct moving_context *ctxt,
+ struct move_bucket_in_flight *bucket_in_flight,
+ unsigned dev,
+ u64 bucket_start,
+ u64 bucket_end,
+ unsigned data_types,
+ move_pred_fn pred, void *arg)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
@@ -712,16 +736,22 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
struct btree_iter iter = {}, bp_iter = {};
struct bkey_buf sk;
struct bkey_s_c k;
- struct data_update_opts data_opts;
unsigned sectors_moved = 0;
struct bkey_buf last_flushed;
int ret = 0;
- struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode);
+ struct bch_dev *ca = bch2_dev_tryget(c, dev);
if (!ca)
return 0;
- trace_bucket_evacuate(c, &bucket);
+ bucket_end = min(bucket_end, ca->mi.nbuckets);
+
+ struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
+ struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
+ bch2_dev_put(ca);
+ ca = NULL;
+
+ //trace_bucket_evacuate(c, &bucket);
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
@@ -732,8 +762,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
*/
bch2_trans_begin(trans);
- bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
- bucket_pos_to_bp_start(ca, bucket), 0);
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0);
bch_err_msg(c, ret, "looking up alloc key");
if (ret)
@@ -757,7 +786,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
if (ret)
goto err;
- if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket)))
+ if (!k.k || bkey_gt(k.k->p, bp_end))
break;
if (k.k->type != KEY_TYPE_backpointer)
@@ -765,107 +794,145 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
- if (!bp.v->level) {
- k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
- ret = bkey_err(k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- goto err;
- if (!k.k)
- goto next;
+ if (ctxt->stats)
+ ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
- bch2_bkey_buf_reassemble(&sk, c, k);
- k = bkey_i_to_s_c(sk.k);
+ if (!(data_types & BIT(bp.v->data_type)))
+ goto next;
+ k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+ if (!k.k)
+ goto next;
+
+ if (!bp.v->level) {
ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
if (ret) {
bch2_trans_iter_exit(trans, &iter);
continue;
}
+ }
- data_opts = _data_opts;
- data_opts.target = io_opts.background_target;
- data_opts.rewrite_ptrs = 0;
-
- unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */
- unsigned i = 0;
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
- if (p.ptr.dev == bucket.inode) {
- if (p.ptr.cached) {
- bch2_trans_iter_exit(trans, &iter);
- goto next;
- }
- data_opts.rewrite_ptrs |= 1U << i;
- break;
- }
- i++;
- }
-
- ret = bch2_move_extent(ctxt, bucket_in_flight,
- &iter, k, io_opts, data_opts);
+ struct data_update_opts data_opts = {};
+ if (!pred(c, arg, k, &io_opts, &data_opts)) {
bch2_trans_iter_exit(trans, &iter);
+ goto next;
+ }
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret == -ENOMEM) {
- /* memory allocation failure, wait for some IO to finish */
- bch2_move_ctxt_wait_for_io(ctxt);
- continue;
- }
- if (ret)
- goto err;
-
- if (ctxt->stats)
- atomic64_add(sectors, &ctxt->stats->sectors_seen);
- sectors_moved += sectors;
- } else {
- struct btree *b;
+ if (data_opts.scrub &&
+ !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
+ bch2_trans_iter_exit(trans, &iter);
+ ret = -BCH_ERR_device_offline;
+ break;
+ }
- b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed);
- ret = PTR_ERR_OR_ZERO(b);
- if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
- goto next;
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- goto err;
- if (!b)
- goto next;
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
- unsigned sectors = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
+ /* move_extent will drop locks */
+ unsigned sectors = bp.v->bucket_len;
- ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
- bch2_trans_iter_exit(trans, &iter);
+ if (!bp.v->level)
+ ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
+ else if (!data_opts.scrub)
+ ret = bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
+ else
+ ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- goto err;
+ bch2_trans_iter_exit(trans, &iter);
- if (ctxt->rate)
- bch2_ratelimit_increment(ctxt->rate, sectors);
- if (ctxt->stats) {
- atomic64_add(sectors, &ctxt->stats->sectors_seen);
- atomic64_add(sectors, &ctxt->stats->sectors_moved);
- }
- sectors_moved += btree_sectors(c);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret == -ENOMEM) {
+ /* memory allocation failure, wait for some IO to finish */
+ bch2_move_ctxt_wait_for_io(ctxt);
+ continue;
}
+ if (ret)
+ goto err;
+
+ if (ctxt->stats)
+ atomic64_add(sectors, &ctxt->stats->sectors_seen);
+ sectors_moved += sectors;
next:
bch2_btree_iter_advance(&bp_iter);
}
- trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret);
+ //trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret);
err:
bch2_trans_iter_exit(trans, &bp_iter);
- bch2_dev_put(ca);
bch2_bkey_buf_exit(&sk, c);
bch2_bkey_buf_exit(&last_flushed, c);
return ret;
}
+static int bch2_move_data_phys(struct bch_fs *c,
+ unsigned dev,
+ u64 start,
+ u64 end,
+ unsigned data_types,
+ struct bch_ratelimit *rate,
+ struct bch_move_stats *stats,
+ struct write_point_specifier wp,
+ bool wait_on_copygc,
+ move_pred_fn pred, void *arg)
+{
+ struct moving_context ctxt;
+
+ bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
+
+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
+ ctxt.stats->phys = true;
+
+ int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
+ bch2_moving_ctxt_exit(&ctxt);
+
+ return ret;
+}
+
+struct evacuate_bucket_arg {
+ struct bpos bucket;
+ int gen;
+ struct data_update_opts data_opts;
+};
+
+static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ struct evacuate_bucket_arg *arg = _arg;
+
+ *data_opts = arg->data_opts;
+
+ unsigned i = 0;
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
+ if (ptr->dev == arg->bucket.inode && !ptr->cached)
+ data_opts->rewrite_ptrs |= BIT(i);
+ i++;
+ }
+
+ return data_opts->rewrite_ptrs != 0;
+}
+
+int bch2_evacuate_bucket(struct moving_context *ctxt,
+ struct move_bucket_in_flight *bucket_in_flight,
+ struct bpos bucket, int gen,
+ struct data_update_opts data_opts)
+{
+ struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };
+
+ return __bch2_move_data_phys(ctxt, bucket_in_flight,
+ bucket.inode,
+ bucket.offset,
+ bucket.offset + 1,
+ ~0,
+ evacuate_bucket_pred, &arg);
+}
+
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
struct btree *, struct bch_io_opts *,
struct data_update_opts *);
@@ -1104,6 +1171,30 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
+static bool scrub_pred(struct bch_fs *c, void *_arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ struct bch_ioctl_data *arg = _arg;
+
+ if (k.k->type != KEY_TYPE_btree_ptr_v2) {
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (p.ptr.dev == arg->migrate.dev) {
+ if (!p.crc.csum_type)
+ return false;
+ break;
+ }
+ }
+
+ data_opts->scrub = true;
+ data_opts->read_dev = arg->migrate.dev;
+ return true;
+}
+
int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
struct bch_ioctl_data op)
@@ -1118,6 +1209,16 @@ int bch2_data_job(struct bch_fs *c,
bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);
switch (op.op) {
+ case BCH_DATA_OP_scrub:
+ ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
+ op.scrub.data_types,
+ NULL,
+ stats,
+ writepoint_hashed((unsigned long) current),
+ false,
+ scrub_pred, &op) ?: ret;
+ break;
+
case BCH_DATA_OP_rereplicate:
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1);
diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h
index e22841ef..82e473ed 100644
--- a/libbcachefs/move_types.h
+++ b/libbcachefs/move_types.h
@@ -3,17 +3,31 @@
#define _BCACHEFS_MOVE_TYPES_H
#include "bbpos_types.h"
+#include "bcachefs_ioctl.h"
struct bch_move_stats {
- enum bch_data_type data_type;
- struct bbpos pos;
char name[32];
+ bool phys;
+ enum bch_ioctl_data_event_ret ret;
+
+ union {
+ struct {
+ enum bch_data_type data_type;
+ struct bbpos pos;
+ };
+ struct {
+ unsigned dev;
+ u64 offset;
+ };
+ };
atomic64_t keys_moved;
atomic64_t keys_raced;
atomic64_t sectors_seen;
atomic64_t sectors_moved;
atomic64_t sectors_raced;
+ atomic64_t sectors_error_corrected;
+ atomic64_t sectors_error_uncorrected;
};
struct move_bucket_key {
diff --git a/libbcachefs/printbuf.h b/libbcachefs/printbuf.h
index 1d570387..d0dd398b 100644
--- a/libbcachefs/printbuf.h
+++ b/libbcachefs/printbuf.h
@@ -251,16 +251,23 @@ static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte)
printbuf_nul_terminate_reserved(out);
}
+static inline void printbuf_reset_keep_tabstops(struct printbuf *buf)
+{
+ buf->pos = 0;
+ buf->allocation_failure = 0;
+ buf->last_newline = 0;
+ buf->last_field = 0;
+ buf->indent = 0;
+ buf->cur_tabstop = 0;
+}
+
/**
* printbuf_reset - re-use a printbuf without freeing and re-initializing it:
*/
static inline void printbuf_reset(struct printbuf *buf)
{
- buf->pos = 0;
- buf->allocation_failure = 0;
- buf->indent = 0;
+ printbuf_reset_keep_tabstops(buf);
buf->nr_tabstops = 0;
- buf->cur_tabstop = 0;
}
/**
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index 762083b5..b29b6c6c 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -23,6 +23,18 @@ static inline bool bch2_dev_is_online(struct bch_dev *ca)
return !percpu_ref_is_zero(&ca->io_ref);
}
+static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
+
+static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
+{
+ rcu_read_lock();
+ struct bch_dev *ca = bch2_dev_rcu(c, dev);
+ bool ret = ca && bch2_dev_is_online(ca);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static inline bool bch2_dev_is_readable(struct bch_dev *ca)
{
return bch2_dev_is_online(ca) &&
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 9d40b7d4..e4c7dc0e 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -785,27 +785,6 @@ TRACE_EVENT(bucket_invalidate,
/* Moving IO */
-TRACE_EVENT(bucket_evacuate,
- TP_PROTO(struct bch_fs *c, struct bpos *bucket),
- TP_ARGS(c, bucket),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(u32, dev_idx )
- __field(u64, bucket )
- ),
-
- TP_fast_assign(
- __entry->dev = c->dev;
- __entry->dev_idx = bucket->inode;
- __entry->bucket = bucket->offset;
- ),
-
- TP_printk("%d:%d %u:%llu",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->dev_idx, __entry->bucket)
-);
-
DEFINE_EVENT(fs_str, move_extent,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
@@ -869,37 +848,6 @@ TRACE_EVENT(move_data,
__entry->sectors_raced)
);
-TRACE_EVENT(evacuate_bucket,
- TP_PROTO(struct bch_fs *c, struct bpos *bucket,
- unsigned sectors, unsigned bucket_size,
- int ret),
- TP_ARGS(c, bucket, sectors, bucket_size, ret),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(u64, member )
- __field(u64, bucket )
- __field(u32, sectors )
- __field(u32, bucket_size )
- __field(int, ret )
- ),
-
- TP_fast_assign(
- __entry->dev = c->dev;
- __entry->member = bucket->inode;
- __entry->bucket = bucket->offset;
- __entry->sectors = sectors;
- __entry->bucket_size = bucket_size;
- __entry->ret = ret;
- ),
-
- TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->member, __entry->bucket,
- __entry->sectors, __entry->bucket_size,
- __entry->ret)
-);
-
TRACE_EVENT(copygc,
TP_PROTO(struct bch_fs *c,
u64 sectors_moved, u64 sectors_not_moved,