-rw-r--r--   fs/bcachefs/data_update.c   217
-rw-r--r--   fs/bcachefs/data_update.h    40
-rw-r--r--   fs/bcachefs/extents.c        69
-rw-r--r--   fs/bcachefs/extents.h         5
-rw-r--r--   fs/bcachefs/io.c             13
-rw-r--r--   fs/bcachefs/move.c          160
-rw-r--r--   fs/bcachefs/move.h            8
-rw-r--r--   fs/bcachefs/movinggc.c        8
-rw-r--r--   fs/bcachefs/rebalance.c      84
9 files changed, 286 insertions, 318 deletions
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 4b0a866ab1fc..2d7c650ced55 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -90,6 +90,16 @@ next:
return ret;
}
+static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ struct bch_extent_ptr *ptr;
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->dev == dev)
+ ptr->cached = true;
+}
+
static int bch2_data_update_index_update(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
@@ -114,6 +124,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
while (1) {
struct bkey_s_c k;
+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
struct bkey_i *insert;
struct bkey_i_extent *new;
const union bch_extent_entry *entry;
@@ -122,6 +133,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
bool did_work = false;
bool should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+ unsigned i;
bch2_trans_begin(&trans);
@@ -132,8 +144,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
new = bkey_i_to_extent(bch2_keylist_front(keys));
- if (bversion_cmp(k.k->version, new->k.version) ||
- !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
+ if (!bch2_extents_match(k, old))
goto nomatch;
bkey_reassemble(_insert.k, k);
@@ -147,20 +158,39 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
bch2_cut_back(new->k.p, insert);
bch2_cut_back(insert->k.p, &new->k_i);
- if (m->data_cmd == DATA_REWRITE) {
- struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
- bch2_bkey_has_device(bkey_i_to_s_c(insert),
- m->data_opts.rewrite_dev);
- if (!old_ptr)
- goto nomatch;
-
- if (old_ptr->cached)
- extent_for_each_ptr(extent_i_to_s(new), new_ptr)
- new_ptr->cached = true;
-
- __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+ /*
+ * @old: extent that we read from
+ * @insert: key that we're going to update, initialized from
+ * extent currently in btree - same as @old unless we raced with
+ * other updates
+ * @new: extent with new pointers that we'll be adding to @insert
+ *
+ * First, drop rewrite_ptrs from @new:
+ */
+ i = 0;
+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) {
+ /*
+ * If we're going to be adding a pointer to the
+ * same device, we have to drop the old one -
+ * otherwise, we can just mark it cached:
+ */
+ if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev))
+ bch2_bkey_drop_device(bkey_i_to_s(insert), p.ptr.dev);
+ else
+ bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev);
+ }
+ i++;
}
+ /*
+ * hack: we may have dropped all the existing pointers but we're
+ * about to add our new pointers:
+ */
+ insert->k.type = k.k->type;
+
+ /* Add new ptrs: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
/*
@@ -178,12 +208,8 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
if (!did_work)
goto nomatch;
- bch2_bkey_narrow_crcs(insert,
- (struct bch_extent_crc_unpacked) { 0 });
+ bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
bch2_extent_normalize(c, bkey_i_to_s(insert));
- bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
- op->opts.background_target,
- op->opts.data_replicas);
ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
&should_check_enospc,
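A note on the convention used above: data_update_opts.rewrite_ptrs replaces the old single rewrite_dev field, selecting pointers by their index within the key's pointer list. A minimal standalone sketch of the bitmask convention (plain C, not bcachefs code; names and values are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned rewrite_ptrs = (1U << 0) | (1U << 2);	/* rewrite pointers 0 and 2 */
	const char *dev[] = { "dev0", "dev1", "dev2" };
	unsigned i;

	for (i = 0; i < 3; i++)
		if (rewrite_ptrs & (1U << i))
			printf("ptr %u (%s): drop, or mark cached\n", i, dev[i]);
		else
			printf("ptr %u (%s): keep\n", i, dev[i]);
	return 0;
}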
@@ -250,134 +276,101 @@ out:
return ret;
}
-void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio)
+void bch2_data_update_read_done(struct data_update *m,
+ struct bch_extent_crc_unpacked crc,
+ struct closure *cl)
{
/* write bio must own pages: */
BUG_ON(!m->op.wbio.bio.bi_vcnt);
- m->ptr = rbio->pick.ptr;
- m->offset = rbio->data_pos.offset - rbio->pick.crc.offset;
- m->op.devs_have = rbio->devs_have;
- m->op.pos = rbio->data_pos;
- m->op.version = rbio->version;
- m->op.crc = rbio->pick.crc;
- m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
+ m->op.crc = crc;
+ m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
+
+ closure_call(&m->op.cl, bch2_write, NULL, cl);
+}
+
+void bch2_data_update_exit(struct data_update *update)
+{
+ struct bch_fs *c = update->op.c;
- if (m->data_cmd == DATA_REWRITE)
- bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+ bch2_bkey_buf_exit(&update->k, c);
+ bch2_disk_reservation_put(c, &update->op.res);
+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
}
int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
struct write_point_specifier wp,
struct bch_io_opts io_opts,
- enum data_cmd data_cmd,
- struct data_opts data_opts,
+ struct data_update_opts data_opts,
enum btree_id btree_id,
struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
- struct bch_extent_crc_unpacked crc;
struct extent_ptr_decoded p;
+ unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
int ret;
+ bch2_bkey_buf_init(&m->k);
+ bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
- m->data_cmd = data_cmd;
m->data_opts = data_opts;
- m->nr_ptrs_reserved = 0;
bch2_write_op_init(&m->op, c, io_opts);
-
- if (!bch2_bkey_is_incompressible(k))
- m->op.compression_type =
- bch2_compression_opt_to_type[io_opts.background_compression ?:
- io_opts.compression];
- else
- m->op.incompressible = true;
-
+ m->op.pos = bkey_start_pos(k.k);
+ m->op.version = k.k->version;
m->op.target = data_opts.target;
m->op.write_point = wp;
-
- /*
- * op->csum_type is normally initialized from the fs/file's current
- * options - but if an extent is encrypted, we require that it stays
- * encrypted:
- */
- bkey_for_each_crc(k.k, ptrs, crc, entry)
- if (bch2_csum_type_is_encryption(crc.csum_type)) {
- m->op.nonce = crc.nonce + crc.offset;
- m->op.csum_type = crc.csum_type;
- break;
- }
-
- if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
- m->op.alloc_reserve = RESERVE_movinggc;
- } else {
- /* XXX: this should probably be passed in */
- m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
- }
-
- m->op.flags |= BCH_WRITE_PAGES_STABLE|
+ m->op.flags |= BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED|
BCH_WRITE_DATA_ENCODED|
- BCH_WRITE_FROM_INTERNAL;
-
- m->op.nr_replicas = data_opts.nr_replicas;
- m->op.nr_replicas_required = data_opts.nr_replicas;
+ BCH_WRITE_FROM_INTERNAL|
+ m->data_opts.write_flags;
+ m->op.compression_type =
+ bch2_compression_opt_to_type[io_opts.background_compression ?:
+ io_opts.compression];
+ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
+ m->op.alloc_reserve = RESERVE_movinggc;
m->op.index_update_fn = bch2_data_update_index_update;
- switch (data_cmd) {
- case DATA_ADD_REPLICAS: {
- /*
- * DATA_ADD_REPLICAS is used for moving data to a different
- * device in the background, and due to compression the new copy
- * might take up more space than the old copy:
- */
-#if 0
- int nr = (int) io_opts.data_replicas -
- bch2_bkey_nr_ptrs_allocated(k);
-#endif
- int nr = (int) io_opts.data_replicas;
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.ptr.cached)
+ m->data_opts.rewrite_ptrs &= ~(1U << i);
- if (nr > 0) {
- m->op.nr_replicas = m->nr_ptrs_reserved = nr;
+ if (!((1U << i) & m->data_opts.rewrite_ptrs))
+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
- ret = bch2_disk_reservation_get(c, &m->op.res,
- k.k->size, m->op.nr_replicas, 0);
- if (ret)
- return ret;
- }
- break;
- }
- case DATA_REWRITE: {
- unsigned compressed_sectors = 0;
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ crc_is_compressed(p.crc))
+ reserve_sectors += k.k->size;
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (p.ptr.dev == data_opts.rewrite_dev) {
- if (p.ptr.cached)
- m->op.flags |= BCH_WRITE_CACHED;
+ /*
+ * op->csum_type is normally initialized from the fs/file's
+ * current options - but if an extent is encrypted, we require
+ * that it stays encrypted:
+ */
+ if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
+ m->op.nonce = p.crc.nonce + p.crc.offset;
+ m->op.csum_type = p.crc.csum_type;
+ }
- if (!p.ptr.cached &&
- crc_is_compressed(p.crc))
- compressed_sectors += p.crc.compressed_size;
- }
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+ m->op.incompressible = true;
- if (compressed_sectors) {
- ret = bch2_disk_reservation_add(c, &m->op.res,
- k.k->size * m->op.nr_replicas,
- BCH_DISK_RESERVATION_NOFAIL);
- if (ret)
- return ret;
- }
- break;
+ i++;
}
- case DATA_PROMOTE:
- m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
- m->op.flags |= BCH_WRITE_CACHED;
- break;
- default:
- BUG();
+
+ if (reserve_sectors) {
+ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+ m->data_opts.extra_replicas
+ ? 0
+ : BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ return ret;
}
+ m->op.nr_replicas = m->op.nr_replicas_required =
+ hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
return 0;
}
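Two things the rewritten init now computes up front: the disk reservation and the replica count. reserve_sectors starts at k.k->size * extra_replicas and grows by a full extent's worth for each rewritten pointer holding compressed data, since the new copy may not compress as well; the write's replica count is the population count of rewrite_ptrs plus extra_replicas. A standalone sketch of that arithmetic (plain C; hweight32 modeled with __builtin_popcount, values illustrative):

#include <stdio.h>

int main(void)
{
	unsigned size = 128;				/* extent size, in sectors */
	unsigned rewrite_ptrs = (1U << 0) | (1U << 1);	/* rewriting pointers 0 and 1 */
	unsigned extra_replicas = 0;
	unsigned compressed = 1U << 0;			/* pointer 0 holds compressed data */
	unsigned reserve = size * extra_replicas;
	unsigned i, nr_replicas;

	for (i = 0; i < 2; i++)
		if ((rewrite_ptrs & (1U << i)) && (compressed & (1U << i)))
			reserve += size;	/* rewritten copy may not compress as well */

	nr_replicas = __builtin_popcount(rewrite_ptrs) + extra_replicas;
	printf("reserve %u sectors for %u replicas\n", reserve, nr_replicas);
	return 0;
}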
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 81388a442a95..e64505453a55 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -3,44 +3,36 @@
#ifndef _BCACHEFS_DATA_UPDATE_H
#define _BCACHEFS_DATA_UPDATE_H
+#include "bkey_buf.h"
#include "io_types.h"
-enum data_cmd {
- DATA_SKIP,
- DATA_SCRUB,
- DATA_ADD_REPLICAS,
- DATA_REWRITE,
- DATA_PROMOTE,
-};
+struct moving_context;
-struct data_opts {
+struct data_update_opts {
+ unsigned rewrite_ptrs;
u16 target;
- u8 rewrite_dev;
- u8 nr_replicas;
- int btree_insert_flags;
+ u8 extra_replicas;
+ unsigned btree_insert_flags;
+ unsigned write_flags;
};
struct data_update {
+ /* extent being updated: */
enum btree_id btree_id;
- enum data_cmd data_cmd;
- struct data_opts data_opts;
-
- unsigned nr_ptrs_reserved;
-
+ struct bkey_buf k;
+ struct data_update_opts data_opts;
struct moving_context *ctxt;
-
- /* what we read: */
- struct bch_extent_ptr ptr;
- u64 offset;
-
struct bch_write_op op;
};
-void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *);
+void bch2_data_update_read_done(struct data_update *,
+ struct bch_extent_crc_unpacked,
+ struct closure *);
+
+void bch2_data_update_exit(struct data_update *);
int bch2_data_update_init(struct bch_fs *, struct data_update *,
struct write_point_specifier,
- struct bch_io_opts,
- enum data_cmd, struct data_opts,
+ struct bch_io_opts, struct data_update_opts,
enum btree_id, struct bkey_s_c);
#endif /* _BCACHEFS_DATA_UPDATE_H */
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 73d756a63572..a368ede65091 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -688,37 +688,6 @@ unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
return durability;
}
-void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
- unsigned target,
- unsigned nr_desired_replicas)
-{
- struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
- union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
-
- if (target && extra > 0)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- int n = bch2_extent_ptr_durability(c, p);
-
- if (n && n <= extra &&
- !bch2_dev_in_target(c, p.ptr.dev, target)) {
- entry->ptr.cached = true;
- extra -= n;
- }
- }
-
- if (extra > 0)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- int n = bch2_extent_ptr_durability(c, p);
-
- if (n && n <= extra) {
- entry->ptr.cached = true;
- extra -= n;
- }
- }
-}
-
void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
{
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
@@ -940,6 +909,44 @@ bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
}
/*
+ * Returns true if two extents refer to the same data:
+ */
+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
+{
+ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+ const union bch_extent_entry *entry1, *entry2;
+ struct extent_ptr_decoded p1, p2;
+
+ bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return true;
+
+ return false;
+}
+
+bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
+ struct bkey_s_c k2)
+{
+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+ const union bch_extent_entry *entry2;
+ struct extent_ptr_decoded p2;
+
+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return true;
+
+ return false;
+}
+
+/*
* bch_extent_normalize - clean up an extent, dropping stale pointers etc.
*
* Returns true if @k should be dropped entirely
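The helpers above compare pointers via the quantity ptr.offset + crc.offset - bkey_start_offset(k): the translation from a logical sector to a device address, which comes out equal for any two keys that reference the same underlying data, however they were split or trimmed. A standalone sketch of that normalization (simplified types; values illustrative):

#include <stdio.h>
#include <stdint.h>

struct demo_ptr {
	unsigned dev, gen;
	uint64_t offset;	/* device address of the pointed-to region */
	uint64_t crc_offset;	/* offset of live data within that region */
};

/* Logical-to-device translation constant; equal for keys into the same data: */
static int64_t start_addr(struct demo_ptr p, uint64_t bkey_start)
{
	return (int64_t) (p.offset + p.crc_offset) - (int64_t) bkey_start;
}

int main(void)
{
	/* The same data referenced by two keys trimmed at different boundaries: */
	struct demo_ptr p1 = { .dev = 0, .gen = 3, .offset = 1000, .crc_offset = 0 };
	struct demo_ptr p2 = { .dev = 0, .gen = 3, .offset = 1000, .crc_offset = 8 };

	printf("match: %d\n",
	       p1.dev == p2.dev &&
	       p1.gen == p2.gen &&
	       start_addr(p1, 200) == start_addr(p2, 208));
	return 0;
}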
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 4f41f0fd6cb1..27371dcfb81f 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -577,9 +577,6 @@ unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
-void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
- unsigned, unsigned);
-
void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_extent_ptr_decoded_append(struct bkey_i *,
@@ -612,6 +609,8 @@ bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_extent_ptr, u64);
+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
+bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 7756d4b36ccf..52c0f9300ae3 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1381,13 +1381,12 @@ static void promote_done(struct closure *cl)
bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
op->start_time);
- bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
+ bch2_data_update_exit(&op->write);
promote_free(c, op);
}
static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
{
- struct bch_fs *c = rbio->c;
struct closure *cl = &op->cl;
struct bio *bio = &op->write.op.wbio.bio;
@@ -1401,10 +1400,8 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
- bch2_data_update_read_done(&op->write, rbio);
-
closure_init(cl, NULL);
- closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, cl);
+ bch2_data_update_read_done(&op->write, rbio->pick.crc, cl);
closure_return_with_destructor(cl, promote_done);
}
@@ -1463,10 +1460,10 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
ret = bch2_data_update_init(c, &op->write,
writepoint_hashed((unsigned long) current),
opts,
- DATA_PROMOTE,
- (struct data_opts) {
+ (struct data_update_opts) {
.target = opts.promote_target,
- .nr_replicas = 1,
+ .extra_replicas = 1,
+ .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
},
btree_id, k);
BUG_ON(ret);
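The promote path illustrates how the removed enum data_cmd folds into struct data_update_opts: DATA_PROMOTE becomes extra_replicas = 1 plus the BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED write flags, DATA_ADD_REPLICAS becomes a pure extra_replicas request, and DATA_REWRITE becomes a nonzero rewrite_ptrs mask. A sketch of that mapping with simplified stand-in types (not the kernel definitions; the DEMO_* flag values are placeholders):

#include <stdio.h>

/* Simplified stand-in for struct data_update_opts, for illustration only: */
struct demo_update_opts {
	unsigned rewrite_ptrs;		/* existing pointers to drop/replace, by index */
	unsigned extra_replicas;	/* additional copies to write */
	unsigned write_flags;		/* placeholder flag bits */
};

#define DEMO_WRITE_ALLOC_NOWAIT	(1U << 0)	/* stands in for BCH_WRITE_ALLOC_NOWAIT */
#define DEMO_WRITE_CACHED	(1U << 1)	/* stands in for BCH_WRITE_CACHED */

/* was DATA_ADD_REPLICAS: keep existing pointers, write one more copy */
static const struct demo_update_opts add_replicas = { .extra_replicas = 1 };

/* was DATA_REWRITE: replace pointer 0, no extra copies */
static const struct demo_update_opts rewrite = { .rewrite_ptrs = 1U << 0 };

/* was DATA_PROMOTE: one extra cached copy, never block on allocation */
static const struct demo_update_opts promote = {
	.extra_replicas	= 1,
	.write_flags	= DEMO_WRITE_ALLOC_NOWAIT | DEMO_WRITE_CACHED,
};

int main(void)
{
	printf("add_replicas: +%u copies\n", add_replicas.extra_replicas);
	printf("rewrite: ptr mask %#x\n", rewrite.rewrite_ptrs);
	printf("promote: +%u cached copy, flags %#x\n",
	       promote.extra_replicas, promote.write_flags);
	return 0;
}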
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index bc507c59df33..6f9a21be7906 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -44,17 +44,9 @@ static void move_free(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct moving_context *ctxt = io->write.ctxt;
- struct bvec_iter_all iter;
- struct bio_vec *bv;
-
- bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
-
- bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
- if (bv->bv_page)
- __free_page(bv->bv_page);
+ bch2_data_update_exit(&io->write);
wake_up(&ctxt->wait);
-
kfree(io);
}
@@ -75,10 +67,9 @@ static void move_write(struct closure *cl)
return;
}
- bch2_data_update_read_done(&io->write, &io->rbio);
-
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
- closure_call(&io->write.op.cl, bch2_write, NULL, cl);
+
+ bch2_data_update_read_done(&io->write, io->rbio.pick.crc, cl);
continue_at(cl, move_write_done, NULL);
}
@@ -141,8 +132,7 @@ static int bch2_move_extent(struct btree_trans *trans,
struct bch_io_opts io_opts,
enum btree_id btree_id,
struct bkey_s_c k,
- enum data_cmd data_cmd,
- struct data_opts data_opts)
+ struct data_update_opts data_opts)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -186,10 +176,12 @@ static int bch2_move_extent(struct btree_trans *trans,
io->rbio.bio.bi_end_io = move_read_endio;
ret = bch2_data_update_init(c, &io->write, wp, io_opts,
- data_cmd, data_opts, btree_id, k);
+ data_opts, btree_id, k);
if (ret)
goto err_free_pages;
+ io->write.ctxt = ctxt;
+
atomic64_inc(&ctxt->stats->keys_moved);
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
@@ -329,8 +321,7 @@ static int __bch2_move_data(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- struct data_opts data_opts;
- enum data_cmd data_cmd;
+ struct data_update_opts data_opts;
u64 cur_inum = U64_MAX;
int ret = 0, ret2;
@@ -373,18 +364,9 @@ static int __bch2_move_data(struct bch_fs *c,
if (ret)
continue;
- switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
- case DATA_SKIP:
+ memset(&data_opts, 0, sizeof(data_opts));
+ if (!pred(c, arg, k, &io_opts, &data_opts))
goto next;
- case DATA_SCRUB:
- BUG();
- case DATA_ADD_REPLICAS:
- case DATA_REWRITE:
- case DATA_PROMOTE:
- break;
- default:
- BUG();
- }
/*
* The iterator gets unlocked by __bch2_read_extent - need to
@@ -393,8 +375,8 @@ static int __bch2_move_data(struct bch_fs *c,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
- data_cmd, data_opts);
+ ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts,
+ btree_id, k, data_opts);
if (ret2) {
if (ret2 == -EINTR)
continue;
@@ -532,8 +514,7 @@ int bch2_evacuate_bucket(struct bch_fs *c,
struct bpos bucket, int gen,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
- enum data_cmd data_cmd,
- struct data_opts *data_opts,
+ struct data_update_opts *data_opts,
struct bch_move_stats *stats)
{
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
@@ -567,7 +548,9 @@ int bch2_evacuate_bucket(struct bch_fs *c,
break;
if (!bp.level) {
+ const struct bch_extent_ptr *ptr;
struct bkey_s_c k;
+ unsigned i = 0;
k = bch2_backpointer_get_key(&trans, &iter,
bucket, bp_offset, bp);
@@ -588,10 +571,16 @@ int bch2_evacuate_bucket(struct bch_fs *c,
continue;
data_opts->target = io_opts.background_target;
- data_opts->rewrite_dev = bucket.inode;
+ data_opts->rewrite_ptrs = 0;
- ret = bch2_move_extent(&trans, &ctxt, wp, io_opts, bp.btree_id, k,
- data_cmd, *data_opts);
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
+ if (ptr->dev == bucket.inode)
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+
+ ret = bch2_move_extent(&trans, &ctxt, wp, io_opts,
+ bp.btree_id, k, *data_opts);
if (ret == -EINTR)
continue;
if (ret == -ENOMEM) {
@@ -658,9 +647,9 @@ err:
return ret;
}
-typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
- struct btree *, struct bch_io_opts *,
- struct data_opts *);
+typedef bool (*move_btree_pred)(struct bch_fs *, void *,
+ struct btree *, struct bch_io_opts *,
+ struct data_update_opts *);
static int bch2_move_btree(struct bch_fs *c,
enum btree_id start_btree_id, struct bpos start_pos,
@@ -674,8 +663,7 @@ static int bch2_move_btree(struct bch_fs *c,
struct btree_iter iter;
struct btree *b;
enum btree_id id;
- struct data_opts data_opts;
- enum data_cmd cmd;
+ struct data_update_opts data_opts;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
@@ -704,17 +692,8 @@ retry:
stats->pos = iter.pos;
- switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
- case DATA_SKIP:
+ if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
- case DATA_SCRUB:
- BUG();
- case DATA_ADD_REPLICAS:
- case DATA_REWRITE:
- break;
- default:
- BUG();
- }
ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
if (ret == -EINTR)
@@ -744,20 +723,10 @@ next:
return ret;
}
-#if 0
-static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
-{
- return DATA_SCRUB;
-}
-#endif
-
-static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool rereplicate_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
unsigned nr_good = bch2_bkey_durability(c, k);
unsigned replicas = bkey_is_btree_ptr(k.k)
@@ -765,43 +734,50 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
: io_opts->data_replicas;
if (!nr_good || nr_good >= replicas)
- return DATA_SKIP;
+ return false;
data_opts->target = 0;
- data_opts->nr_replicas = 1;
+ data_opts->extra_replicas = replicas - nr_good;
data_opts->btree_insert_flags = 0;
- return DATA_ADD_REPLICAS;
+ return true;
}
-static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool migrate_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const struct bch_extent_ptr *ptr;
struct bch_ioctl_data *op = arg;
+ unsigned i = 0;
- if (!bch2_bkey_has_device(k, op->migrate.dev))
- return DATA_SKIP;
-
+ data_opts->rewrite_ptrs = 0;
data_opts->target = 0;
- data_opts->nr_replicas = 1;
+ data_opts->extra_replicas = 0;
data_opts->btree_insert_flags = 0;
- data_opts->rewrite_dev = op->migrate.dev;
- return DATA_REWRITE;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == op->migrate.dev)
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+
+ return data_opts->rewrite_ptrs != 0;
}
-static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
- struct btree *b,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
-static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
- struct btree *b,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool migrate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
@@ -830,21 +806,21 @@ static bool bformat_needs_redo(struct bkey_format *f)
return false;
}
-static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
- struct btree *b,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
if (b->version_ondisk != c->sb.version ||
btree_node_need_rewrite(b) ||
bformat_needs_redo(&b->format)) {
data_opts->target = 0;
- data_opts->nr_replicas = 1;
+ data_opts->extra_replicas = 0;
data_opts->btree_insert_flags = 0;
- return DATA_REWRITE;
+ return true;
}
- return DATA_SKIP;
+ return false;
}
int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 8a0500450d52..27fee75c3626 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -24,9 +24,8 @@ struct moving_context {
wait_queue_head_t wait;
};
-typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
- struct bkey_s_c,
- struct bch_io_opts *, struct data_opts *);
+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
+ struct bch_io_opts *, struct data_update_opts *);
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
@@ -41,8 +40,7 @@ int bch2_move_data(struct bch_fs *,
int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
struct bch_ratelimit *,
struct write_point_specifier,
- enum data_cmd,
- struct data_opts *,
+ struct data_update_opts *,
struct bch_move_stats *);
int bch2_data_job(struct bch_fs *,
struct bch_move_stats *,
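The predicate typedefs in move.h and move.c now return bool and fill in a struct data_update_opts, replacing the enum data_cmd switch statements: callers zero the opts, call the predicate, and skip the key on false. A standalone sketch of that control flow (simplified types; demo_pred is hypothetical):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct demo_opts { unsigned rewrite_ptrs; };

typedef bool (*demo_pred_fn)(int key, struct demo_opts *opts);

static bool demo_pred(int key, struct demo_opts *opts)
{
	if (key & 1)			/* pretend odd keys don't need moving */
		return false;		/* was: return DATA_SKIP */
	opts->rewrite_ptrs = 1U << 0;
	return true;
}

int main(void)
{
	demo_pred_fn pred = demo_pred;
	struct demo_opts opts;
	int key;

	for (key = 0; key < 4; key++) {
		memset(&opts, 0, sizeof(opts));	/* as __bch2_move_data() now does */
		if (!pred(key, &opts))
			continue;
		printf("moving key %d, rewrite_ptrs %#x\n", key, opts.rewrite_ptrs);
	}
	return 0;
}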
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index fb629129d794..47e3f6333757 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -95,9 +95,8 @@ static int bch2_copygc(struct bch_fs *c)
struct bch_dev *ca;
unsigned dev_idx;
size_t heap_size = 0;
- struct data_opts data_opts = {
- .nr_replicas = 1,
- .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
+ struct data_update_opts data_opts = {
+ .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
};
int ret;
@@ -154,8 +153,7 @@ static int bch2_copygc(struct bch_fs *c)
ret = bch2_evacuate_bucket(c, POS(e.dev, e.bucket), e.gen, NULL,
writepoint_ptr(&c->copygc_write_point),
- DATA_REWRITE, &data_opts,
- &move_stats);
+ &data_opts, &move_stats);
if (ret < 0)
bch_err(c, "error %i from bch2_move_data() in copygc", ret);
if (ret)
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 23cc46e95021..a0e778349188 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -22,62 +22,70 @@
- * returns -1 if it should not be moved, or
- * device of pointer that should be moved, if known, or INT_MAX if unknown
- */
-static int __bch2_rebalance_pred(struct bch_fs *c,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts)
+static bool rebalance_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
+ unsigned i;
+
+ data_opts->rewrite_ptrs = 0;
+ data_opts->target = io_opts->background_target;
+ data_opts->extra_replicas = 0;
+ data_opts->btree_insert_flags = 0;
if (io_opts->background_compression &&
- !bch2_bkey_is_incompressible(k))
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ !bch2_bkey_is_incompressible(k)) {
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached &&
p.crc.compression_type !=
bch2_compression_opt_to_type[io_opts->background_compression])
- return p.ptr.dev;
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
- if (io_opts->background_target)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached &&
- !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
- return p.ptr.dev;
+ if (io_opts->background_target) {
+ const struct bch_extent_ptr *ptr;
- return -1;
+ i = 0;
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (!ptr->cached &&
+ !bch2_dev_in_target(c, ptr->dev, io_opts->background_target))
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+
+ return data_opts->rewrite_ptrs != 0;
}
void bch2_rebalance_add_key(struct bch_fs *c,
struct bkey_s_c k,
struct bch_io_opts *io_opts)
{
- atomic64_t *counter;
- int dev;
+ struct data_update_opts update_opts = { 0 };
+ struct bkey_ptrs_c ptrs;
+ const struct bch_extent_ptr *ptr;
+ unsigned i;
- dev = __bch2_rebalance_pred(c, k, io_opts);
- if (dev < 0)
+ if (!rebalance_pred(c, NULL, k, io_opts, &update_opts))
return;
- counter = dev < INT_MAX
- ? &bch_dev_bkey_exists(c, dev)->rebalance_work
- : &c->rebalance.work_unknown_dev;
-
- if (atomic64_add_return(k.k->size, counter) == k.k->size)
- rebalance_wakeup(c);
-}
-
-static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
-{
- if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
- data_opts->target = io_opts->background_target;
- data_opts->nr_replicas = 1;
- data_opts->btree_insert_flags = 0;
- return DATA_ADD_REPLICAS;
- } else {
- return DATA_SKIP;
+ i = 0;
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr) {
+ if ((1U << i) & update_opts.rewrite_ptrs)
+ if (atomic64_add_return(k.k->size,
+ &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) ==
+ k.k->size)
+ rebalance_wakeup(c);
+ i++;
}
}
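In the rewritten bch2_rebalance_add_key(), each device whose pointer is selected in rewrite_ptrs is charged the extent's size, and the rebalance thread is woken only when a counter transitions from zero: atomic64_add_return(size, counter) == size exactly when the old value was zero. A standalone sketch of that wake-on-first-work pattern (C11 atomics standing in for the kernel's atomic64):

#include <stdio.h>
#include <stdatomic.h>

static atomic_llong rebalance_work;

/* Wake the rebalance thread only when the counter goes 0 -> nonzero: */
static void add_work(long long sectors)
{
	/* fetch_add returning 0 == add_return returning exactly @sectors */
	if (atomic_fetch_add(&rebalance_work, sectors) == 0)
		printf("wakeup: first %lld sectors of pending work\n", sectors);
}

int main(void)
{
	add_work(8);	/* counter was zero: triggers a wakeup */
	add_work(16);	/* already nonzero: no wakeup */
	return 0;
}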