-rw-r--r--  fs/bcachefs/move.c      | 118
-rw-r--r--  fs/bcachefs/move.h      |   4
-rw-r--r--  fs/bcachefs/movinggc.c  |   1
-rw-r--r--  fs/bcachefs/tier.c      |   1
4 files changed, 71 insertions, 53 deletions
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 99a6635ecf4c..cee63dbf3ca5 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -14,11 +14,16 @@
#include <trace/events/bcachefs.h>
+#define SECTORS_IN_FLIGHT_PER_DEVICE 2048
+
struct moving_io {
struct list_head list;
struct closure cl;
bool read_completed;
- unsigned sectors;
+
+ unsigned read_dev;
+ unsigned read_sectors;
+ unsigned write_sectors;
struct bch_read_bio rbio;
@@ -34,7 +39,11 @@ struct moving_context {
struct bch_move_stats *stats;
struct list_head reads;
- atomic_t sectors_in_flight;
+
+ /* in flight sectors: */
+ atomic_t read_sectors[BCH_SB_MEMBERS_MAX];
+ atomic_t write_sectors;
+
wait_queue_head_t wait;
};
@@ -280,19 +289,29 @@ static void move_free(struct closure *cl)
if (bv->bv_page)
__free_page(bv->bv_page);
- atomic_sub(io->sectors, &ctxt->sectors_in_flight);
wake_up(&ctxt->wait);
kfree(io);
}
+static void move_write_done(struct closure *cl)
+{
+ struct moving_io *io = container_of(cl, struct moving_io, cl);
+
+ atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
+ closure_return_with_destructor(cl, move_free);
+}
+
static void move_write(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
if (likely(!io->rbio.bio.bi_status)) {
bch2_migrate_read_done(&io->write, &io->rbio);
+
+ atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
closure_call(&io->write.op.cl, bch2_write, NULL, cl);
+ continue_at(cl, move_write_done, NULL);
}
closure_return_with_destructor(cl, move_free);
@@ -311,13 +330,44 @@ static void move_read_endio(struct bio *bio)
struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
struct moving_context *ctxt = io->write.ctxt;
+ atomic_sub(io->read_sectors, &ctxt->read_sectors[io->read_dev]);
io->read_completed = true;
+
if (next_pending_write(ctxt))
wake_up(&ctxt->wait);
closure_put(&ctxt->cl);
}
+static void do_pending_writes(struct moving_context *ctxt)
+{
+ struct moving_io *io;
+
+ while ((io = next_pending_write(ctxt))) {
+ list_del(&io->list);
+ closure_call(&io->cl, move_write, NULL, &ctxt->cl);
+ }
+}
+
+#define move_ctxt_wait_event(_ctxt, _cond) \
+do { \
+ do_pending_writes(_ctxt); \
+ \
+ if (_cond) \
+ break; \
+ __wait_event((_ctxt)->wait, \
+ next_pending_write(_ctxt) || (_cond)); \
+} while (1)
+
+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
+{
+ unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
+
+ move_ctxt_wait_event(ctxt,
+ !atomic_read(&ctxt->write_sectors) ||
+ atomic_read(&ctxt->write_sectors) != sectors_pending);
+}
+
static int bch2_move_extent(struct bch_fs *c,
struct moving_context *ctxt,
struct bch_devs_mask *devs,
@@ -334,10 +384,18 @@ static int bch2_move_extent(struct bch_fs *c,
unsigned sectors = e.k->size, pages;
int ret = -ENOMEM;
+ move_ctxt_wait_event(ctxt,
+ atomic_read(&ctxt->write_sectors) <
+ SECTORS_IN_FLIGHT_PER_DEVICE);
+
bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
if (IS_ERR_OR_NULL(pick.ca))
return pick.ca ? PTR_ERR(pick.ca) : 0;
+ move_ctxt_wait_event(ctxt,
+ atomic_read(&ctxt->read_sectors[pick.ca->dev_idx]) <
+ SECTORS_IN_FLIGHT_PER_DEVICE);
+
/* write path might have to decompress data: */
extent_for_each_ptr_crc(e, ptr, crc)
sectors = max_t(unsigned, sectors, crc.uncompressed_size);
@@ -348,8 +406,10 @@ static int bch2_move_extent(struct bch_fs *c,
if (!io)
goto err;
- io->write.ctxt = ctxt;
- io->sectors = e.k->size;
+ io->write.ctxt = ctxt;
+ io->read_dev = pick.ca->dev_idx;
+ io->read_sectors = pick.crc.uncompressed_size;
+ io->write_sectors = e.k->size;
bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
bio_set_prio(&io->write.op.wbio.bio,
@@ -379,7 +439,7 @@ static int bch2_move_extent(struct bch_fs *c,
trace_move_extent(e.k);
- atomic_add(io->sectors, &ctxt->sectors_in_flight);
+ atomic_add(io->read_sectors, &ctxt->read_sectors[io->read_dev]);
list_add_tail(&io->list, &ctxt->reads);
/*
@@ -399,38 +459,8 @@ err:
return ret;
}
-static void do_pending_writes(struct moving_context *ctxt)
-{
- struct moving_io *io;
-
- while ((io = next_pending_write(ctxt))) {
- list_del(&io->list);
- closure_call(&io->cl, move_write, NULL, &ctxt->cl);
- }
-}
-
-#define move_ctxt_wait_event(_ctxt, _cond) \
-do { \
- do_pending_writes(_ctxt); \
- \
- if (_cond) \
- break; \
- __wait_event((_ctxt)->wait, \
- next_pending_write(_ctxt) || (_cond)); \
-} while (1)
-
-static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
-{
- unsigned sectors_pending = atomic_read(&ctxt->sectors_in_flight);
-
- move_ctxt_wait_event(ctxt,
- !atomic_read(&ctxt->sectors_in_flight) ||
- atomic_read(&ctxt->sectors_in_flight) != sectors_pending);
-}
-
int bch2_move_data(struct bch_fs *c,
struct bch_ratelimit *rate,
- unsigned sectors_in_flight,
struct bch_devs_mask *devs,
struct write_point_specifier wp,
struct bpos start,
@@ -461,13 +491,6 @@ int bch2_move_data(struct bch_fs *c,
bch2_ratelimit_reset(rate);
while (!kthread || !(ret = kthread_should_stop())) {
- if (atomic_read(&ctxt.sectors_in_flight) >= sectors_in_flight) {
- bch2_btree_iter_unlock(&stats->iter);
- move_ctxt_wait_event(&ctxt,
- atomic_read(&ctxt.sectors_in_flight) <
- sectors_in_flight);
- }
-
if (rate &&
bch2_ratelimit_delay(rate) &&
(bch2_btree_iter_unlock(&stats->iter),
@@ -546,11 +569,10 @@ next_nondata:
bch2_btree_iter_unlock(&stats->iter);
- move_ctxt_wait_event(&ctxt, !atomic_read(&ctxt.sectors_in_flight));
+ move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);
- EBUG_ON(!list_empty(&ctxt.reads));
- EBUG_ON(atomic_read(&ctxt.sectors_in_flight));
+ EBUG_ON(atomic_read(&ctxt.write_sectors));
trace_move_data(c,
atomic64_read(&stats->sectors_moved),
@@ -711,7 +733,7 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
ret = bch2_gc_btree_replicas(c) ?: ret;
- ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
+ ret = bch2_move_data(c, NULL,
NULL,
writepoint_hashed((unsigned long) current),
op.start,
@@ -729,7 +751,7 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
ret = bch2_gc_btree_replicas(c) ?: ret;
- ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
+ ret = bch2_move_data(c, NULL,
NULL,
writepoint_hashed((unsigned long) current),
op.start,
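
The move.c changes above split the old single sectors_in_flight counter into per-source-device read counters plus one global write counter, and bch2_move_extent() now throttles against both limits before issuing a read. A minimal userspace sketch of that back-pressure scheme (hypothetical names; C11 atomics stand in for the kernel's atomic_t, and the wait queue/closure machinery is omitted):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define SECTORS_IN_FLIGHT_PER_DEVICE 2048
#define NR_DEVICES 64				/* stand-in for BCH_SB_MEMBERS_MAX */

/* hypothetical stand-in for struct moving_context's counters */
struct move_counters {
	atomic_uint read_sectors[NR_DEVICES];	/* reads in flight, per source device */
	atomic_uint write_sectors;		/* writes in flight, all devices */
};

/* both limits must have headroom before another read is started */
static bool may_start_read(struct move_counters *c, unsigned dev)
{
	return atomic_load(&c->write_sectors) < SECTORS_IN_FLIGHT_PER_DEVICE &&
	       atomic_load(&c->read_sectors[dev]) < SECTORS_IN_FLIGHT_PER_DEVICE;
}

static void read_started(struct move_counters *c, unsigned dev, unsigned sectors)
{
	atomic_fetch_add(&c->read_sectors[dev], sectors);
}

/* read endio drops the per-device count; issuing the write takes the global count */
static void read_done_write_started(struct move_counters *c, unsigned dev,
				    unsigned read_sectors, unsigned write_sectors)
{
	atomic_fetch_sub(&c->read_sectors[dev], read_sectors);
	atomic_fetch_add(&c->write_sectors, write_sectors);
}

static void write_done(struct move_counters *c, unsigned sectors)
{
	atomic_fetch_sub(&c->write_sectors, sectors);
}

int main(void)
{
	struct move_counters c = {0};

	if (may_start_read(&c, 0)) {
		read_started(&c, 0, 8);
		read_done_write_started(&c, 0, 8, 8);
		write_done(&c, 8);
	}
	printf("reads: %u, writes: %u\n",
	       atomic_load(&c.read_sectors[0]), atomic_load(&c.write_sectors));
	return 0;
}

Keeping the read limit per device while the write limit stays global matches the patch's structure: reads fan out across source devices, but all writes funnel through one write point, so write-side congestion should throttle the whole move.
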
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 819e5d9f0a24..cbcc44db9978 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -44,8 +44,6 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
enum data_cmd, struct data_opts,
struct bkey_s_c);
-#define SECTORS_IN_FLIGHT_PER_DEVICE 2048
-
typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
enum bkey_type, struct bkey_s_c_extent,
struct bch_io_opts *, struct data_opts *);
@@ -61,7 +59,7 @@ struct bch_move_stats {
};
int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
- unsigned, struct bch_devs_mask *,
+ struct bch_devs_mask *,
struct write_point_specifier,
struct bpos, struct bpos,
move_pred_fn, void *,
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index c306a89f8401..f6810c3ed52e 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -178,7 +178,6 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
bucket_offset_cmp, NULL);
ret = bch2_move_data(c, &ca->copygc_pd.rate,
- SECTORS_IN_FLIGHT_PER_DEVICE,
&ca->self,
writepoint_ptr(&ca->copygc_write_point),
POS_MIN, POS_MAX,
diff --git a/fs/bcachefs/tier.c b/fs/bcachefs/tier.c
index 775c2e2be686..e992ab44ed3a 100644
--- a/fs/bcachefs/tier.c
+++ b/fs/bcachefs/tier.c
@@ -101,7 +101,6 @@ static int bch2_tiering_thread(void *arg)
}
bch2_move_data(c, &tier->pd.rate,
- SECTORS_IN_FLIGHT_PER_DEVICE * nr_devices,
&tier->devs,
writepoint_ptr(&tier->wp),
POS_MIN, POS_MAX,
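
The wait/wake side is unchanged in spirit: move_ctxt_wait_event() first issues any writes whose reads have completed, and only then sleeps until another pending write shows up or the caller's condition holds. A toy model of that pattern (pthreads instead of wait_queue_head_t and closures; all names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct ctxt {
	pthread_mutex_t	lock;
	pthread_cond_t	wait;		/* plays the role of ctxt->wait */
	int		pending_writes;	/* reads done, write not yet issued */
	int		writes_in_flight;
};

/* "issue" every write whose read has completed (cf. do_pending_writes()) */
static void issue_pending_writes(struct ctxt *c)
{
	while (c->pending_writes) {
		c->pending_writes--;
		c->writes_in_flight++;
	}
}

/* cf. move_ctxt_wait_event(): make progress before and between sleeps */
static void ctxt_wait_event(struct ctxt *c, bool (*cond)(struct ctxt *))
{
	pthread_mutex_lock(&c->lock);
	for (;;) {
		issue_pending_writes(c);
		if (cond(c))
			break;
		while (!c->pending_writes && !cond(c))
			pthread_cond_wait(&c->wait, &c->lock);
	}
	pthread_mutex_unlock(&c->lock);
}

static bool below_write_limit(struct ctxt *c)
{
	return c->writes_in_flight < 4;	/* toy limit; the kernel uses SECTORS_IN_FLIGHT_PER_DEVICE */
}

int main(void)
{
	struct ctxt c = {
		.lock		= PTHREAD_MUTEX_INITIALIZER,
		.wait		= PTHREAD_COND_INITIALIZER,
		.pending_writes	= 2,
	};

	ctxt_wait_event(&c, below_write_limit);
	printf("in flight: %d, pending: %d\n", c.writes_in_flight, c.pending_writes);
	return 0;
}
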