-rw-r--r--  fs/bcachefs/move.c     | 118
-rw-r--r--  fs/bcachefs/move.h     |   4
-rw-r--r--  fs/bcachefs/movinggc.c |   1
-rw-r--r--  fs/bcachefs/tier.c     |   1
4 files changed, 71 insertions, 53 deletions
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 99a6635ecf4c..cee63dbf3ca5 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -14,11 +14,16 @@
 #include <trace/events/bcachefs.h>
 
+#define SECTORS_IN_FLIGHT_PER_DEVICE	2048
+
 struct moving_io {
 	struct list_head	list;
 	struct closure		cl;
 	bool			read_completed;
-	unsigned		sectors;
+
+	unsigned		read_dev;
+	unsigned		read_sectors;
+	unsigned		write_sectors;
 
 	struct bch_read_bio	rbio;
 
@@ -34,7 +39,11 @@ struct moving_context {
 	struct bch_move_stats	*stats;
 
 	struct list_head	reads;
-	atomic_t		sectors_in_flight;
+
+	/* in flight sectors: */
+	atomic_t		read_sectors[BCH_SB_MEMBERS_MAX];
+	atomic_t		write_sectors;
+
 	wait_queue_head_t	wait;
 };
 
@@ -280,19 +289,29 @@ static void move_free(struct closure *cl)
 		if (bv->bv_page)
 			__free_page(bv->bv_page);
 
-	atomic_sub(io->sectors, &ctxt->sectors_in_flight);
 	wake_up(&ctxt->wait);
 
 	kfree(io);
 }
 
+static void move_write_done(struct closure *cl)
+{
+	struct moving_io *io = container_of(cl, struct moving_io, cl);
+
+	atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
+	closure_return_with_destructor(cl, move_free);
+}
+
 static void move_write(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 
 	if (likely(!io->rbio.bio.bi_status)) {
 		bch2_migrate_read_done(&io->write, &io->rbio);
+
+		atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
 		closure_call(&io->write.op.cl, bch2_write, NULL, cl);
+		continue_at(cl, move_write_done, NULL);
 	}
 
 	closure_return_with_destructor(cl, move_free);
@@ -311,13 +330,44 @@ static void move_read_endio(struct bio *bio)
 	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
 	struct moving_context *ctxt = io->write.ctxt;
 
+	atomic_sub(io->read_sectors, &ctxt->read_sectors[io->read_dev]);
 	io->read_completed = true;
+
 	if (next_pending_write(ctxt))
 		wake_up(&ctxt->wait);
 
 	closure_put(&ctxt->cl);
 }
 
+static void do_pending_writes(struct moving_context *ctxt)
+{
+	struct moving_io *io;
+
+	while ((io = next_pending_write(ctxt))) {
+		list_del(&io->list);
+		closure_call(&io->cl, move_write, NULL, &ctxt->cl);
+	}
+}
+
+#define move_ctxt_wait_event(_ctxt, _cond)			\
+do {								\
+	do_pending_writes(_ctxt);				\
+								\
+	if (_cond)						\
+		break;						\
+	__wait_event((_ctxt)->wait,				\
+		     next_pending_write(_ctxt) || (_cond));	\
+} while (1)
+
+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
+{
+	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
+
+	move_ctxt_wait_event(ctxt,
+		!atomic_read(&ctxt->write_sectors) ||
+		atomic_read(&ctxt->write_sectors) != sectors_pending);
+}
+
 static int bch2_move_extent(struct bch_fs *c,
 			    struct moving_context *ctxt,
 			    struct bch_devs_mask *devs,
@@ -334,10 +384,18 @@ static int bch2_move_extent(struct bch_fs *c,
 	unsigned sectors = e.k->size, pages;
 	int ret = -ENOMEM;
 
+	move_ctxt_wait_event(ctxt,
+		atomic_read(&ctxt->write_sectors) <
+		SECTORS_IN_FLIGHT_PER_DEVICE);
+
 	bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
 	if (IS_ERR_OR_NULL(pick.ca))
 		return pick.ca ? PTR_ERR(pick.ca) : 0;
 
+	move_ctxt_wait_event(ctxt,
+		atomic_read(&ctxt->read_sectors[pick.ca->dev_idx]) <
+		SECTORS_IN_FLIGHT_PER_DEVICE);
+
 	/* write path might have to decompress data: */
 	extent_for_each_ptr_crc(e, ptr, crc)
 		sectors = max_t(unsigned, sectors, crc.uncompressed_size);
@@ -348,8 +406,10 @@ static int bch2_move_extent(struct bch_fs *c,
 	if (!io)
 		goto err;
 
-	io->write.ctxt		= ctxt;
-	io->sectors		= e.k->size;
+	io->write.ctxt		= ctxt;
+	io->read_dev		= pick.ca->dev_idx;
+	io->read_sectors	= pick.crc.uncompressed_size;
+	io->write_sectors	= e.k->size;
 
 	bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
 	bio_set_prio(&io->write.op.wbio.bio,
@@ -379,7 +439,7 @@ static int bch2_move_extent(struct bch_fs *c,
 
 	trace_move_extent(e.k);
 
-	atomic_add(io->sectors, &ctxt->sectors_in_flight);
+	atomic_add(io->read_sectors, &ctxt->read_sectors[io->read_dev]);
 	list_add_tail(&io->list, &ctxt->reads);
 
 	/*
@@ -399,38 +459,8 @@ err:
 	return ret;
 }
 
-static void do_pending_writes(struct moving_context *ctxt)
-{
-	struct moving_io *io;
-
-	while ((io = next_pending_write(ctxt))) {
-		list_del(&io->list);
-		closure_call(&io->cl, move_write, NULL, &ctxt->cl);
-	}
-}
-
-#define move_ctxt_wait_event(_ctxt, _cond)			\
-do {								\
-	do_pending_writes(_ctxt);				\
-								\
-	if (_cond)						\
-		break;						\
-	__wait_event((_ctxt)->wait,				\
-		     next_pending_write(_ctxt) || (_cond));	\
-} while (1)
-
-static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
-{
-	unsigned sectors_pending = atomic_read(&ctxt->sectors_in_flight);
-
-	move_ctxt_wait_event(ctxt,
-		!atomic_read(&ctxt->sectors_in_flight) ||
-		atomic_read(&ctxt->sectors_in_flight) != sectors_pending);
-}
-
 int bch2_move_data(struct bch_fs *c, struct bch_ratelimit *rate,
-		   unsigned sectors_in_flight,
 		   struct bch_devs_mask *devs,
 		   struct write_point_specifier wp,
 		   struct bpos start,
@@ -461,13 +491,6 @@ int bch2_move_data(struct bch_fs *c,
 		bch2_ratelimit_reset(rate);
 
 	while (!kthread || !(ret = kthread_should_stop())) {
-		if (atomic_read(&ctxt.sectors_in_flight) >= sectors_in_flight) {
-			bch2_btree_iter_unlock(&stats->iter);
-			move_ctxt_wait_event(&ctxt,
-				atomic_read(&ctxt.sectors_in_flight) <
-				sectors_in_flight);
-		}
-
 		if (rate &&
 		    bch2_ratelimit_delay(rate) &&
 		    (bch2_btree_iter_unlock(&stats->iter),
@@ -546,11 +569,10 @@ next_nondata:
 
 	bch2_btree_iter_unlock(&stats->iter);
 
-	move_ctxt_wait_event(&ctxt, !atomic_read(&ctxt.sectors_in_flight));
+	move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
 	closure_sync(&ctxt.cl);
 
-	EBUG_ON(!list_empty(&ctxt.reads));
-	EBUG_ON(atomic_read(&ctxt.sectors_in_flight));
+	EBUG_ON(atomic_read(&ctxt.write_sectors));
 
 	trace_move_data(c,
 			atomic64_read(&stats->sectors_moved),
@@ -711,7 +733,7 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_gc_btree_replicas(c) ?: ret;
 
-		ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
+		ret = bch2_move_data(c, NULL,
 				     NULL,
 				     writepoint_hashed((unsigned long) current),
 				     op.start,
@@ -729,7 +751,7 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
 		ret = bch2_gc_btree_replicas(c) ?: ret;
 
-		ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
+		ret = bch2_move_data(c, NULL,
 				     NULL,
 				     writepoint_hashed((unsigned long) current),
 				     op.start,
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 819e5d9f0a24..cbcc44db9978 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -44,8 +44,6 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
 			    enum data_cmd, struct data_opts, struct bkey_s_c);
 
-#define SECTORS_IN_FLIGHT_PER_DEVICE 2048
-
 typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, enum bkey_type,
 				       struct bkey_s_c_extent,
 				       struct bch_io_opts *, struct data_opts *);
@@ -61,7 +59,7 @@ struct bch_move_stats {
 };
 
 int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
-		   unsigned, struct bch_devs_mask *,
+		   struct bch_devs_mask *,
 		   struct write_point_specifier,
 		   struct bpos, struct bpos,
 		   move_pred_fn, void *,
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index c306a89f8401..f6810c3ed52e 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -178,7 +178,6 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 		 bucket_offset_cmp, NULL);
 
 	ret = bch2_move_data(c, &ca->copygc_pd.rate,
-			     SECTORS_IN_FLIGHT_PER_DEVICE,
 			     &ca->self,
 			     writepoint_ptr(&ca->copygc_write_point),
 			     POS_MIN, POS_MAX,
diff --git a/fs/bcachefs/tier.c b/fs/bcachefs/tier.c
index 775c2e2be686..e992ab44ed3a 100644
--- a/fs/bcachefs/tier.c
+++ b/fs/bcachefs/tier.c
@@ -101,7 +101,6 @@ static int bch2_tiering_thread(void *arg)
 		}
 
 		bch2_move_data(c, &tier->pd.rate,
-			       SECTORS_IN_FLIGHT_PER_DEVICE * nr_devices,
 			       &tier->devs,
 			       writepoint_ptr(&tier->wp),
 			       POS_MIN, POS_MAX,
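
For context, not part of the commit: below is a minimal userspace sketch of the accounting scheme this patch switches to, with in-flight read sectors tracked per source device and in-flight write sectors tracked globally, each capped at SECTORS_IN_FLIGHT_PER_DEVICE, instead of the single sectors_in_flight counter. The helper names (can_issue_read, issue_read, etc.) and the NR_DEVICES value are illustrative only; the kernel code does the waiting with move_ctxt_wait_event() and atomics.

/*
 * Userspace sketch of the per-device read / global write throttling
 * introduced by this patch.  Not the kernel implementation.
 */
#include <assert.h>
#include <stdio.h>

#define NR_DEVICES			4
#define SECTORS_IN_FLIGHT_PER_DEVICE	2048

static unsigned read_sectors[NR_DEVICES];	/* in-flight read sectors, per device */
static unsigned write_sectors;			/* in-flight write sectors, global */

/* Roughly the two conditions bch2_move_extent() now waits on. */
static int can_issue_read(unsigned dev)
{
	return write_sectors < SECTORS_IN_FLIGHT_PER_DEVICE &&
	       read_sectors[dev] < SECTORS_IN_FLIGHT_PER_DEVICE;
}

static void issue_read(unsigned dev, unsigned sectors)
{
	read_sectors[dev] += sectors;	/* undone when the read completes */
}

/* On read completion the extent is handed off to the write path. */
static void complete_read_start_write(unsigned dev, unsigned rsectors,
				      unsigned wsectors)
{
	assert(read_sectors[dev] >= rsectors);
	read_sectors[dev] -= rsectors;
	write_sectors += wsectors;
}

static void complete_write(unsigned sectors)
{
	assert(write_sectors >= sectors);
	write_sectors -= sectors;
}

int main(void)
{
	/* One extent: 256 uncompressed sectors read from dev 0, 128 written. */
	if (can_issue_read(0))
		issue_read(0, 256);
	complete_read_start_write(0, 256, 128);
	complete_write(128);

	printf("in flight on dev 0: %u read sectors, %u write sectors\n",
	       read_sectors[0], write_sectors);
	return 0;
}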