author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-01 11:32:47 -0400
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-22 00:44:18 -0400
commit | 58d7fbdf78338727a20d8af24fbc2e092f96af59
tree | 635a412f9feb71dff37fd438f9274f9c6494b307
parent | 88acdafd52ee2249d1d96cc6ac4a0ec84129d6b8
bcachefs: Promote whole extents, avoiding mempool
The mempools we allocate for bouncing reads are only sized for
checksummed/compressed extents; non-checksummed/compressed extents can
be bigger, so if we bounce for a promote and the extent is too big, we
can't use the mempool.
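
For context, the fallback this patch implements follows a common pattern: allocations up to the pool's element size come from the pool, larger ones fall back to a plain allocation, and a flag on the buffer records which path was taken so the free side matches (this is what the new `rbio->kmalloc` bit is for). Below is a minimal userspace sketch of that pattern, not bcachefs code; `bounce_buf`, `BOUNCE_POOL_MAX`, and the pool helpers are illustrative stand-ins.

```c
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for the mempool element size (encoded_extent_max). */
#define BOUNCE_POOL_MAX	(128 * 1024)

struct bounce_buf {
	void	*data;
	size_t	size;
	bool	kmalloc;	/* true: plain allocation, false: from the "pool" */
};

/* Pretend pool allocator: in the kernel this would be mempool_alloc()/free(). */
static void *pool_alloc(size_t size)	{ return malloc(size); }
static void pool_free(void *p)		{ free(p); }

static bool bounce_buf_alloc(struct bounce_buf *b, size_t size)
{
	/* Pool elements are only sized for encoded (checksummed/compressed)
	 * extents; anything bigger must take the fallback path. */
	b->kmalloc = size > BOUNCE_POOL_MAX;
	b->data = b->kmalloc ? malloc(size) : pool_alloc(size);
	b->size = size;
	return b->data != NULL;
}

static void bounce_buf_free(struct bounce_buf *b)
{
	/* The free side must match the allocation path, as bch2_rbio_free()
	 * does by checking rbio->kmalloc. */
	if (b->kmalloc)
		free(b->data);
	else
		pool_free(b->data);
	b->data = NULL;
}

int main(void)
{
	struct bounce_buf b;

	/* An extent bigger than the pool element: takes the fallback path. */
	if (bounce_buf_alloc(&b, 4 * BOUNCE_POOL_MAX)) {
		memset(b.data, 0, b.size);
		bounce_buf_free(&b);
	}
	return 0;
}
```

In the kernel the pool side is a mempool sized for encoded extents, which is exactly the `encoded_extent_max` limit `__promote_alloc()` checks below before switching to a kzalloc'd read bio.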
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1
-rw-r--r-- | fs/bcachefs/io.c | 211
-rw-r--r-- | fs/bcachefs/io_types.h | 1
-rw-r--r-- | fs/bcachefs/super.c | 7
-rw-r--r-- | fs/bcachefs/sysfs.c | 7
5 files changed, 151 insertions, 76 deletions
```diff
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 61865240133f..1b71ae08f7f4 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -711,6 +711,7 @@ struct bch_fs {
 	unsigned		copy_gc_enabled:1;
 	unsigned		rebalance_enabled:1;
 	unsigned		rebalance_percent;
+	bool			promote_whole_extents;
 
 #define BCH_DEBUG_PARAM(name, description) bool name;
 	BCH_DEBUG_PARAMS_ALL()
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index caab7b460532..3afc4108d6a4 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -871,9 +871,41 @@ static const struct rhashtable_params bch_promote_params = {
 	.key_len	= sizeof(struct bpos),
 };
 
+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
+				  struct bpos pos,
+				  struct bch_io_opts opts,
+				  unsigned flags)
+{
+	if (!opts.promote_target)
+		return false;
+
+	if (!(flags & BCH_READ_MAY_PROMOTE))
+		return false;
+
+	if (percpu_ref_is_dying(&c->writes))
+		return false;
+
+	if (!bkey_extent_is_data(k.k))
+		return false;
+
+	if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), opts.promote_target))
+		return false;
+
+	if (rhashtable_lookup_fast(&c->promote_table, &pos,
+				   bch_promote_params))
+		return false;
+
+	return true;
+}
+
 static void promote_free(struct bch_fs *c, struct promote_op *op)
 {
-	rhashtable_remove_fast(&c->promote_table, &op->hash, bch_promote_params);
+	int ret;
+
+	ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+				     bch_promote_params);
+	BUG_ON(ret);
+
 	percpu_ref_put(&c->writes);
 	kfree(op);
 }
@@ -883,7 +915,6 @@ static void promote_done(struct closure *cl)
 		container_of(cl, struct promote_op, cl);
 	struct bch_fs *c = op->write.op.c;
 
-	percpu_ref_put(&c->writes);
 	bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
 	promote_free(c, op);
 }
@@ -894,17 +925,15 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 	struct closure *cl = &op->cl;
 	struct bio *bio = &op->write.op.wbio.bio;
 
-	BUG_ON(!rbio->split || !rbio->bounce);
-
-	if (!percpu_ref_tryget(&c->writes))
-		return;
-
 	trace_promote(&rbio->bio);
 
 	/* we now own pages: */
+	BUG_ON(!rbio->bounce);
 	BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+
+	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
+	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
-	rbio->promote = NULL;
 
 	bch2_migrate_read_done(&op->write, rbio);
 
@@ -913,79 +942,115 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 	closure_return_with_destructor(cl, promote_done);
 }
 
-/*
- * XXX: multiple promotes can race with each other, wastefully. Keep a list of
- * outstanding promotes?
- */
-static struct promote_op *promote_alloc(struct bch_read_bio *rbio,
-					struct bkey_s_c k)
+noinline
+static struct promote_op *__promote_alloc(struct bch_fs *c,
+					  struct bpos pos,
+					  struct extent_pick_ptr *pick,
+					  struct bch_io_opts opts,
+					  unsigned rbio_sectors,
+					  struct bch_read_bio **rbio)
 {
-	struct bch_fs *c = rbio->c;
-	struct promote_op *op;
+	struct promote_op *op = NULL;
 	struct bio *bio;
+	unsigned rbio_pages = DIV_ROUND_UP(rbio_sectors, PAGE_SECTORS);
 	/* data might have to be decompressed in the write path: */
-	unsigned pages = DIV_ROUND_UP(rbio->pick.crc.uncompressed_size,
-				      PAGE_SECTORS);
+	unsigned wbio_pages = DIV_ROUND_UP(pick->crc.uncompressed_size,
+					   PAGE_SECTORS);
 	int ret;
 
-	BUG_ON(!rbio->bounce);
-	BUG_ON(pages < rbio->bio.bi_vcnt);
+	if (!percpu_ref_tryget(&c->writes))
+		return NULL;
 
-	op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages,
+	op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * wbio_pages,
 		     GFP_NOIO);
 	if (!op)
-		return NULL;
+		goto err;
 
-	op->pos = k.k->p;
+	op->pos = pos;
 
-	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
-					  bch_promote_params)) {
-		kfree(op);
-		return NULL;
+	/*
+	 * promotes require bouncing, but if the extent isn't
+	 * checksummed/compressed it might be too big for the mempool:
+	 */
+	if (rbio_sectors > c->sb.encoded_extent_max) {
+		*rbio = kzalloc(sizeof(struct bch_read_bio) +
+				sizeof(struct bio_vec) * rbio_pages,
+				GFP_NOIO);
+		if (!*rbio)
+			goto err;
+
+		rbio_init(&(*rbio)->bio, opts);
+		bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs,
+			 rbio_pages);
+
+		(*rbio)->bio.bi_iter.bi_size = rbio_sectors << 9;
+		bch2_bio_map(&(*rbio)->bio, NULL);
+
+		if (bch2_bio_alloc_pages(&(*rbio)->bio, GFP_NOIO))
+			goto err;
+
+		(*rbio)->bounce		= true;
+		(*rbio)->split		= true;
+		(*rbio)->kmalloc	= true;
 	}
 
-	bio = &op->write.op.wbio.bio;
-	bio_init(bio, bio->bi_inline_vecs, pages);
+	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
+					  bch_promote_params))
+		goto err;
 
-	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
-	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
+	bio = &op->write.op.wbio.bio;
+	bio_init(bio, bio->bi_inline_vecs, wbio_pages);
 
 	ret = bch2_migrate_write_init(c, &op->write,
 			writepoint_hashed((unsigned long) current),
-			rbio->opts,
+			opts,
 			DATA_PROMOTE,
 			(struct data_opts) {
-				.target = rbio->opts.promote_target
+				.target = opts.promote_target
 			},
-			k);
+			bkey_s_c_null);
 	BUG_ON(ret);
 
 	return op;
+err:
+	if (*rbio)
+		bio_free_pages(&(*rbio)->bio);
+	kfree(*rbio);
+	*rbio = NULL;
+	kfree(op);
+	percpu_ref_put(&c->writes);
+	return NULL;
 }
 
-static bool should_promote(struct bch_fs *c, struct bkey_s_c k,
-			   unsigned flags, u16 target)
+static inline struct promote_op *promote_alloc(struct bch_fs *c,
+					       struct bvec_iter iter,
+					       struct bkey_s_c k,
+					       struct extent_pick_ptr *pick,
+					       struct bch_io_opts opts,
+					       unsigned flags,
+					       struct bch_read_bio **rbio,
+					       bool *bounce,
+					       bool *read_full)
 {
-	if (!target)
-		return false;
-
-	if (!(flags & BCH_READ_MAY_PROMOTE))
-		return false;
-
-	if (percpu_ref_is_dying(&c->writes))
-		return false;
-
-	if (!bkey_extent_is_data(k.k))
-		return false;
-
-	if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), target))
-		return false;
+	bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+	unsigned sectors = promote_full
+		? pick->crc.compressed_size
+		: bvec_iter_sectors(iter);
+	struct bpos pos = promote_full
+		? bkey_start_pos(k.k)
+		: POS(k.k->p.inode, iter.bi_sector);
+	struct promote_op *promote;
+
+	if (!should_promote(c, k, pos, opts, flags))
+		return NULL;
 
-	if (rhashtable_lookup_fast(&c->promote_table, &k.k->p,
-				   bch_promote_params))
-		return false;
+	promote = __promote_alloc(c, pos, pick, opts, sectors, rbio);
+	if (!promote)
+		return NULL;
 
-	return true;
+	*bounce		= true;
+	*read_full	= promote_full;
+	return promote;
 }
 
 /* Read */
@@ -1034,7 +1099,11 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
 	if (rbio->split) {
 		struct bch_read_bio *parent = rbio->parent;
 
-		bio_put(&rbio->bio);
+		if (rbio->kmalloc)
+			kfree(rbio);
+		else
+			bio_put(&rbio->bio);
+
 		rbio = parent;
 	}
 
@@ -1334,6 +1403,7 @@ static void __bch2_read_endio(struct work_struct *work)
 		 */
 		bch2_encrypt_bio(c, crc.csum_type, nonce, src);
 		promote_start(rbio->promote, rbio);
+		rbio->promote = NULL;
 	}
 nodecode:
 	if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
@@ -1417,10 +1487,10 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		       struct bch_devs_mask *avoid, unsigned flags)
 {
 	struct extent_pick_ptr pick;
-	struct bch_read_bio *rbio;
+	struct bch_read_bio *rbio = NULL;
 	struct bch_dev *ca;
-	bool split = false, bounce = false, read_full = false;
-	bool promote = false, narrow_crcs = false;
+	struct promote_op *promote = NULL;
+	bool bounce = false, read_full = false, narrow_crcs = false;
 	struct bpos pos = bkey_start_pos(k.k);
 	int pick_ret;
 
@@ -1471,11 +1541,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		bounce = true;
 	}
 
-	promote = should_promote(c, k, flags, orig->opts.promote_target);
-	if (promote) {
-		read_full = true;
-		bounce = true;
-	}
+	promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
+				&rbio, &bounce, &read_full);
 
 	if (!read_full) {
 		EBUG_ON(pick.crc.compression_type);
@@ -1494,7 +1561,9 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		pos.offset = iter.bi_sector;
 	}
 
-	if (bounce) {
+	if (rbio) {
+		/* promote already allocated bounce rbio */
+	} else if (bounce) {
 		unsigned sectors = pick.crc.compressed_size;
 
 		rbio = rbio_init(bio_alloc_bioset(GFP_NOIO,
@@ -1503,7 +1572,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 					orig->opts);
 
 		bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
-		split = true;
+		rbio->bounce	= true;
+		rbio->split	= true;
 	} else if (flags & BCH_READ_MUST_CLONE) {
 		/*
 		 * Have to clone if there were any splits, due to error
@@ -1517,12 +1587,11 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 						 &c->bio_read_split),
 				 orig->opts);
 		rbio->bio.bi_iter = iter;
-		split = true;
+		rbio->split	= true;
 	} else {
 noclone:
 		rbio = orig;
 		rbio->bio.bi_iter = iter;
-		split = false;
 		BUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
 	}
 
@@ -1530,14 +1599,12 @@ noclone:
 	rbio->c			= c;
 	rbio->submit_time	= local_clock();
 
-	if (split)
+	if (rbio->split)
 		rbio->parent	= orig;
 	else
 		rbio->end_io	= orig->bio.bi_end_io;
 	rbio->bvec_iter		= iter;
 	rbio->flags		= flags;
-	rbio->bounce		= bounce;
-	rbio->split		= split;
 	rbio->have_ioref	= pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
 	rbio->narrow_crcs	= narrow_crcs;
 	rbio->hole		= 0;
@@ -1547,14 +1614,14 @@ noclone:
 	rbio->pick		= pick;
 	rbio->pos		= pos;
 	rbio->version		= k.k->version;
-	rbio->promote		= promote ? promote_alloc(rbio, k) : NULL;
+	rbio->promote		= promote;
 	INIT_WORK(&rbio->work, NULL);
 
 	rbio->bio.bi_opf	= orig->bio.bi_opf;
 	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
 	rbio->bio.bi_end_io	= bch2_read_endio;
 
-	if (bounce)
+	if (rbio->bounce)
 		trace_read_bounce(&rbio->bio);
 
 	bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h
index c3c33f6e0f73..28281ea6c43a 100644
--- a/fs/bcachefs/io_types.h
+++ b/fs/bcachefs/io_types.h
@@ -42,6 +42,7 @@ struct bch_read_bio {
 	struct {
 		u16		bounce:1,
 				split:1,
+				kmalloc:1,
 				have_ioref:1,
 				narrow_crcs:1,
 				hole:1,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6683fe95b462..16b8cbfc973a 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -591,9 +591,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
 	seqcount_init(&c->gc_pos_lock);
 
-	c->copy_gc_enabled = 1;
-	c->rebalance_enabled = 1;
-	c->rebalance_percent = 10;
+	c->copy_gc_enabled		= 1;
+	c->rebalance_enabled		= 1;
+	c->rebalance_percent		= 10;
+	c->promote_whole_extents	= true;
 
 	c->journal.write_time	= &c->journal_write_time;
 	c->journal.delay_time	= &c->journal_delay_time;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index d4e02f2aa63b..e4381bb5da19 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -184,6 +184,7 @@ sysfs_pd_controller_attribute(copy_gc);
 rw_attribute(rebalance_enabled);
 rw_attribute(rebalance_percent);
 sysfs_pd_controller_attribute(rebalance);
+rw_attribute(promote_whole_extents);
 
 rw_attribute(pd_controllers_update_seconds);
 
@@ -340,9 +341,10 @@ SHOW(bch2_fs)
 
 	sysfs_printf(rebalance_enabled, "%i", c->rebalance_enabled);
 	sysfs_print(rebalance_percent, c->rebalance_percent);
-
 	sysfs_pd_controller_show(rebalance, &c->rebalance_pd); /* XXX */
 
+	sysfs_print(promote_whole_extents, c->promote_whole_extents);
+
 	sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
 	sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));
 
@@ -414,6 +416,8 @@ STORE(__bch2_fs)
 	sysfs_strtoul(rebalance_percent, c->rebalance_percent);
 	sysfs_pd_controller_store(rebalance, &c->rebalance_pd);
 
+	sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
+
 	/* Debugging: */
 
 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
@@ -470,6 +474,7 @@ struct attribute *bch2_fs_files[] = {
 	&sysfs_journal_reclaim_delay_ms,
 
 	&sysfs_rebalance_percent,
+	&sysfs_promote_whole_extents,
 
 	&sysfs_compression_stats,
 	NULL
```