author		Kent Overstreet <kent.overstreet@gmail.com>	2018-05-01 11:32:47 -0400
committer	Kent Overstreet <kent.overstreet@gmail.com>	2018-05-22 00:44:18 -0400
commit		58d7fbdf78338727a20d8af24fbc2e092f96af59 (patch)
tree		635a412f9feb71dff37fd438f9274f9c6494b307
parent		88acdafd52ee2249d1d96cc6ac4a0ec84129d6b8 (diff)
bcachefs: Promote whole extents, avoiding mempool
The mempools we allocate for bouncing reads are only sized for checksummed/compressed extents; non-checksummed/compressed extents can be bigger, so if we bounce for a promote we can't use the mempool when the extent is too big. In that case, fall back to a plain kzalloc()'d rbio (flagged with rbio->kmalloc) and free it with kfree().
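As context for the patch below, here is a minimal userspace sketch of the allocation decision the message describes; ENCODED_EXTENT_MAX, pool_alloc() and bounce_alloc() are hypothetical stand-ins, not bcachefs APIs. The bounce mempool's elements are sized for encoded_extent_max sectors, so a larger unencoded extent has to fall back to a plain heap allocation:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define SECTOR_SHIFT		9
#define ENCODED_EXTENT_MAX	128	/* sectors; stands in for c->sb.encoded_extent_max */

struct bounce_buf {
	void	*data;
	bool	from_pool;	/* true: fixed-size mempool element */
};

/* Stand-in for the real mempool: only hands out fixed-size elements. */
static void *pool_alloc(void)
{
	return malloc((size_t) ENCODED_EXTENT_MAX << SECTOR_SHIFT);
}

static bool bounce_alloc(struct bounce_buf *buf, unsigned sectors)
{
	if (sectors <= ENCODED_EXTENT_MAX) {
		/* encoded extents always fit: the pool was sized for them */
		buf->data = pool_alloc();
		buf->from_pool = true;
	} else {
		/* unencoded extent bigger than a pool element: plain allocation */
		buf->data = malloc((size_t) sectors << SECTOR_SHIFT);
		buf->from_pool = false;
	}
	return buf->data != NULL;
}

int main(void)
{
	struct bounce_buf buf;

	if (bounce_alloc(&buf, 256))	/* 256 > 128: takes the heap path */
		printf("bounced via %s\n", buf.from_pool ? "mempool" : "heap");
	free(buf.data);
	return 0;
}

The patch applies the same idea to the promote path's bounce rbio: allocate from the pool when the extent fits, kzalloc() otherwise.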
-rw-r--r--	fs/bcachefs/bcachefs.h	1
-rw-r--r--	fs/bcachefs/io.c	211
-rw-r--r--	fs/bcachefs/io_types.h	1
-rw-r--r--	fs/bcachefs/super.c	7
-rw-r--r--	fs/bcachefs/sysfs.c	7
5 files changed, 151 insertions, 76 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 61865240133f..1b71ae08f7f4 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -711,6 +711,7 @@ struct bch_fs {
unsigned copy_gc_enabled:1;
unsigned rebalance_enabled:1;
unsigned rebalance_percent;
+ bool promote_whole_extents;
#define BCH_DEBUG_PARAM(name, description) bool name;
BCH_DEBUG_PARAMS_ALL()
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index caab7b460532..3afc4108d6a4 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -871,9 +871,41 @@ static const struct rhashtable_params bch_promote_params = {
.key_len = sizeof(struct bpos),
};
+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
+ struct bpos pos,
+ struct bch_io_opts opts,
+ unsigned flags)
+{
+ if (!opts.promote_target)
+ return false;
+
+ if (!(flags & BCH_READ_MAY_PROMOTE))
+ return false;
+
+ if (percpu_ref_is_dying(&c->writes))
+ return false;
+
+ if (!bkey_extent_is_data(k.k))
+ return false;
+
+ if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), opts.promote_target))
+ return false;
+
+ if (rhashtable_lookup_fast(&c->promote_table, &pos,
+ bch_promote_params))
+ return false;
+
+ return true;
+}
+
static void promote_free(struct bch_fs *c, struct promote_op *op)
{
- rhashtable_remove_fast(&c->promote_table, &op->hash, bch_promote_params);
+ int ret;
+
+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
+ percpu_ref_put(&c->writes);
kfree(op);
}
@@ -883,7 +915,6 @@ static void promote_done(struct closure *cl)
struct promote_op *op =
container_of(cl, struct promote_op, cl);
struct bch_fs *c = op->write.op.c;
- percpu_ref_put(&c->writes);
bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
promote_free(c, op);
}
@@ -894,17 +925,15 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
struct closure *cl = &op->cl;
struct bio *bio = &op->write.op.wbio.bio;
- BUG_ON(!rbio->split || !rbio->bounce);
-
- if (!percpu_ref_tryget(&c->writes))
- return;
-
trace_promote(&rbio->bio);
/* we now own pages: */
+ BUG_ON(!rbio->bounce);
BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+
+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
- rbio->promote = NULL;
bch2_migrate_read_done(&op->write, rbio);
@@ -913,79 +942,115 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
closure_return_with_destructor(cl, promote_done);
}
-/*
- * XXX: multiple promotes can race with each other, wastefully. Keep a list of
- * outstanding promotes?
- */
-static struct promote_op *promote_alloc(struct bch_read_bio *rbio,
- struct bkey_s_c k)
+noinline
+static struct promote_op *__promote_alloc(struct bch_fs *c,
+ struct bpos pos,
+ struct extent_pick_ptr *pick,
+ struct bch_io_opts opts,
+ unsigned rbio_sectors,
+ struct bch_read_bio **rbio)
{
- struct bch_fs *c = rbio->c;
- struct promote_op *op;
+ struct promote_op *op = NULL;
struct bio *bio;
+ unsigned rbio_pages = DIV_ROUND_UP(rbio_sectors, PAGE_SECTORS);
/* data might have to be decompressed in the write path: */
- unsigned pages = DIV_ROUND_UP(rbio->pick.crc.uncompressed_size,
- PAGE_SECTORS);
+ unsigned wbio_pages = DIV_ROUND_UP(pick->crc.uncompressed_size,
+ PAGE_SECTORS);
int ret;
- BUG_ON(!rbio->bounce);
- BUG_ON(pages < rbio->bio.bi_vcnt);
+ if (!percpu_ref_tryget(&c->writes))
+ return NULL;
- op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages,
+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * wbio_pages,
GFP_NOIO);
if (!op)
- return NULL;
+ goto err;
- op->pos = k.k->p;
+ op->pos = pos;
- if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
- bch_promote_params)) {
- kfree(op);
- return NULL;
+ /*
+ * promotes require bouncing, but if the extent isn't
+ * checksummed/compressed it might be too big for the mempool:
+ */
+ if (rbio_sectors > c->sb.encoded_extent_max) {
+ *rbio = kzalloc(sizeof(struct bch_read_bio) +
+ sizeof(struct bio_vec) * rbio_pages,
+ GFP_NOIO);
+ if (!*rbio)
+ goto err;
+
+ rbio_init(&(*rbio)->bio, opts);
+ bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs,
+ rbio_pages);
+
+ (*rbio)->bio.bi_iter.bi_size = rbio_sectors << 9;
+ bch2_bio_map(&(*rbio)->bio, NULL);
+
+ if (bch2_bio_alloc_pages(&(*rbio)->bio, GFP_NOIO))
+ goto err;
+
+ (*rbio)->bounce = true;
+ (*rbio)->split = true;
+ (*rbio)->kmalloc = true;
}
- bio = &op->write.op.wbio.bio;
- bio_init(bio, bio->bi_inline_vecs, pages);
+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
+ bch_promote_params))
+ goto err;
- memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
- sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
+ bio = &op->write.op.wbio.bio;
+ bio_init(bio, bio->bi_inline_vecs, wbio_pages);
ret = bch2_migrate_write_init(c, &op->write,
writepoint_hashed((unsigned long) current),
- rbio->opts,
+ opts,
DATA_PROMOTE,
(struct data_opts) {
- .target = rbio->opts.promote_target
+ .target = opts.promote_target
},
- k);
+ bkey_s_c_null);
BUG_ON(ret);
return op;
+err:
+ if (*rbio)
+ bio_free_pages(&(*rbio)->bio);
+ kfree(*rbio);
+ *rbio = NULL;
+ kfree(op);
+ percpu_ref_put(&c->writes);
+ return NULL;
}
-static bool should_promote(struct bch_fs *c, struct bkey_s_c k,
- unsigned flags, u16 target)
+static inline struct promote_op *promote_alloc(struct bch_fs *c,
+ struct bvec_iter iter,
+ struct bkey_s_c k,
+ struct extent_pick_ptr *pick,
+ struct bch_io_opts opts,
+ unsigned flags,
+ struct bch_read_bio **rbio,
+ bool *bounce,
+ bool *read_full)
{
- if (!target)
- return false;
-
- if (!(flags & BCH_READ_MAY_PROMOTE))
- return false;
-
- if (percpu_ref_is_dying(&c->writes))
- return false;
-
- if (!bkey_extent_is_data(k.k))
- return false;
-
- if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), target))
- return false;
+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+ unsigned sectors = promote_full
+ ? pick->crc.compressed_size
+ : bvec_iter_sectors(iter);
+ struct bpos pos = promote_full
+ ? bkey_start_pos(k.k)
+ : POS(k.k->p.inode, iter.bi_sector);
+ struct promote_op *promote;
+
+ if (!should_promote(c, k, pos, opts, flags))
+ return NULL;
- if (rhashtable_lookup_fast(&c->promote_table, &k.k->p,
- bch_promote_params))
- return false;
+ promote = __promote_alloc(c, pos, pick, opts, sectors, rbio);
+ if (!promote)
+ return NULL;
- return true;
+ *bounce = true;
+ *read_full = promote_full;
+ return promote;
}
/* Read */
@@ -1034,7 +1099,11 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
if (rbio->split) {
struct bch_read_bio *parent = rbio->parent;
- bio_put(&rbio->bio);
+ if (rbio->kmalloc)
+ kfree(rbio);
+ else
+ bio_put(&rbio->bio);
+
rbio = parent;
}
@@ -1334,6 +1403,7 @@ static void __bch2_read_endio(struct work_struct *work)
*/
bch2_encrypt_bio(c, crc.csum_type, nonce, src);
promote_start(rbio->promote, rbio);
+ rbio->promote = NULL;
}
nodecode:
if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
@@ -1417,10 +1487,10 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bch_devs_mask *avoid, unsigned flags)
{
struct extent_pick_ptr pick;
- struct bch_read_bio *rbio;
+ struct bch_read_bio *rbio = NULL;
struct bch_dev *ca;
- bool split = false, bounce = false, read_full = false;
- bool promote = false, narrow_crcs = false;
+ struct promote_op *promote = NULL;
+ bool bounce = false, read_full = false, narrow_crcs = false;
struct bpos pos = bkey_start_pos(k.k);
int pick_ret;
@@ -1471,11 +1541,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
bounce = true;
}
- promote = should_promote(c, k, flags, orig->opts.promote_target);
- if (promote) {
- read_full = true;
- bounce = true;
- }
+ promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
+ &rbio, &bounce, &read_full);
if (!read_full) {
EBUG_ON(pick.crc.compression_type);
@@ -1494,7 +1561,9 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
pos.offset = iter.bi_sector;
}
- if (bounce) {
+ if (rbio) {
+ /* promote already allocated bounce rbio */
+ } else if (bounce) {
unsigned sectors = pick.crc.compressed_size;
rbio = rbio_init(bio_alloc_bioset(GFP_NOIO,
@@ -1503,7 +1572,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
orig->opts);
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
- split = true;
+ rbio->bounce = true;
+ rbio->split = true;
} else if (flags & BCH_READ_MUST_CLONE) {
/*
* Have to clone if there were any splits, due to error
@@ -1517,12 +1587,11 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
&c->bio_read_split),
orig->opts);
rbio->bio.bi_iter = iter;
- split = true;
+ rbio->split = true;
} else {
noclone:
rbio = orig;
rbio->bio.bi_iter = iter;
- split = false;
BUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
}
@@ -1530,14 +1599,12 @@ noclone:
rbio->c = c;
rbio->submit_time = local_clock();
- if (split)
+ if (rbio->split)
rbio->parent = orig;
else
rbio->end_io = orig->bio.bi_end_io;
rbio->bvec_iter = iter;
rbio->flags = flags;
- rbio->bounce = bounce;
- rbio->split = split;
rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
rbio->narrow_crcs = narrow_crcs;
rbio->hole = 0;
@@ -1547,14 +1614,14 @@ noclone:
rbio->pick = pick;
rbio->pos = pos;
rbio->version = k.k->version;
- rbio->promote = promote ? promote_alloc(rbio, k) : NULL;
+ rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
- if (bounce)
+ if (rbio->bounce)
trace_read_bounce(&rbio->bio);
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h
index c3c33f6e0f73..28281ea6c43a 100644
--- a/fs/bcachefs/io_types.h
+++ b/fs/bcachefs/io_types.h
@@ -42,6 +42,7 @@ struct bch_read_bio {
struct {
u16 bounce:1,
split:1,
+ kmalloc:1,
have_ioref:1,
narrow_crcs:1,
hole:1,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6683fe95b462..16b8cbfc973a 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -591,9 +591,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
seqcount_init(&c->gc_pos_lock);
- c->copy_gc_enabled = 1;
- c->rebalance_enabled = 1;
- c->rebalance_percent = 10;
+ c->copy_gc_enabled = 1;
+ c->rebalance_enabled = 1;
+ c->rebalance_percent = 10;
+ c->promote_whole_extents = true;
c->journal.write_time = &c->journal_write_time;
c->journal.delay_time = &c->journal_delay_time;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index d4e02f2aa63b..e4381bb5da19 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -184,6 +184,7 @@ sysfs_pd_controller_attribute(copy_gc);
rw_attribute(rebalance_enabled);
rw_attribute(rebalance_percent);
sysfs_pd_controller_attribute(rebalance);
+rw_attribute(promote_whole_extents);
rw_attribute(pd_controllers_update_seconds);
@@ -340,9 +341,10 @@ SHOW(bch2_fs)
sysfs_printf(rebalance_enabled, "%i", c->rebalance_enabled);
sysfs_print(rebalance_percent, c->rebalance_percent);
-
sysfs_pd_controller_show(rebalance, &c->rebalance_pd); /* XXX */
+ sysfs_print(promote_whole_extents, c->promote_whole_extents);
+
sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));
@@ -414,6 +416,8 @@ STORE(__bch2_fs)
sysfs_strtoul(rebalance_percent, c->rebalance_percent);
sysfs_pd_controller_store(rebalance, &c->rebalance_pd);
+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
+
/* Debugging: */
#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
@@ -470,6 +474,7 @@ struct attribute *bch2_fs_files[] = {
&sysfs_journal_reclaim_delay_ms,
&sysfs_rebalance_percent,
+ &sysfs_promote_whole_extents,
&sysfs_compression_stats,
NULL
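
As a closing note, here is a minimal userspace sketch of the range computation the new promote_alloc() in io.c above performs; the names and types are illustrative stand-ins, not the kernel's. Whole-extent promotes start at the extent's start position and cover its compressed size, while partial promotes cover only the sectors the read asked for:

#include <stdbool.h>
#include <stdio.h>

struct range {
	unsigned long long	start;		/* first sector of the promote */
	unsigned		sectors;	/* length of the promote */
};

static struct range promote_range(bool read_full, bool promote_whole_extents,
				  unsigned long long extent_start,
				  unsigned extent_sectors,
				  unsigned long long read_start,
				  unsigned read_sectors)
{
	/* mirrors: promote_full = *read_full || c->promote_whole_extents */
	bool promote_full = read_full || promote_whole_extents;

	if (promote_full)
		return (struct range) { extent_start, extent_sectors };
	/* partial promote: just the range this read touches */
	return (struct range) { read_start, read_sectors };
}

int main(void)
{
	/* extent at sectors 1000..1127; the read wants 8 sectors at 1040 */
	struct range r = promote_range(false, true, 1000, 128, 1040, 8);

	printf("promote %u sectors at %llu\n", r.sectors, r.start);
	return 0;
}

With promote_whole_extents enabled (the new default set in super.c), even a small read promotes the entire extent, which is why the bounce buffer can now exceed the mempool's element size.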