author    Kent Overstreet <kent.overstreet@linux.dev>    2022-10-29 15:54:17 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>    2022-10-31 21:36:07 -0400
commit    77c27f28aa58e9d9037eb68c87d3283f68c371f7 (patch)
tree      692daf92aebe70364f6711a6f3fc7971651d7a57
parent    1913e923fbf5fecb16dca4bd2ebe960b88f34a9c (diff)
bcachefs: BCH_WRITE_SYNC
This adds a new flag for the write path, BCH_WRITE_SYNC, and switches the
O_DIRECT write path to use it when we're not running asynchronously.

It runs the btree update after the write in the original thread's context
instead of a kworker, cutting context switches in half.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--	fs/bcachefs/fs-io.c	  2
-rw-r--r--	fs/bcachefs/io.c	134
-rw-r--r--	fs/bcachefs/io.h	  4
3 files changed, 59 insertions, 81 deletions
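
The core of the change is in bch2_write_done(): the choice between finishing in the
submitting thread and bouncing through index_update_wq() is now made in one place,
keyed off BCH_WRITE_SYNC. Below is a minimal sketch of that decision, paraphrased from
the bch2_write_done() hunk further down; the _sketch name is hypothetical, everything
else is taken from the patch, and this is not a drop-in replacement.

	static void bch2_write_done_sketch(struct bch_write_op *op)
	{
		if (!(op->flags & BCH_WRITE_FLUSH) || op->error) {
			/* No journal flush needed (or the write already failed):
			 * complete immediately in whatever context we're in. */
			__bch2_write_done(&op->cl);
		} else if (!(op->flags & BCH_WRITE_SYNC)) {
			/* Async write: kick off the journal flush and complete the
			 * op from the index update workqueue once it finishes. */
			bch2_journal_flush_seq_async(&op->c->journal,
						     op->journal_seq, &op->cl);
			continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
		} else {
			/* BCH_WRITE_SYNC: flush the journal and complete right here,
			 * in the submitting thread, skipping the kworker round trip. */
			bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
			__bch2_write_done(&op->cl);
		}
	}
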
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index bc91b584e891..0925f11752d9 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -2118,6 +2118,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
dio->op.subvol = inode->ei_subvol;
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
+ if (sync)
+ dio->op.flags |= BCH_WRITE_SYNC;
if ((req->ki_flags & IOCB_DSYNC) &&
!c->opts.journal_flush_disabled)
dio->op.flags |= BCH_WRITE_FLUSH;
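
For reference, a condensed caller-side sketch of the flag selection done in
bch2_dio_write_loop() above: a blocking O_DIRECT write requests synchronous
completion, and O_DSYNC additionally requests a journal flush. The helper name is
hypothetical; the conditions are the ones in the hunk.

	static void dio_write_flags_sketch(struct bch_write_op *op, struct kiocb *req,
					   struct bch_fs *c, bool sync)
	{
		if (sync)
			op->flags |= BCH_WRITE_SYNC;	/* btree update runs in this thread */

		if ((req->ki_flags & IOCB_DSYNC) && !c->opts.journal_flush_disabled)
			op->flags |= BCH_WRITE_FLUSH;	/* flush the journal before completing */
	}
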
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index f74f6e4de233..74bad67f6000 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -541,7 +541,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
static void __bch2_write(struct closure *);
-static void bch2_write_done(struct closure *cl)
+static void __bch2_write_done(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bch_fs *c = op->c;
@@ -557,7 +557,23 @@ static void bch2_write_done(struct closure *cl)
EBUG_ON(cl->parent);
closure_debug_destroy(cl);
- op->end_io(op);
+ if (op->end_io)
+ op->end_io(op);
+}
+
+static __always_inline void bch2_write_done(struct bch_write_op *op)
+{
+ if (likely(!(op->flags & BCH_WRITE_FLUSH) || op->error)) {
+ __bch2_write_done(&op->cl);
+ } else if (!(op->flags & BCH_WRITE_SYNC)) {
+ bch2_journal_flush_seq_async(&op->c->journal,
+ op->journal_seq,
+ &op->cl);
+ continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
+ } else {
+ bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
+ __bch2_write_done(&op->cl);
+ }
}
static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
@@ -644,26 +660,20 @@ out:
err:
keys->top = keys->keys;
op->error = ret;
+ op->flags |= BCH_WRITE_DONE;
goto out;
}
static void bch2_write_index(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
- struct bch_fs *c = op->c;
__bch2_write_index(op);
- if (!(op->flags & BCH_WRITE_DONE)) {
+ if (!(op->flags & BCH_WRITE_DONE))
continue_at(cl, __bch2_write, index_update_wq(op));
- } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
- bch2_journal_flush_seq_async(&c->journal,
- op->journal_seq,
- cl);
- continue_at(cl, bch2_write_done, index_update_wq(op));
- } else {
- continue_at_nobarrier(cl, bch2_write_done, NULL);
- }
+ else
+ bch2_write_done(op);
}
static void bch2_write_endio(struct bio *bio)
@@ -695,12 +705,12 @@ static void bch2_write_endio(struct bio *bio)
if (wbio->put_bio)
bio_put(bio);
- if (parent)
+ if (parent) {
bio_endio(&parent->bio);
- else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT))
- closure_put(cl);
- else
- continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op));
+ return;
+ }
+
+ closure_put(cl);
}
static void init_append_extent(struct bch_write_op *op,
@@ -1113,7 +1123,6 @@ static void __bch2_write(struct closure *cl)
struct bch_fs *c = op->c;
struct write_point *wp;
struct bio *bio = NULL;
- bool skip_put = true;
unsigned nofs_flags;
int ret;
@@ -1129,13 +1138,13 @@ again:
/* +1 for possible cache device: */
if (op->open_buckets.nr + op->nr_replicas + 1 >
ARRAY_SIZE(op->open_buckets.v))
- goto flush_io;
+ break;
if (bch2_keylist_realloc(&op->insert_keys,
op->inline_keys,
ARRAY_SIZE(op->inline_keys),
BKEY_EXTENT_U64s_MAX))
- goto flush_io;
+ break;
/*
* The copygc thread is now global, which means it's no longer
@@ -1157,48 +1166,31 @@ again:
if (IS_ERR(wp)) {
if (unlikely(wp != ERR_PTR(-EAGAIN))) {
- ret = PTR_ERR(wp);
- goto err;
+ op->error = PTR_ERR(wp);
+ op->flags |= BCH_WRITE_DONE;
}
- goto flush_io;
+ break;
}
- /*
- * It's possible for the allocator to fail, put us on the
- * freelist waitlist, and then succeed in one of various retry
- * paths: if that happens, we need to disable the skip_put
- * optimization because otherwise there won't necessarily be a
- * barrier before we free the bch_write_op:
- */
- if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
- skip_put = false;
-
bch2_open_bucket_get(c, wp, &op->open_buckets);
ret = bch2_write_extent(op, wp, &bio);
bch2_alloc_sectors_done(c, wp);
- if (ret < 0)
- goto err;
-
- if (ret) {
- skip_put = false;
- } else {
- /*
- * for the skip_put optimization this has to be set
- * before we submit the bio:
- */
+ if (ret < 0) {
+ op->error = ret;
op->flags |= BCH_WRITE_DONE;
+ break;
}
+ if (!ret)
+ op->flags |= BCH_WRITE_DONE;
+
bio->bi_end_io = bch2_write_endio;
bio->bi_private = &op->cl;
bio->bi_opf |= REQ_OP_WRITE;
- if (!skip_put)
- closure_get(bio->bi_private);
- else
- op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT;
+ closure_get(bio->bi_private);
key_to_write = (void *) (op->insert_keys.keys_p +
key_to_write_offset);
@@ -1207,48 +1199,33 @@ again:
key_to_write);
} while (ret);
- if (!skip_put)
- continue_at(cl, bch2_write_index, index_update_wq(op));
-out:
- memalloc_nofs_restore(nofs_flags);
- return;
-err:
- op->error = ret;
- op->flags |= BCH_WRITE_DONE;
-
- continue_at(cl, bch2_write_index, index_update_wq(op));
- goto out;
-flush_io:
/*
- * If the write can't all be submitted at once, we generally want to
- * block synchronously as that signals backpressure to the caller.
+ * Sync or no?
+ *
+ * If we're running asynchronously, we may still want to block
+ * synchronously here if we weren't able to submit all of the IO at
+ * once, as that signals backpressure to the caller.
*
* However, if we're running out of a workqueue, we can't block here
* because we'll be blocking other work items from completing:
*/
- if (current->flags & PF_WQ_WORKER) {
- continue_at(cl, bch2_write_index, index_update_wq(op));
- goto out;
- }
-
- closure_sync(cl);
-
- if (!bch2_keylist_empty(&op->insert_keys)) {
+ if ((op->flags & BCH_WRITE_SYNC) ||
+ (!(op->flags & BCH_WRITE_DONE) && !(current->flags & PF_WQ_WORKER))) {
+ closure_sync(cl);
__bch2_write_index(op);
- if (op->error) {
- op->flags |= BCH_WRITE_DONE;
- continue_at_nobarrier(cl, bch2_write_done, NULL);
- goto out;
- }
+ if (!(op->flags & BCH_WRITE_DONE))
+ goto again;
+ bch2_write_done(op);
+ } else {
+ continue_at(cl, bch2_write_index, index_update_wq(op));
}
- goto again;
+ memalloc_nofs_restore(nofs_flags);
}
static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
{
- struct closure *cl = &op->cl;
struct bio *bio = &op->wbio.bio;
struct bvec_iter iter;
struct bkey_i_inline_data *id;
@@ -1285,10 +1262,9 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
op->flags |= BCH_WRITE_DONE;
- continue_at_nobarrier(cl, bch2_write_index, NULL);
- return;
+ __bch2_write_index(op);
err:
- bch2_write_done(&op->cl);
+ bch2_write_done(op);
}
/**
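
To summarize the new tail of __bch2_write() above, with the reasoning from the
"Sync or no?" comment spelled out: this is a condensed restatement of the branch in
the patch, using the same locals as the function, not new code.

	if ((op->flags & BCH_WRITE_SYNC) ||
	    (!(op->flags & BCH_WRITE_DONE) && !(current->flags & PF_WQ_WORKER))) {
		/* Either the caller asked for synchronous completion, or we
		 * couldn't submit everything at once and we're allowed to block
		 * (not running on a workqueue): wait here, which also applies
		 * backpressure to the caller. */
		closure_sync(cl);		/* wait for the bios already submitted */
		__bch2_write_index(op);		/* btree update in this thread */

		if (!(op->flags & BCH_WRITE_DONE))
			goto again;		/* submit the rest of the write */
		bch2_write_done(op);
	} else {
		/* On a workqueue (or a fully submitted async write): don't block,
		 * punt the index update to index_update_wq(). */
		continue_at(cl, bch2_write_index, index_update_wq(op));
	}
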
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index 9879a76e0168..cf69a5066e3a 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -35,10 +35,10 @@ enum bch_write_flags {
BCH_WRITE_WROTE_DATA_INLINE = (1 << 7),
BCH_WRITE_FROM_INTERNAL = (1 << 8),
BCH_WRITE_CHECK_ENOSPC = (1 << 9),
- BCH_WRITE_MOVE = (1 << 10),
+ BCH_WRITE_SYNC = (1 << 10),
+ BCH_WRITE_MOVE = (1 << 11),
/* Internal: */
- BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11),
BCH_WRITE_DONE = (1 << 12),
BCH_WRITE_IO_ERROR = (1 << 13),
};