author:    Kent Overstreet <kent.overstreet@linux.dev>  2022-10-29 15:54:17 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev>  2022-10-31 21:36:07 -0400
commit:    77c27f28aa58e9d9037eb68c87d3283f68c371f7
tree:      692daf92aebe70364f6711a6f3fc7971651d7a57
parent:    1913e923fbf5fecb16dca4bd2ebe960b88f34a9c
bcachefs: BCH_WRITE_SYNC
This adds a new flag for the write path, BCH_WRITE_SYNC, and switches
the O_DIRECT write path to use it when we're not running asynchronously.
It runs the btree update after the write in the original thread's
context instead of a kworker, cutting context switches in half.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
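
To make the control-flow change concrete, here is a small userspace sketch in plain C with pthreads. None of the names below are bcachefs APIs; they are invented purely for illustration. It models the two completion paths the commit message describes: the async path hands the post-write index update to a worker thread (the kworker in the real code), costing an extra round of context switches, while the sync path waits for the IO in the submitting thread and then runs the update there.

```c
/*
 * Toy model of sync vs. async write completion. All names here are
 * hypothetical stand-ins, not bcachefs code.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct write_op {
	bool sync;			/* models BCH_WRITE_SYNC */
	bool io_done;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

/* Stand-in for the btree/index update that follows the data write. */
static void index_update(const char *who)
{
	printf("index update run by %s\n", who);
}

/* Stand-in for bio completion (the real one runs in IRQ/endio context). */
static void write_endio(struct write_op *op)
{
	pthread_mutex_lock(&op->lock);
	op->io_done = true;
	pthread_cond_broadcast(&op->cond);
	pthread_mutex_unlock(&op->lock);
}

static void wait_for_io(struct write_op *op)
{
	pthread_mutex_lock(&op->lock);
	while (!op->io_done)
		pthread_cond_wait(&op->cond, &op->lock);
	pthread_mutex_unlock(&op->lock);
}

/* Async path: completion is handed off to a worker thread. */
static void *worker_thread(void *arg)
{
	wait_for_io(arg);
	index_update("worker thread (async path)");
	return NULL;
}

static void submit_write(struct write_op *op)
{
	pthread_t worker;

	if (!op->sync)
		pthread_create(&worker, NULL, worker_thread, op);

	write_endio(op);	/* pretend the device finished instantly */

	if (op->sync) {
		/* Sync: block here, then do the update in this thread. */
		wait_for_io(op);
		index_update("submitting thread (sync path)");
	} else {
		/* Caller is only woken once the worker finishes. */
		pthread_join(worker, NULL);
	}
}

int main(void)
{
	struct write_op op = {
		.sync = true,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};

	submit_write(&op);
	return 0;
}
```

Build with `cc -pthread`; flipping `.sync` to `false` exercises the worker-thread hand-off that the real asynchronous path still uses.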
-rw-r--r--  fs/bcachefs/fs-io.c |   2
-rw-r--r--  fs/bcachefs/io.c    | 134
-rw-r--r--  fs/bcachefs/io.h    |   4

3 files changed, 59 insertions, 81 deletions
```diff
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index bc91b584e891..0925f11752d9 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -2118,6 +2118,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 		dio->op.subvol = inode->ei_subvol;
 		dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
 
+		if (sync)
+			dio->op.flags |= BCH_WRITE_SYNC;
 		if ((req->ki_flags & IOCB_DSYNC) &&
 		    !c->opts.journal_flush_disabled)
 			dio->op.flags |= BCH_WRITE_FLUSH;
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index f74f6e4de233..74bad67f6000 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -541,7 +541,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 
 static void __bch2_write(struct closure *);
 
-static void bch2_write_done(struct closure *cl)
+static void __bch2_write_done(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bch_fs *c = op->c;
@@ -557,7 +557,23 @@ static void bch2_write_done(struct closure *cl)
 
 	EBUG_ON(cl->parent);
 	closure_debug_destroy(cl);
-	op->end_io(op);
+	if (op->end_io)
+		op->end_io(op);
+}
+
+static __always_inline void bch2_write_done(struct bch_write_op *op)
+{
+	if (likely(!(op->flags & BCH_WRITE_FLUSH) || op->error)) {
+		__bch2_write_done(&op->cl);
+	} else if (!(op->flags & BCH_WRITE_SYNC)) {
+		bch2_journal_flush_seq_async(&op->c->journal,
+					     op->journal_seq,
+					     &op->cl);
+		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
+	} else {
+		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
+		__bch2_write_done(&op->cl);
+	}
 }
 
 static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
@@ -644,26 +660,20 @@ out:
 err:
 	keys->top = keys->keys;
 	op->error = ret;
+	op->flags |= BCH_WRITE_DONE;
 	goto out;
 }
 
 static void bch2_write_index(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-	struct bch_fs *c = op->c;
 
 	__bch2_write_index(op);
 
-	if (!(op->flags & BCH_WRITE_DONE)) {
+	if (!(op->flags & BCH_WRITE_DONE))
 		continue_at(cl, __bch2_write, index_update_wq(op));
-	} else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
-		bch2_journal_flush_seq_async(&c->journal,
-					     op->journal_seq,
-					     cl);
-		continue_at(cl, bch2_write_done, index_update_wq(op));
-	} else {
-		continue_at_nobarrier(cl, bch2_write_done, NULL);
-	}
+	else
+		bch2_write_done(op);
 }
 
 static void bch2_write_endio(struct bio *bio)
@@ -695,12 +705,12 @@ static void bch2_write_endio(struct bio *bio)
 	if (wbio->put_bio)
 		bio_put(bio);
 
-	if (parent)
+	if (parent) {
 		bio_endio(&parent->bio);
-	else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT))
-		closure_put(cl);
-	else
-		continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op));
+		return;
+	}
+
+	closure_put(cl);
 }
 
 static void init_append_extent(struct bch_write_op *op,
@@ -1113,7 +1123,6 @@ static void __bch2_write(struct closure *cl)
 	struct bch_fs *c = op->c;
 	struct write_point *wp;
 	struct bio *bio = NULL;
-	bool skip_put = true;
 	unsigned nofs_flags;
 	int ret;
 
@@ -1129,13 +1138,13 @@ again:
 		/* +1 for possible cache device: */
 		if (op->open_buckets.nr + op->nr_replicas + 1 >
 		    ARRAY_SIZE(op->open_buckets.v))
-			goto flush_io;
+			break;
 
 		if (bch2_keylist_realloc(&op->insert_keys,
 					op->inline_keys,
 					ARRAY_SIZE(op->inline_keys),
 					BKEY_EXTENT_U64s_MAX))
-			goto flush_io;
+			break;
 
 		/*
 		 * The copygc thread is now global, which means it's no longer
@@ -1157,48 +1166,31 @@ again:
 		if (IS_ERR(wp)) {
 			if (unlikely(wp != ERR_PTR(-EAGAIN))) {
-				ret = PTR_ERR(wp);
-				goto err;
+				op->error = PTR_ERR(wp);
+				op->flags |= BCH_WRITE_DONE;
 			}
 
-			goto flush_io;
+			break;
 		}
 
-		/*
-		 * It's possible for the allocator to fail, put us on the
-		 * freelist waitlist, and then succeed in one of various retry
-		 * paths: if that happens, we need to disable the skip_put
-		 * optimization because otherwise there won't necessarily be a
-		 * barrier before we free the bch_write_op:
-		 */
-		if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
-			skip_put = false;
-
 		bch2_open_bucket_get(c, wp, &op->open_buckets);
 		ret = bch2_write_extent(op, wp, &bio);
 
 		bch2_alloc_sectors_done(c, wp);
 
-		if (ret < 0)
-			goto err;
-
-		if (ret) {
-			skip_put = false;
-		} else {
-			/*
-			 * for the skip_put optimization this has to be set
-			 * before we submit the bio:
-			 */
+		if (ret < 0) {
+			op->error = ret;
 			op->flags |= BCH_WRITE_DONE;
+			break;
 		}
 
+		if (!ret)
+			op->flags |= BCH_WRITE_DONE;
+
 		bio->bi_end_io = bch2_write_endio;
 		bio->bi_private = &op->cl;
 		bio->bi_opf |= REQ_OP_WRITE;
 
-		if (!skip_put)
-			closure_get(bio->bi_private);
-		else
-			op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT;
+		closure_get(bio->bi_private);
 
 		key_to_write = (void *) (op->insert_keys.keys_p +
 					 key_to_write_offset);
@@ -1207,48 +1199,33 @@ again:
 					  key_to_write);
 	} while (ret);
 
-	if (!skip_put)
-		continue_at(cl, bch2_write_index, index_update_wq(op));
-out:
-	memalloc_nofs_restore(nofs_flags);
-	return;
-err:
-	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
-
-	continue_at(cl, bch2_write_index, index_update_wq(op));
-	goto out;
-flush_io:
 	/*
-	 * If the write can't all be submitted at once, we generally want to
-	 * block synchronously as that signals backpressure to the caller.
+	 * Sync or no?
+	 *
+	 * If we're running asynchronously, we may still want to block
+	 * synchronously here if we weren't able to submit all of the IO at
+	 * once, as that signals backpressure to the caller.
 	 *
 	 * However, if we're running out of a workqueue, we can't block here
 	 * because we'll be blocking other work items from completing:
 	 */
-	if (current->flags & PF_WQ_WORKER) {
-		continue_at(cl, bch2_write_index, index_update_wq(op));
-		goto out;
-	}
-
-	closure_sync(cl);
-
-	if (!bch2_keylist_empty(&op->insert_keys)) {
+	if ((op->flags & BCH_WRITE_SYNC) ||
+	    (!(op->flags & BCH_WRITE_DONE) && !(current->flags & PF_WQ_WORKER))) {
+		closure_sync(cl);
 		__bch2_write_index(op);
 
-		if (op->error) {
-			op->flags |= BCH_WRITE_DONE;
-			continue_at_nobarrier(cl, bch2_write_done, NULL);
-			goto out;
-		}
+		if (!(op->flags & BCH_WRITE_DONE))
+			goto again;
+		bch2_write_done(op);
+	} else {
+		continue_at(cl, bch2_write_index, index_update_wq(op));
 	}
 
-	goto again;
+	memalloc_nofs_restore(nofs_flags);
 }
 
 static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 {
-	struct closure *cl = &op->cl;
 	struct bio *bio = &op->wbio.bio;
 	struct bvec_iter iter;
 	struct bkey_i_inline_data *id;
@@ -1285,10 +1262,9 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 	op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
 	op->flags |= BCH_WRITE_DONE;
 
-	continue_at_nobarrier(cl, bch2_write_index, NULL);
-	return;
+	__bch2_write_index(op);
 err:
-	bch2_write_done(&op->cl);
+	bch2_write_done(op);
 }
 
 /**
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index 9879a76e0168..cf69a5066e3a 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -35,10 +35,10 @@ enum bch_write_flags {
 	BCH_WRITE_WROTE_DATA_INLINE = (1 << 7),
 	BCH_WRITE_FROM_INTERNAL = (1 << 8),
 	BCH_WRITE_CHECK_ENOSPC = (1 << 9),
-	BCH_WRITE_MOVE = (1 << 10),
+	BCH_WRITE_SYNC = (1 << 10),
+	BCH_WRITE_MOVE = (1 << 11),
 
 	/* Internal: */
-	BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11),
 	BCH_WRITE_DONE = (1 << 12),
 	BCH_WRITE_IO_ERROR = (1 << 13),
 };
```