diff options
Diffstat (limited to 'libbcachefs/fs-io.c')
-rw-r--r-- | libbcachefs/fs-io.c | 284 |
1 files changed, 124 insertions, 160 deletions
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 2c34a85c..66374a9c 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -12,6 +12,7 @@ #include "journal.h" #include "io.h" #include "keylist.h" +#include "quota.h" #include <linux/aio.h> #include <linux/backing-dev.h> @@ -56,14 +57,13 @@ struct bch_writepage_io { struct dio_write { struct closure cl; struct kiocb *req; - struct bch_fs *c; - loff_t offset; + struct task_struct *task; + unsigned loop:1, + sync:1, + free_iov:1; - struct iovec *iovec; - struct iovec inline_vecs[UIO_FASTIOV]; struct iov_iter iter; - - struct task_struct *task; + struct iovec inline_vecs[2]; /* must be last: */ struct bchfs_write_op iop; @@ -130,6 +130,7 @@ static int __must_check bch2_write_inode_size(struct bch_fs *c, static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors) { inode->v.i_blocks += sectors; + bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN); } static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors) @@ -1286,7 +1287,8 @@ static int bch2_read_single_page(struct page *page, int ret; DECLARE_COMPLETION_ONSTACK(done); - rbio = to_rbio(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read)); + rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), + io_opts(c, inode)); rbio->bio.bi_private = &done; rbio->bio.bi_end_io = bch2_read_single_page_end_io; @@ -1439,13 +1441,15 @@ static void bch2_direct_IO_read_split_endio(struct bio *bio) bio_check_pages_dirty(bio); /* transfers ownership */ } -static int bch2_direct_IO_read(struct bch_fs *c, struct kiocb *req, - struct file *file, struct bch_inode_info *inode, - struct iov_iter *iter, loff_t offset) +static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) { + struct file *file = req->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts = io_opts(c, inode); struct dio_read *dio; struct bio *bio; + loff_t offset = req->ki_pos; bool sync = is_sync_kiocb(req); ssize_t ret; @@ -1525,103 +1529,128 @@ start: } } -static long __bch2_dio_write_complete(struct dio_write *dio) +static void bch2_dio_write_loop_async(struct closure *); + +static long bch2_dio_write_loop(struct dio_write *dio) { - struct file *file = dio->req->ki_filp; + struct kiocb *req = dio->req; + struct file *file = req->ki_filp; struct address_space *mapping = file->f_mapping; struct bch_inode_info *inode = file_bch_inode(file); - long ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9); + struct bio *bio = &dio->iop.op.wbio.bio; + struct bio_vec *bv; + bool sync; + long ret; + int i; - bch2_disk_reservation_put(dio->c, &dio->iop.op.res); + if (dio->loop) + goto loop; - __pagecache_block_put(&mapping->add_lock); - inode_dio_end(&inode->v); + inode_dio_begin(&inode->v); + __pagecache_block_get(&mapping->add_lock); - if (dio->iovec && dio->iovec != dio->inline_vecs) - kfree(dio->iovec); + /* Write and invalidate pagecache range that we're writing to: */ + ret = write_invalidate_inode_pages_range(mapping, req->ki_pos, + req->ki_pos + iov_iter_count(&dio->iter) - 1); + if (unlikely(ret)) + goto err; - bio_put(&dio->iop.op.wbio.bio); - return ret; -} + while (1) { + BUG_ON(current->pagecache_lock); + current->pagecache_lock = &mapping->add_lock; + if (current != dio->task) + use_mm(dio->task->mm); -static void bch2_dio_write_complete(struct closure *cl) -{ - struct dio_write *dio = container_of(cl, struct dio_write, cl); - struct kiocb *req = dio->req; + ret = bio_iov_iter_get_pages(bio, &dio->iter); - req->ki_complete(req, __bch2_dio_write_complete(dio), 0); -} + if (current != dio->task) + unuse_mm(dio->task->mm); + current->pagecache_lock = NULL; -static void bch2_dio_write_done(struct dio_write *dio) -{ - struct bio_vec *bv; - int i; + if (unlikely(ret < 0)) + goto err; - bio_for_each_segment_all(bv, &dio->iop.op.wbio.bio, i) - put_page(bv->bv_page); + dio->iop.op.pos = POS(inode->v.i_ino, + (req->ki_pos >> 9) + dio->iop.op.written); - if (dio->iter.count) - bio_reset(&dio->iop.op.wbio.bio); -} + task_io_account_write(bio->bi_iter.bi_size); -static void bch2_do_direct_IO_write(struct dio_write *dio) -{ - struct file *file = dio->req->ki_filp; - struct bch_inode_info *inode = file_bch_inode(file); - struct bio *bio = &dio->iop.op.wbio.bio; - int ret; + closure_call(&dio->iop.op.cl, bch2_write, NULL, &dio->cl); - ret = bio_iov_iter_get_pages(bio, &dio->iter); - if (ret < 0) { - dio->iop.op.error = ret; - return; - } + if (!dio->sync && !dio->loop && dio->iter.count) { + struct iovec *iov = dio->inline_vecs; - dio->iop.op.pos = POS(inode->v.i_ino, (dio->offset >> 9) + dio->iop.op.written); + if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { + iov = kmalloc(dio->iter.nr_segs * sizeof(*iov), + GFP_KERNEL); + if (unlikely(!iov)) { + dio->iop.op.error = -ENOMEM; + goto err_wait_io; + } - task_io_account_write(bio->bi_iter.bi_size); + dio->free_iov = true; + } - closure_call(&dio->iop.op.cl, bch2_write, NULL, &dio->cl); -} + memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov)); + dio->iter.iov = iov; + } +err_wait_io: + dio->loop = true; -static void bch2_dio_write_loop_async(struct closure *cl) -{ - struct dio_write *dio = - container_of(cl, struct dio_write, cl); - struct address_space *mapping = dio->req->ki_filp->f_mapping; + if (!dio->sync) { + continue_at_noreturn(&dio->cl, + bch2_dio_write_loop_async, NULL); + return -EIOCBQUEUED; + } - bch2_dio_write_done(dio); + closure_sync(&dio->cl); +loop: + bio_for_each_segment_all(bv, bio, i) + put_page(bv->bv_page); + if (!dio->iter.count || dio->iop.op.error) + break; + bio_reset(bio); + } + + ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9); +err: + __pagecache_block_put(&mapping->add_lock); + inode_dio_end(&inode->v); + bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res); - if (dio->iter.count && !dio->iop.op.error) { - use_mm(dio->task->mm); - pagecache_block_get(&mapping->add_lock); + if (dio->free_iov) + kfree(dio->iter.iov); - bch2_do_direct_IO_write(dio); + closure_debug_destroy(&dio->cl); - pagecache_block_put(&mapping->add_lock); - unuse_mm(dio->task->mm); + sync = dio->sync; + bio_put(bio); - continue_at(&dio->cl, bch2_dio_write_loop_async, NULL); - } else { -#if 0 - closure_return_with_destructor(cl, bch2_dio_write_complete); -#else - closure_debug_destroy(cl); - bch2_dio_write_complete(cl); -#endif + if (!sync) { + req->ki_complete(req, ret, 0); + ret = -EIOCBQUEUED; } + return ret; +} + +static void bch2_dio_write_loop_async(struct closure *cl) +{ + struct dio_write *dio = container_of(cl, struct dio_write, cl); + + bch2_dio_write_loop(dio); } -static int bch2_direct_IO_write(struct bch_fs *c, - struct kiocb *req, struct file *file, - struct bch_inode_info *inode, - struct iov_iter *iter, loff_t offset) +static int bch2_direct_IO_write(struct kiocb *req, + struct iov_iter *iter, + bool swap) { - struct address_space *mapping = file->f_mapping; + struct file *file = req->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct dio_write *dio; struct bio *bio; + loff_t offset = req->ki_pos; ssize_t ret; - bool sync = is_sync_kiocb(req); lockdep_assert_held(&inode->v.i_rwsem); @@ -1637,95 +1666,49 @@ static int bch2_direct_IO_write(struct bch_fs *c, dio = container_of(bio, struct dio_write, iop.op.wbio.bio); closure_init(&dio->cl, NULL); dio->req = req; - dio->c = c; - dio->offset = offset; - dio->iovec = NULL; - dio->iter = *iter; dio->task = current; + dio->loop = false; + dio->sync = is_sync_kiocb(req) || + offset + iter->count > inode->v.i_size; + dio->free_iov = false; + dio->iter = *iter; bch2_fswrite_op_init(&dio->iop, c, inode, io_opts(c, inode), true); dio->iop.op.write_point = writepoint_hashed((unsigned long) dio->task); dio->iop.op.flags |= BCH_WRITE_NOPUT_RESERVATION; - if ((dio->req->ki_flags & IOCB_DSYNC) && + if ((req->ki_flags & IOCB_DSYNC) && !c->opts.journal_flush_disabled) dio->iop.op.flags |= BCH_WRITE_FLUSH; - if (offset + iter->count > inode->v.i_size) - sync = true; - - /* - * XXX: we shouldn't return -ENOSPC if we're overwriting existing data - - * if getting a reservation fails we should check if we are doing an - * overwrite. - * - * Have to then guard against racing with truncate (deleting data that - * we would have been overwriting) - */ ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9, 0); if (unlikely(ret)) { if (bch2_check_range_allocated(c, POS(inode->v.i_ino, offset >> 9), - iter->count >> 9)) { - closure_debug_destroy(&dio->cl); - bio_put(bio); - return ret; - } + iter->count >> 9)) + goto err; dio->iop.unalloc = true; } dio->iop.op.nr_replicas = dio->iop.op.res.nr_replicas; - inode_dio_begin(&inode->v); - __pagecache_block_get(&mapping->add_lock); - - if (sync) { - do { - bch2_do_direct_IO_write(dio); - - closure_sync(&dio->cl); - bch2_dio_write_done(dio); - } while (dio->iter.count && !dio->iop.op.error); - - closure_debug_destroy(&dio->cl); - return __bch2_dio_write_complete(dio); - } else { - bch2_do_direct_IO_write(dio); - - if (dio->iter.count && !dio->iop.op.error) { - if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { - dio->iovec = kmalloc(dio->iter.nr_segs * - sizeof(struct iovec), - GFP_KERNEL); - if (!dio->iovec) - dio->iop.op.error = -ENOMEM; - } else { - dio->iovec = dio->inline_vecs; - } - - memcpy(dio->iovec, - dio->iter.iov, - dio->iter.nr_segs * sizeof(struct iovec)); - dio->iter.iov = dio->iovec; - } - - continue_at_noreturn(&dio->cl, bch2_dio_write_loop_async, NULL); - return -EIOCBQUEUED; - } + return bch2_dio_write_loop(dio); +err: + bch2_disk_reservation_put(c, &dio->iop.op.res); + closure_debug_destroy(&dio->cl); + bio_put(bio); + return ret; } ssize_t bch2_direct_IO(struct kiocb *req, struct iov_iter *iter) { - struct file *file = req->ki_filp; - struct bch_inode_info *inode = file_bch_inode(file); - struct bch_fs *c = inode->v.i_sb->s_fs_info; struct blk_plug plug; ssize_t ret; blk_start_plug(&plug); - ret = ((iov_iter_rw(iter) == WRITE) - ? bch2_direct_IO_write - : bch2_direct_IO_read)(c, req, file, inode, iter, req->ki_pos); + ret = iov_iter_rw(iter) == WRITE + ? bch2_direct_IO_write(req, iter, false) + : bch2_direct_IO_read(req, iter); blk_finish_plug(&plug); return ret; @@ -1734,26 +1717,7 @@ ssize_t bch2_direct_IO(struct kiocb *req, struct iov_iter *iter) static ssize_t bch2_direct_write(struct kiocb *iocb, struct iov_iter *iter) { - struct file *file = iocb->ki_filp; - struct bch_inode_info *inode = file_bch_inode(file); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct address_space *mapping = file->f_mapping; - loff_t pos = iocb->ki_pos; - ssize_t ret; - - pagecache_block_get(&mapping->add_lock); - - /* Write and invalidate pagecache range that we're writing to: */ - ret = write_invalidate_inode_pages_range(file->f_mapping, pos, - pos + iov_iter_count(iter) - 1); - if (unlikely(ret)) - goto err; - - ret = bch2_direct_IO_write(c, iocb, file, inode, iter, pos); -err: - pagecache_block_put(&mapping->add_lock); - - return ret; + return bch2_direct_IO_write(iocb, iter, true); } static ssize_t __bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) |