author		Kent Overstreet <kent.overstreet@gmail.com>	2016-02-04 18:36:48 -0900
committer	Kent Overstreet <kent.overstreet@gmail.com>	2017-01-18 21:37:37 -0900
commit		3ae36b2b8294a034d1b0656ea2835ccdd4e6a797 (patch)
tree		cbd6b585606a220c384900eafc84a57931e9c89a
parent		e3f87bfdb2b1dffc4c38f77d8490e764c2d65a06 (diff)
bcachefs: split out fs-io.c
-rw-r--r--	drivers/md/bcache/Makefile	   7
-rw-r--r--	drivers/md/bcache/fs-io.c	1834
-rw-r--r--	drivers/md/bcache/fs-io.h	  77
-rw-r--r--	drivers/md/bcache/fs.c		1899
-rw-r--r--	drivers/md/bcache/fs.h		  12
5 files changed, 1941 insertions, 1888 deletions
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index aa67393cd0a8..70119335e649 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_BCACHE) += bcache.o
bcache-y := acl.o alloc.o bkey.o bkey_methods.o blockdev.o\
bset.o btree_cache.o btree_gc.o btree_io.o btree_iter.o btree_update.o\
buckets.o chardev.o clock.o closure.o debug.o dirent.o error.o\
- extents.o fs.o fs-gc.o inode.o io.o journal.o keybuf.o keylist.o\
- migrate.o move.o movinggc.o notify.o opts.o request.o siphash.o six.o\
- stats.o super.o sysfs.o tier.o trace.o util.o writeback.o xattr.o
+ extents.o fs.o fs-gc.o fs-io.o inode.o io.o journal.o keybuf.o\
+ keylist.o migrate.o move.o movinggc.o notify.o opts.o request.o\
+ siphash.o six.o stats.o super.o sysfs.o tier.o trace.o util.o\
+ writeback.o xattr.o
diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c
new file mode 100644
index 000000000000..36837376524a
--- /dev/null
+++ b/drivers/md/bcache/fs-io.c
@@ -0,0 +1,1834 @@
+
+#include "bcache.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "error.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "inode.h"
+#include "journal.h"
+#include "io.h"
+#include "keylist.h"
+
+#include <linux/aio.h>
+#include <linux/backing-dev.h>
+#include <linux/falloc.h>
+#include <linux/migrate.h>
+#include <linux/mmu_context.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+
+struct bio_set *bch_writepage_bioset;
+struct bio_set *bch_dio_read_bioset;
+struct bio_set *bch_dio_write_bioset;
+
+/* i_size updates: */
+
+/*
+ * The in-memory i_size should never be less than the on-disk i_size:
+ */
+static void bch_i_size_write(struct inode *inode, loff_t new_i_size)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+
+ EBUG_ON(new_i_size < ei->i_size);
+ i_size_write(inode, new_i_size);
+}
+
+static int inode_set_size(struct bch_inode_info *ei, struct bch_inode *bi,
+ void *p)
+{
+ loff_t *new_i_size = p;
+ unsigned i_flags = le32_to_cpu(bi->i_flags);
+
+ lockdep_assert_held(&ei->update_lock);
+
+ bi->i_size = cpu_to_le64(*new_i_size);
+
+ if (atomic_long_read(&ei->i_size_dirty_count))
+ i_flags |= BCH_INODE_I_SIZE_DIRTY;
+ else
+ i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+
+ bi->i_flags = cpu_to_le32(i_flags);
+
+ return 0;
+}
+
+static int __must_check bch_write_inode_size(struct cache_set *c,
+ struct bch_inode_info *ei,
+ loff_t new_size)
+{
+ return __bch_write_inode(c, ei, inode_set_size, &new_size);
+}
+
+static int inode_set_dirty(struct bch_inode_info *ei,
+ struct bch_inode *bi, void *p)
+{
+ bi->i_flags = cpu_to_le32(le32_to_cpu(bi->i_flags)|
+ BCH_INODE_I_SIZE_DIRTY);
+ return 0;
+}
+
+static int check_make_i_size_dirty(struct bch_inode_info *ei, loff_t offset)
+{
+ bool need_set_dirty;
+ unsigned seq;
+ int ret = 0;
+
+ do {
+ seq = read_seqcount_begin(&ei->shadow_i_size_lock);
+ need_set_dirty = offset > ei->i_size &&
+ !(ei->i_flags & BCH_INODE_I_SIZE_DIRTY);
+ } while (read_seqcount_retry(&ei->shadow_i_size_lock, seq));
+
+ if (!need_set_dirty)
+ return 0;
+
+ mutex_lock(&ei->update_lock);
+
+ /* recheck under lock.. */
+
+ if (offset > ei->i_size &&
+ !(ei->i_flags & BCH_INODE_I_SIZE_DIRTY)) {
+ struct cache_set *c = ei->vfs_inode.i_sb->s_fs_info;
+
+ ret = __bch_write_inode(c, ei, inode_set_dirty, NULL);
+ }
+
+ mutex_unlock(&ei->update_lock);
+
+ return ret;
+}
+
+static inline void i_size_dirty_put(struct bch_inode_info *ei)
+{
+ atomic_long_dec_bug(&ei->i_size_dirty_count);
+}
+
+static inline void i_size_dirty_get(struct bch_inode_info *ei)
+{
+ lockdep_assert_held(&ei->vfs_inode.i_rwsem);
+
+ atomic_long_inc(&ei->i_size_dirty_count);
+}
+
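+/*
+ * i_size updates are tracked in a small per-inode fifo (ei->i_size_updates):
+ * each entry counts outstanding pages/writes that extend i_size up to that
+ * entry's new_i_size. i_size_update_put() drops @count refs on entry @idx;
+ * once the entry at the front of the fifo reaches zero, finished entries are
+ * popped in order and the latest completed new_i_size is written back to the
+ * inode on disk.
+ */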
+static void i_size_update_put(struct cache_set *c, struct bch_inode_info *ei,
+ unsigned idx, unsigned long count)
+{
+ struct i_size_update *u = &ei->i_size_updates.data[idx];
+ loff_t new_i_size = -1;
+ long r;
+
+ if (!count)
+ return;
+
+ r = atomic_long_sub_return(count, &u->count);
+ BUG_ON(r < 0);
+
+ if (r)
+ return;
+
+ /*
+ * Flush i_size_updates entries in order, starting from the front of the
+ * fifo, for as long as the entry at the front is finished (its refcount
+ * has gone to 0):
+ */
+
+ mutex_lock(&ei->update_lock);
+
+ while (!fifo_empty(&ei->i_size_updates) &&
+ !atomic_long_read(&(u = &fifo_front(&ei->i_size_updates))->count)) {
+ struct i_size_update t;
+
+ i_size_dirty_put(ei);
+
+ if (u->new_i_size != -1) {
+ BUG_ON(u->new_i_size < ei->i_size);
+ new_i_size = u->new_i_size;
+ }
+
+ fifo_pop(&ei->i_size_updates, t);
+ }
+
+ if (new_i_size != -1) {
+ int ret = bch_write_inode_size(c, ei, new_i_size);
+
+ ret = ret;
+ /*
+ * XXX: need to pin the inode in memory if the inode update
+ * fails
+ */
+ }
+
+ mutex_unlock(&ei->update_lock);
+}
+
+static struct i_size_update *i_size_update_new(struct bch_inode_info *ei,
+ loff_t new_size)
+{
+ struct i_size_update *u;
+
+ lockdep_assert_held(&ei->update_lock);
+
+ if (fifo_empty(&ei->i_size_updates) ||
+ (test_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags) &&
+ !fifo_full(&ei->i_size_updates))) {
+ clear_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags);
+ fifo_push(&ei->i_size_updates,
+ (struct i_size_update) { 0 });
+
+ u = &fifo_back(&ei->i_size_updates);
+ atomic_long_set(&u->count, 0);
+ i_size_dirty_get(ei);
+ }
+
+ u = &fifo_back(&ei->i_size_updates);
+ u->new_i_size = new_size;
+
+ return u;
+}
+
+/* page state: */
+
+/* stored in page->private: */
+struct bch_page_state {
+ u8 idx;
+};
+
+#define SECTORS_CACHE 1024
+
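+/*
+ * Space for dirty pages is reserved in batches: sectors_reserved_cache holds
+ * sectors already accounted for in c->sectors_reserved but not yet handed
+ * out. The fast path just consumes from the cache; when it runs dry we
+ * reserve SECTORS_CACHE more sectors than requested and either refill the
+ * cache or, if the cache set is full, undo the reservation and return
+ * -ENOSPC.
+ */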
+static int reserve_sectors(struct cache_set *c, unsigned sectors)
+{
+ u64 sectors_to_get = SECTORS_CACHE + sectors;
+
+ if (likely(atomic64_sub_return(sectors,
+ &c->sectors_reserved_cache) >= 0))
+ return 0;
+
+ atomic64_add(sectors_to_get, &c->sectors_reserved);
+
+ if (likely(!cache_set_full(c))) {
+ atomic64_add(sectors_to_get, &c->sectors_reserved_cache);
+ return 0;
+ }
+
+ atomic64_sub_bug(sectors_to_get, &c->sectors_reserved);
+ atomic64_add(sectors, &c->sectors_reserved_cache);
+ return -ENOSPC;
+}
+
+/*
+ * our page flags:
+ *
+ * allocated - page has space on disk reserved for it (c->sectors_reserved) -
+ * -ENOSPC was checked then, shouldn't be checked later
+ *
+ * append - page is dirty from an append write, new i_size can't be written
+ * until after page is written; ref held on ei->i_size_dirty_count
+ */
+
+#define PF_ANY(page, enforce) page
+PAGEFLAG(Allocated, private, PF_ANY)
+TESTSCFLAG(Allocated, private, PF_ANY)
+
+PAGEFLAG(Append, private_2, PF_ANY)
+TESTSCFLAG(Append, private_2, PF_ANY)
+#undef PF_ANY
+
+static void bch_clear_page_bits(struct cache_set *c, struct bch_inode_info *ei,
+ struct page *page)
+{
+ EBUG_ON(!PageLocked(page));
+
+ if (PageAllocated(page)) {
+ atomic64_sub_bug(PAGE_SECTORS, &c->sectors_reserved);
+ ClearPageAllocated(page);
+ }
+
+ if (PageAppend(page)) {
+ struct bch_page_state *s = (void *) &page->private;
+
+ i_size_update_put(c, ei, s->idx, 1);
+ ClearPageAppend(page);
+ }
+}
+
+/* readpages/writepages: */
+
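+/*
+ * Returns 0 if @page was appended to @bio, nonzero if the bio is already full
+ * or the page isn't contiguous with the end of the bio - in which case the
+ * caller submits the bio and retries with a fresh one.
+ */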
+static int bch_bio_add_page(struct bio *bio, struct page *page)
+{
+ sector_t offset = (sector_t) page->index << (PAGE_SHIFT - 9);
+
+ BUG_ON(!bio->bi_max_vecs);
+
+ if (!bio->bi_vcnt)
+ bio->bi_iter.bi_sector = offset;
+ else if (bio_end_sector(bio) != offset ||
+ bio->bi_vcnt == bio->bi_max_vecs)
+ return -1;
+
+ bio->bi_io_vec[bio->bi_vcnt++] = (struct bio_vec) {
+ .bv_page = page,
+ .bv_len = PAGE_SIZE,
+ .bv_offset = 0,
+ };
+
+ bio->bi_iter.bi_size += PAGE_SIZE;
+
+ return 0;
+}
+
+static void bch_readpages_end_io(struct bio *bio)
+{
+ struct bio_vec *bv;
+ int i;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ struct page *page = bv->bv_page;
+
+ if (!bio->bi_error) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ unlock_page(page);
+ }
+
+ bio_put(bio);
+}
+
+static inline struct page *__readpage_next_page(struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ struct page *page;
+ int ret;
+
+ while (*nr_pages) {
+ page = list_entry(pages->prev, struct page, lru);
+ prefetchw(&page->flags);
+ list_del(&page->lru);
+
+ ret = add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS);
+
+ /* if add_to_page_cache_lru() succeeded, page is locked: */
+ put_page(page);
+
+ if (!ret)
+ return page;
+
+ (*nr_pages)--;
+ }
+
+ return NULL;
+}
+
+#define for_each_readpage_page(_mapping, _pages, _nr_pages, _page) \
+ for (; \
+ ((_page) = __readpage_next_page(_mapping, _pages, &(_nr_pages)));\
+ (_nr_pages)--)
+
+int bch_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct inode *inode = mapping->host;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct bio *bio = NULL;
+ struct page *page;
+
+ pr_debug("reading %u pages", nr_pages);
+
+ for_each_readpage_page(mapping, pages, nr_pages, page) {
+again:
+ if (!bio) {
+ bio = bio_alloc(GFP_NOFS,
+ min_t(unsigned, nr_pages,
+ BIO_MAX_PAGES));
+
+ bio->bi_end_io = bch_readpages_end_io;
+ }
+
+ if (bch_bio_add_page(bio, page)) {
+ bch_read(c, bio, inode->i_ino);
+ bio = NULL;
+ goto again;
+ }
+ }
+
+ if (bio)
+ bch_read(c, bio, inode->i_ino);
+
+ pr_debug("success");
+ return 0;
+}
+
+int bch_readpage(struct file *file, struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
+ bio->bi_end_io = bch_readpages_end_io;
+
+ bch_bio_add_page(bio, page);
+ bch_read(c, bio, inode->i_ino);
+
+ return 0;
+}
+
+struct bch_writepage {
+ struct cache_set *c;
+ u64 inum;
+ struct bch_writepage_io *io;
+};
+
+static void bch_writepage_io_free(struct closure *cl)
+{
+ struct bch_writepage_io *io = container_of(cl,
+ struct bch_writepage_io, cl);
+ struct bio *bio = &io->bio.bio.bio;
+
+ bio_put(bio);
+}
+
+static void bch_writepage_io_done(struct closure *cl)
+{
+ struct bch_writepage_io *io = container_of(cl,
+ struct bch_writepage_io, cl);
+ struct cache_set *c = io->op.c;
+ struct bio *bio = &io->bio.bio.bio;
+ struct bch_inode_info *ei = io->ei;
+ struct bio_vec *bvec;
+ unsigned i;
+
+ atomic64_sub_bug(io->sectors_reserved, &c->sectors_reserved);
+
+ for (i = 0; i < ARRAY_SIZE(io->i_size_update_count); i++)
+ i_size_update_put(c, ei, i, io->i_size_update_count[i]);
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+
+ BUG_ON(!PageWriteback(page));
+
+ if (io->bio.bio.bio.bi_error) {
+ SetPageError(page);
+ if (page->mapping)
+ set_bit(AS_EIO, &page->mapping->flags);
+ }
+
+ end_page_writeback(page);
+ }
+
+ closure_return_with_destructor(&io->cl, bch_writepage_io_free);
+}
+
+static void bch_writepage_do_io(struct bch_writepage_io *io)
+{
+ pr_debug("writing %u sectors to %llu:%llu",
+ bio_sectors(&io->bio.bio.bio),
+ io->op.insert_key.k.p.inode,
+ (u64) io->bio.bio.bio.bi_iter.bi_sector);
+
+ closure_call(&io->op.cl, bch_write, NULL, &io->cl);
+ continue_at(&io->cl, bch_writepage_io_done, io->op.c->wq);
+}
+
+/*
+ * Get a bch_writepage_io and add @page to it - appending to an existing one if
+ * possible, else allocating a new one:
+ */
+static void bch_writepage_io_alloc(struct bch_writepage *w,
+ struct bch_inode_info *ei,
+ struct page *page)
+{
+alloc_io:
+ if (!w->io) {
+ struct bio *bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES,
+ bch_writepage_bioset);
+ w->io = container_of(bio, struct bch_writepage_io, bio.bio.bio);
+
+ closure_init(&w->io->cl, NULL);
+ w->io->ei = ei;
+ memset(w->io->i_size_update_count, 0,
+ sizeof(w->io->i_size_update_count));
+ w->io->sectors_reserved = 0;
+
+ bch_write_op_init(&w->io->op, w->c, &w->io->bio, NULL,
+ bkey_to_s_c(&KEY(w->inum, 0, 0)),
+ NULL,
+ &ei->journal_seq, 0);
+ }
+
+ if (bch_bio_add_page(&w->io->bio.bio.bio, page)) {
+ bch_writepage_do_io(w->io);
+ w->io = NULL;
+ goto alloc_io;
+ }
+
+ /*
+ * We shouldn't ever be handed pages for multiple inodes in a single
+ * pass - right?
+ */
+ BUG_ON(ei != w->io->ei);
+}
+
+static int __bch_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct inode *inode = page->mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct bch_writepage *w = data;
+ unsigned offset;
+ loff_t i_size = i_size_read(inode);
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
+
+ /* Is the page fully inside i_size? */
+ if (page->index < end_index)
+ goto do_io;
+
+ /* Is the page fully outside i_size? (truncate in progress) */
+ offset = i_size & (PAGE_SIZE - 1);
+ if (page->index > end_index || !offset) {
+ unlock_page(page);
+ return 0;
+ }
+
+ /*
+ * The page straddles i_size. It must be zeroed out on each and every
+ * writepage invocation because it may be mmapped. "A file is mapped
+ * in multiples of the page size. For a file that is not a multiple of
+ * the page size, the remaining memory is zeroed when mapped, and
+ * writes to that region are not written out to the file."
+ */
+ zero_user_segment(page, offset, PAGE_SIZE);
+do_io:
+ if (check_make_i_size_dirty(ei, page_offset(page) + PAGE_SIZE)) {
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
+
+ bch_writepage_io_alloc(w, ei, page);
+
+ /*
+ * Before unlocking the page, transfer refcounts to w->io:
+ */
+ if (PageAppend(page)) {
+ struct bch_page_state *s = (void *) &page->private;
+
+ /*
+ * i_size won't get updated and this write's data made visible
+ * until the i_size_update this page points to completes - so
+ * tell the write path to start a new one:
+ */
+ if (&ei->i_size_updates.data[s->idx] ==
+ &fifo_back(&ei->i_size_updates))
+ set_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags);
+
+ w->io->i_size_update_count[s->idx]++;
+ ClearPageAppend(page);
+ }
+
+ if (PageAllocated(page)) {
+ w->io->sectors_reserved += PAGE_SECTORS;
+ ClearPageAllocated(page);
+ }
+
+ BUG_ON(PageWriteback(page));
+ set_page_writeback(page);
+ unlock_page(page);
+
+ return 0;
+}
+
+int bch_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+ int ret;
+ struct bch_writepage w = {
+ .c = mapping->host->i_sb->s_fs_info,
+ .inum = mapping->host->i_ino,
+ .io = NULL,
+ };
+
+ ret = write_cache_pages(mapping, wbc, __bch_writepage, &w);
+
+ if (w.io)
+ bch_writepage_do_io(w.io);
+
+ return ret;
+}
+
+int bch_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct inode *inode = page->mapping->host;
+ int ret;
+ struct bch_writepage w = {
+ .c = inode->i_sb->s_fs_info,
+ .inum = inode->i_ino,
+ .io = NULL,
+ };
+
+ ret = __bch_writepage(page, NULL, &w);
+ if (ret)
+ return ret;
+
+ if (w.io)
+ bch_writepage_do_io(w.io);
+
+ return 0;
+}
+
+static void bch_read_single_page_end_io(struct bio *bio)
+{
+ complete(bio->bi_private);
+}
+
+static int bch_read_single_page(struct page *page,
+ struct address_space *mapping)
+{
+ struct inode *inode = mapping->host;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct bio *bio;
+ int ret = 0;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
+ bio->bi_private = &done;
+ bio->bi_end_io = bch_read_single_page_end_io;
+ bch_bio_add_page(bio, page);
+
+ bch_read(c, bio, inode->i_ino);
+ wait_for_completion(&done);
+
+ if (!ret)
+ ret = bio->bi_error;
+ bio_put(bio);
+
+ if (ret < 0)
+ return ret;
+
+ SetPageUptodate(page);
+
+ return 0;
+}
+
+int bch_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ struct inode *inode = mapping->host;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ pgoff_t index = pos >> PAGE_SHIFT;
+ unsigned offset = pos & (PAGE_SIZE - 1);
+ struct page *page;
+ int ret = 0;
+
+ BUG_ON(inode_unhashed(mapping->host));
+
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ return -ENOMEM;
+
+ if (!PageAllocated(page)) {
+ if (reserve_sectors(c, PAGE_SECTORS)) {
+ ret = -ENOSPC;
+ goto err;
+ }
+
+ SetPageAllocated(page);
+ }
+
+ if (PageUptodate(page))
+ goto out;
+
+ /* If we're writing entire page, don't need to read it in first: */
+ if (len == PAGE_SIZE)
+ goto out;
+
+ if (!offset && pos + len >= inode->i_size) {
+ zero_user_segment(page, len, PAGE_SIZE);
+ flush_dcache_page(page);
+ goto out;
+ }
+
+ if (index > inode->i_size >> PAGE_SHIFT) {
+ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
+ flush_dcache_page(page);
+ goto out;
+ }
+
+ ret = bch_read_single_page(page, mapping);
+ if (ret)
+ goto err;
+out:
+ *pagep = page;
+ return ret;
+err:
+ unlock_page(page);
+ put_page(page);
+ page = NULL;
+ goto out;
+}
+
+int bch_write_end(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+
+ lockdep_assert_held(&inode->i_rwsem);
+
+ if (unlikely(copied < len && !PageUptodate(page))) {
+ /*
+ * The page needs to be read in, but that would destroy
+ * our partial write - simplest thing is to just force
+ * userspace to redo the write:
+ *
+ * userspace doesn't _have_ to redo the write, so clear
+ * PageAllocated:
+ */
+ copied = 0;
+ zero_user(page, 0, PAGE_SIZE);
+ flush_dcache_page(page);
+ bch_clear_page_bits(c, ei, page);
+ goto out;
+ }
+
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ if (!PageDirty(page))
+ set_page_dirty(page);
+
+ if (pos + copied > inode->i_size) {
+ struct i_size_update *u;
+
+ /*
+ * if the page already has a ref on an i_size_update, even if it's an
+ * older one, leave it - updates have to be flushed in order, so holding
+ * a ref on an older one is just as good as taking a ref on a newer one,
+ * if we're adding a newer one now
+ *
+ * - if there's no current i_size_update, or if we want to
+ * create a new one and there's room for a new one, create it
+ *
+ * - set current i_size_update's i_size to new i_size
+ *
+ * - if !PageAppend, take a ref on the current i_size_update
+ */
+
+ /* XXX: locking */
+ mutex_lock(&ei->update_lock);
+ u = i_size_update_new(ei, pos + copied);
+
+ if (!PageAppend(page)) {
+ struct bch_page_state *s = (void *) &page->private;
+
+ s->idx = u - ei->i_size_updates.data;
+ atomic_long_inc(&u->count);
+
+ SetPageAppend(page);
+ }
+
+ bch_i_size_write(inode, pos + copied);
+ mutex_unlock(&ei->update_lock);
+ }
+out:
+ unlock_page(page);
+ put_page(page);
+
+ return copied;
+}
+
+/* O_DIRECT */
+
+static void bch_dio_read_complete(struct closure *cl)
+{
+ struct dio_read *dio = container_of(cl, struct dio_read, cl);
+
+ dio->req->ki_complete(dio->req, dio->ret, 0);
+ bio_put(&dio->bio);
+}
+
+static void bch_direct_IO_read_endio(struct bio *bio)
+{
+ struct dio_read *dio = bio->bi_private;
+
+ if (bio->bi_error)
+ dio->ret = bio->bi_error;
+
+ closure_put(&dio->cl);
+ bio_check_pages_dirty(bio); /* transfers ownership */
+}
+
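+/*
+ * O_DIRECT read: the request is clamped to i_size, then split into one or
+ * more bios mapped straight onto the user pages. dio->cl tracks the
+ * outstanding bios; dio->ret carries the byte count (or the first error)
+ * back to the caller.
+ */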
+static int bch_direct_IO_read(struct cache_set *c, struct kiocb *req,
+ struct file *file, struct inode *inode,
+ struct iov_iter *iter, loff_t offset)
+{
+ struct dio_read *dio;
+ struct bio *bio;
+ unsigned long inum = inode->i_ino;
+ ssize_t ret = 0;
+ size_t pages = iov_iter_npages(iter, BIO_MAX_PAGES);
+ bool sync = is_sync_kiocb(req);
+ loff_t i_size;
+
+ bio = bio_alloc_bioset(GFP_KERNEL, pages, bch_dio_read_bioset);
+ bio_get(bio);
+
+ dio = container_of(bio, struct dio_read, bio);
+ closure_init(&dio->cl, NULL);
+
+ /*
+ * this is a _really_ horrible hack just to avoid an atomic sub at the
+ * end:
+ */
+ if (!sync) {
+ set_closure_fn(&dio->cl, bch_dio_read_complete, NULL);
+ atomic_set(&dio->cl.remaining,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_RUNNING +
+ CLOSURE_DESTRUCTOR);
+ } else {
+ atomic_set(&dio->cl.remaining,
+ CLOSURE_REMAINING_INITIALIZER + 1);
+ }
+
+ dio->req = req;
+ dio->ret = iter->count;
+
+ i_size = i_size_read(inode);
+ if (offset + dio->ret > i_size) {
+ dio->ret = max_t(loff_t, 0, i_size - offset);
+ iter->count = round_up(dio->ret, PAGE_SIZE);
+ }
+
+ if (!dio->ret) {
+ closure_put(&dio->cl);
+ goto out;
+ }
+
+ goto start;
+ while (iter->count) {
+ pages = iov_iter_npages(iter, BIO_MAX_PAGES);
+ bio = bio_alloc(GFP_KERNEL, pages);
+start:
+ bio->bi_iter.bi_sector = offset >> 9;
+ bio->bi_end_io = bch_direct_IO_read_endio;
+ bio->bi_private = dio;
+
+ ret = bio_get_user_pages(bio, iter, 1);
+ if (ret < 0) {
+ /* XXX: fault inject this path */
+ bio->bi_error = ret;
+ bio_endio(bio);
+ break;
+ }
+
+ offset += bio->bi_iter.bi_size;
+ bio_set_pages_dirty(bio);
+
+ if (iter->count)
+ closure_get(&dio->cl);
+
+ bch_read(c, bio, inum);
+ }
+out:
+ if (sync) {
+ closure_sync(&dio->cl);
+ closure_debug_destroy(&dio->cl);
+ ret = dio->ret;
+ bio_put(&dio->bio);
+ return ret;
+ } else {
+ return -EIOCBQUEUED;
+ }
+}
+
+static void __bch_dio_write_complete(struct dio_write *dio)
+{
+ inode_dio_end(dio->req->ki_filp->f_inode);
+
+ if (dio->iovec && dio->iovec != dio->inline_vecs)
+ kfree(dio->iovec);
+
+ bio_put(&dio->bio.bio.bio);
+}
+
+static void bch_dio_write_complete(struct closure *cl)
+{
+ struct dio_write *dio = container_of(cl, struct dio_write, cl);
+ struct kiocb *req = dio->req;
+ long ret = dio->written ?: dio->error;
+
+ __bch_dio_write_complete(dio);
+ req->ki_complete(req, ret, 0);
+}
+
+static void bch_dio_write_done(struct dio_write *dio)
+{
+ struct bio_vec *bv;
+ int i;
+
+ dio->written += dio->iop.written << 9;
+
+ if (dio->iop.error)
+ dio->error = dio->iop.error;
+
+ bio_for_each_segment_all(bv, &dio->bio.bio.bio, i)
+ put_page(bv->bv_page);
+
+ if (dio->iter.count)
+ bio_reset(&dio->bio.bio.bio);
+}
+
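+/*
+ * Build bios directly from the user buffer and feed them to bch_write: for
+ * sync writes each write op is waited on before issuing the next; for async
+ * writes we issue a single bio here and the loop is continued from
+ * bch_dio_write_loop_async().
+ */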
+static void bch_do_direct_IO_write(struct dio_write *dio, bool sync)
+{
+ struct file *file = dio->req->ki_filp;
+ struct inode *inode = file->f_inode;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct bio *bio = &dio->bio.bio.bio;
+ unsigned flags = BCH_WRITE_CHECK_ENOSPC;
+ int ret;
+
+ if (file->f_flags & O_DSYNC || IS_SYNC(file->f_mapping->host))
+ flags |= BCH_WRITE_FLUSH;
+
+ while (dio->iter.count) {
+ bio->bi_iter.bi_sector = (dio->offset + dio->written) >> 9;
+
+ ret = bio_get_user_pages(bio, &dio->iter, 0);
+ if (ret < 0) {
+ dio->error = ret;
+ break;
+ }
+
+ bch_write_op_init(&dio->iop, c, &dio->bio, NULL,
+ bkey_to_s_c(&KEY(inode->i_ino,
+ bio_end_sector(bio),
+ bio_sectors(bio))),
+ NULL,
+ &ei->journal_seq, flags);
+
+ task_io_account_write(bio->bi_iter.bi_size);
+
+ closure_call(&dio->iop.cl, bch_write, NULL, &dio->cl);
+
+ if (!sync)
+ break;
+
+ closure_sync(&dio->cl);
+ bch_dio_write_done(dio);
+ }
+}
+
+static void bch_dio_write_loop_async(struct closure *cl)
+{
+ struct dio_write *dio =
+ container_of(cl, struct dio_write, cl);
+
+ bch_dio_write_done(dio);
+
+ if (dio->iter.count && !dio->error) {
+ use_mm(dio->mm);
+ bch_do_direct_IO_write(dio, false);
+ unuse_mm(dio->mm);
+
+ continue_at(&dio->cl,
+ bch_dio_write_loop_async,
+ dio->iter.count ? system_wq : NULL);
+ } else {
+#if 0
+ closure_return_with_destructor(cl, bch_dio_write_complete);
+#else
+ closure_debug_destroy(cl);
+ bch_dio_write_complete(cl);
+#endif
+ }
+}
+
+static int bch_direct_IO_write(struct cache_set *c, struct kiocb *req,
+ struct file *file, struct inode *inode,
+ struct iov_iter *iter, loff_t offset)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct dio_write *dio;
+ struct bio *bio;
+ size_t pages = iov_iter_npages(iter, BIO_MAX_PAGES);
+ ssize_t ret;
+ bool sync;
+
+ lockdep_assert_held(&inode->i_rwsem);
+
+ bio = bio_alloc_bioset(GFP_KERNEL, pages, bch_dio_write_bioset);
+
+ dio = container_of(bio, struct dio_write, bio.bio.bio);
+ dio->req = req;
+ dio->written = 0;
+ dio->error = 0;
+ dio->offset = offset;
+ dio->append = false;
+ dio->iovec = NULL;
+ dio->iter = *iter;
+ dio->mm = current->mm;
+
+ if (offset + iter->count > inode->i_size) {
+ /*
+ * XXX: try and convert this to i_size_update_new(), and maybe
+ * make async O_DIRECT appends work
+ */
+
+ dio->append = true;
+ i_size_dirty_get(ei);
+ }
+
+ ret = check_make_i_size_dirty(ei, offset + iter->count);
+ if (ret) {
+ if (dio->append)
+ i_size_dirty_put(ei);
+ bio_put(bio);
+ return ret;
+ }
+
+ closure_init(&dio->cl, NULL);
+
+ inode_dio_begin(inode);
+
+ /*
+ * appends are sync in order to do the i_size update under
+ * i_rwsem, after we know the write has completed successfully
+ */
+ sync = is_sync_kiocb(req) || dio->append;
+
+ bch_do_direct_IO_write(dio, sync);
+
+ if (sync) {
+ closure_debug_destroy(&dio->cl);
+ ret = dio->written ?: dio->error;
+
+ if (dio->append) {
+ loff_t new_i_size = offset + dio->written;
+ int ret2 = 0;
+
+ if (dio->written &&
+ new_i_size > inode->i_size) {
+ struct i_size_update *u;
+ unsigned idx;
+
+ mutex_lock(&ei->update_lock);
+
+ bch_i_size_write(inode, new_i_size);
+
+ fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx) {
+ if (u->new_i_size < new_i_size)
+ u->new_i_size = -1;
+ else
+ BUG();
+ }
+
+ i_size_dirty_put(ei);
+ ret2 = bch_write_inode_size(c, ei, new_i_size);
+
+ mutex_unlock(&ei->update_lock);
+ } else {
+ i_size_dirty_put(ei);
+ }
+ }
+
+ __bch_dio_write_complete(dio);
+ return ret;
+ } else {
+ if (dio->iter.count) {
+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
+ dio->iovec = kmalloc(dio->iter.nr_segs *
+ sizeof(struct iovec),
+ GFP_KERNEL);
+ if (!dio->iovec)
+ dio->error = -ENOMEM;
+ } else {
+ dio->iovec = dio->inline_vecs;
+ }
+
+ memcpy(dio->iovec,
+ dio->iter.iov,
+ dio->iter.nr_segs * sizeof(struct iovec));
+ dio->iter.iov = dio->iovec;
+ }
+
+ continue_at_noreturn(&dio->cl,
+ bch_dio_write_loop_async,
+ dio->iter.count ? system_wq : NULL);
+ return -EIOCBQUEUED;
+ }
+}
+
+ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter)
+{
+ struct file *file = req->ki_filp;
+ struct inode *inode = file->f_inode;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+
+ if ((req->ki_pos|iter->count) & (block_bytes(c) - 1))
+ return -EINVAL;
+
+ return ((iov_iter_rw(iter) == WRITE)
+ ? bch_direct_IO_write
+ : bch_direct_IO_read)(c, req, file, inode, iter, req->ki_pos);
+}
+
+static ssize_t
+bch_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ loff_t pos = iocb->ki_pos;
+ ssize_t written;
+ size_t write_len;
+ pgoff_t end;
+
+ write_len = iov_iter_count(from);
+ end = (pos + write_len - 1) >> PAGE_SHIFT;
+
+ written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
+ if (written)
+ goto out;
+
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached pages from the region we're
+ * about to write. We do this *before* the write so that we can return
+ * without clobbering -EIOCBQUEUED from ->direct_IO().
+ */
+ if (mapping->nrpages) {
+ written = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
+ /*
+ * If a page cannot be invalidated, return 0 to fall back
+ * to buffered write.
+ */
+ if (written) {
+ if (written == -EBUSY)
+ return 0;
+ goto out;
+ }
+ }
+
+ written = mapping->a_ops->direct_IO(iocb, from);
+
+ /*
+ * Finally, try again to invalidate clean pages which might have been
+ * cached by non-direct readahead, or faulted in by get_user_pages()
+ * if the source of the write was an mmap'ed region of the file
+ * we're writing. Either one is a pretty crazy thing to do,
+ * so we don't support it 100%. If this invalidation
+ * fails, tough, the write still worked...
+ *
+ * Augh: this makes no sense for async writes - the second invalidate
+ * has to come after the new data is visible. But we can't just move it
+ * to the end of the dio write path - for async writes we don't have
+ * i_mutex held anymore.
+ */
+ if (mapping->nrpages) {
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
+ }
+out:
+ return written;
+}
+
+static ssize_t __bch_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space * mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out;
+
+ ret = file_update_time(file);
+ if (ret)
+ goto out;
+
+ ret = iocb->ki_flags & IOCB_DIRECT
+ ? bch_direct_write(iocb, from)
+ : generic_perform_write(file, from, iocb->ki_pos);
+
+ if (likely(ret > 0))
+ iocb->ki_pos += ret;
+out:
+ current->backing_dev_info = NULL;
+ return ret;
+}
+
+ssize_t bch_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret > 0)
+ ret = __bch_write_iter(iocb, from);
+ inode_unlock(inode);
+
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+
+ return ret;
+}
+
+int bch_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *page = vmf->page;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct address_space *mapping = inode->i_mapping;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ int ret = VM_FAULT_LOCKED;
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+
+ /*
+ * i_mutex is required for synchronizing with fcollapse() and O_DIRECT
+ * writes
+ */
+ inode_lock(inode);
+
+ lock_page(page);
+ if (page->mapping != mapping ||
+ page_offset(page) > i_size_read(inode)) {
+ unlock_page(page);
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
+
+ if (!PageAllocated(page)) {
+ if (reserve_sectors(c, PAGE_SECTORS)) {
+ unlock_page(page);
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+
+ SetPageAllocated(page);
+ }
+
+ set_page_dirty(page);
+ wait_for_stable_page(page);
+out:
+ inode_unlock(inode);
+ sb_end_pagefault(inode->i_sb);
+ return ret;
+}
+
+void bch_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
+{
+ struct inode *inode = page->mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+
+ if (offset || length < PAGE_SIZE)
+ return;
+
+ bch_clear_page_bits(c, ei, page);
+}
+
+int bch_releasepage(struct page *page, gfp_t gfp_mask)
+{
+ struct inode *inode = page->mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+
+ bch_clear_page_bits(c, ei, page);
+
+ if (PageDirty(page)) {
+ ClearPageDirty(page);
+ cancel_dirty_page(page);
+ }
+
+ return 1;
+}
+
+#ifdef CONFIG_MIGRATION
+int bch_migrate_page(struct address_space *mapping, struct page *newpage,
+ struct page *page, enum migrate_mode mode)
+{
+ int ret;
+
+ ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ if (ret != MIGRATEPAGE_SUCCESS)
+ return ret;
+
+ if (PageAllocated(page)) {
+ ClearPageAllocated(page);
+ SetPageAllocated(newpage);
+ }
+
+ if (PageAppend(page)) {
+ ClearPageAppend(page);
+ SetPageAppend(newpage);
+ }
+
+ migrate_page_copy(newpage, page);
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
+int bch_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ int ret;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+ if (datasync && end <= ei->i_size)
+ goto out;
+
+ /*
+ * If there are still outstanding appends, we may not yet have written
+ * an i_size that exposes the data we just fsynced - however, we can
+ * advance the on-disk i_size up to the end of what we just explicitly
+ * wrote:
+ */
+
+ mutex_lock(&ei->update_lock);
+
+ if (end > ei->i_size &&
+ ei->i_size < inode->i_size) {
+ struct i_size_update *u;
+ unsigned idx;
+ loff_t new_i_size = min_t(u64, inode->i_size,
+ roundup(end, PAGE_SIZE));
+
+ BUG_ON(fifo_empty(&ei->i_size_updates));
+ BUG_ON(new_i_size < ei->i_size);
+
+ /*
+ * There can still be a pending i_size update < the size we're
+ * writing, because it may have been shared with pages > the
+ * size we fsynced to:
+ */
+ fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx)
+ if (u->new_i_size < new_i_size)
+ u->new_i_size = -1;
+
+ ret = bch_write_inode_size(c, ei, new_i_size);
+ }
+
+ mutex_unlock(&ei->update_lock);
+out:
+ inode_unlock(inode);
+
+ if (ret)
+ return ret;
+
+ if (c->opts.journal_flush_disabled)
+ return 0;
+
+ return bch_journal_flush_seq(&c->journal, ei->journal_seq);
+}
+
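+/*
+ * Zero out the part of a partially covered page that falls within
+ * [start, end); pages wholly inside or outside the range need no work here.
+ * If the page isn't already cached, it's only read in (and dirtied) when the
+ * extents btree actually has data covering it.
+ */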
+static int __bch_truncate_page(struct address_space *mapping,
+ pgoff_t index, loff_t start, loff_t end)
+{
+ unsigned start_offset = start & (PAGE_SIZE - 1);
+ unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
+ struct page *page;
+ int ret = 0;
+
+ /* Page boundary? Nothing to do */
+ if (!((index == start >> PAGE_SHIFT && start_offset) ||
+ (index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE)))
+ return 0;
+
+ page = find_lock_page(mapping, index);
+ if (!page) {
+ struct inode *inode = mapping->host;
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+
+ /*
+ * XXX: we're doing two index lookups when we end up reading the
+ * page
+ */
+ bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
+ POS(inode->i_ino,
+ index << (PAGE_SHIFT - 9)));
+ k = bch_btree_iter_peek(&iter);
+ bch_btree_iter_unlock(&iter);
+
+ if (!k.k ||
+ bkey_cmp(bkey_start_pos(k.k),
+ POS(inode->i_ino,
+ (index + 1) << (PAGE_SHIFT - 9))) >= 0)
+ return 0;
+
+ page = find_or_create_page(mapping,
+ index,
+ GFP_KERNEL);
+ if (unlikely(!page)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ if (!PageUptodate(page))
+ if (bch_read_single_page(page, mapping)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ if (index == start >> PAGE_SHIFT &&
+ index == end >> PAGE_SHIFT)
+ zero_user_segment(page, start_offset, end_offset);
+ else if (index == start >> PAGE_SHIFT)
+ zero_user_segment(page, start_offset, PAGE_SIZE);
+ else if (index == end >> PAGE_SHIFT)
+ zero_user_segment(page, 0, end_offset);
+
+ set_page_dirty(page);
+unlock:
+ unlock_page(page);
+ put_page(page);
+out:
+ return ret;
+}
+
+static int bch_truncate_page(struct address_space *mapping, loff_t from)
+{
+ return __bch_truncate_page(mapping, from >> PAGE_SHIFT,
+ from, from + PAGE_SIZE);
+}
+
+int bch_truncate(struct inode *inode, struct iattr *iattr)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct i_size_update *u;
+ bool shrink = iattr->ia_size <= inode->i_size;
+ unsigned idx;
+ int ret = 0;
+
+ inode_dio_wait(inode);
+
+ mutex_lock(&ei->update_lock);
+
+ /*
+ * The new i_size could be bigger or smaller than the current on
+ * disk size (ei->i_size):
+ *
+ * If it's smaller (i.e. we actually are truncating), then in
+ * order to make the truncate appear atomic we have to write out
+ * the new i_size before discarding the data to be truncated.
+ *
+ * However, if the new i_size is bigger than the on disk i_size,
+ * then we _don't_ want to write the new i_size here - because
+ * if there are appends in flight, that would cause us to expose
+ * the range between the old and the new i_size before those
+ * appends have completed.
+ */
+
+ /*
+ * First, cancel i_size_updates that extend past the new
+ * i_size, so the i_size we write here doesn't get
+ * stomped on:
+ */
+ fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx)
+ if (u->new_i_size > iattr->ia_size)
+ u->new_i_size = -1;
+
+ set_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags);
+ u = i_size_update_new(ei, iattr->ia_size);
+
+ atomic_long_inc(&u->count);
+ idx = u - ei->i_size_updates.data;
+
+ if (iattr->ia_size < ei->i_size)
+ ret = bch_write_inode_size(c, ei, iattr->ia_size);
+
+ mutex_unlock(&ei->update_lock);
+
+ /*
+ * XXX: if we error, we leak i_size_dirty count - and we can't
+ * just put it, because it actually is still dirty
+ */
+ if (unlikely(ret))
+ return ret;
+
+ /*
+ * truncate_setsize() does the i_size_write(), can't use
+ * bch_i_size_write()
+ */
+ EBUG_ON(iattr->ia_size < ei->i_size);
+ truncate_setsize(inode, iattr->ia_size);
+
+ /*
+ * There might be persistent reservations (from fallocate())
+ * above i_size, which bch_inode_truncate() will discard - we're
+ * only supposed to discard them if we're doing a real truncate
+ * here (new i_size < current i_size):
+ */
+ if (shrink) {
+ ret = bch_truncate_page(inode->i_mapping, iattr->ia_size);
+ if (unlikely(ret))
+ return ret;
+
+ ret = bch_inode_truncate(c, inode->i_ino,
+ round_up(iattr->ia_size, PAGE_SIZE) >> 9,
+ NULL,
+ &ei->journal_seq);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ setattr_copy(inode, iattr);
+
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ i_size_update_put(c, ei, idx, 1);
+ return 0;
+}
+
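+/*
+ * Punch a hole: zero the partial pages at either end of the range, drop the
+ * page cache for it, then discard the whole blocks between the rounded-up
+ * start and rounded-down end.
+ */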
+static long bch_fpunch(struct inode *inode, loff_t offset, loff_t len)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ u64 ino = inode->i_ino;
+ u64 discard_start = round_up(offset, PAGE_SIZE) >> 9;
+ u64 discard_end = round_down(offset + len, PAGE_SIZE) >> 9;
+ int ret = 0;
+
+ inode_lock(inode);
+ ret = __bch_truncate_page(inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ offset, offset + len);
+ if (unlikely(ret))
+ goto out;
+
+ if (offset >> PAGE_SHIFT !=
+ (offset + len) >> PAGE_SHIFT) {
+ ret = __bch_truncate_page(inode->i_mapping,
+ (offset + len) >> PAGE_SHIFT,
+ offset, offset + len);
+ if (unlikely(ret))
+ goto out;
+ }
+
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
+ if (discard_start < discard_end)
+ ret = bch_discard(c,
+ POS(ino, discard_start),
+ POS(ino, discard_end),
+ 0, NULL, &ei->journal_seq);
+out:
+ inode_unlock(inode);
+
+ return ret;
+}
+
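+/*
+ * Collapse a range: using a pair of linked btree iterators, copy each extent
+ * from offset + len onwards back by len sectors, then truncate the tail and
+ * shrink i_size by len.
+ */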
+static long bch_fcollapse(struct inode *inode, loff_t offset, loff_t len)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct btree_iter src;
+ struct btree_iter dst;
+ BKEY_PADDED(k) copy;
+ struct bkey_s_c k;
+ struct i_size_update *u;
+ loff_t new_size;
+ unsigned idx;
+ int ret;
+
+ if ((offset | len) & (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ bch_btree_iter_init_intent(&dst, c, BTREE_ID_EXTENTS,
+ POS(inode->i_ino, offset >> 9));
+ /* position will be set from dst iter's position: */
+ bch_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN);
+ bch_btree_iter_link(&src, &dst);
+
+ /*
+ * We need i_mutex to keep the page cache consistent with the extents
+ * btree, and the btree consistent with i_size - we don't need outside
+ * locking for the extents btree itself, because we're using linked
+ * iterators
+ *
+ * XXX: hmm, need to prevent reads adding things to the pagecache until
+ * we're done?
+ */
+ inode_lock(inode);
+
+ ret = -EINVAL;
+ if (offset + len >= inode->i_size)
+ goto err;
+
+ if (inode->i_size < len)
+ goto err;
+
+ new_size = inode->i_size - len;
+
+ inode_dio_wait(inode);
+
+ do {
+ ret = filemap_write_and_wait_range(inode->i_mapping,
+ offset, LLONG_MAX);
+ if (ret)
+ goto err;
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ ULONG_MAX);
+ } while (ret == -EBUSY);
+
+ if (ret)
+ goto err;
+
+ while (bkey_cmp(dst.pos,
+ POS(inode->i_ino,
+ round_up(new_size, PAGE_SIZE) >> 9)) < 0) {
+ bch_btree_iter_set_pos(&src,
+ POS(dst.pos.inode, dst.pos.offset + (len >> 9)));
+
+ /* Have to take intent locks before read locks: */
+ ret = bch_btree_iter_traverse(&dst);
+ if (ret)
+ goto err_unwind;
+
+ k = bch_btree_iter_peek_with_holes(&src);
+ if (!k.k) {
+ ret = -EIO;
+ goto err_unwind;
+ }
+
+ bkey_reassemble(&copy.k, k);
+
+ if (bkey_deleted(&copy.k.k))
+ copy.k.k.type = KEY_TYPE_DISCARD;
+
+ bch_cut_front(src.pos, &copy.k);
+ copy.k.k.p.offset -= len >> 9;
+
+ BUG_ON(bkey_cmp(dst.pos, bkey_start_pos(&copy.k.k)));
+
+ ret = bch_btree_insert_at(&dst,
+ &keylist_single(&copy.k),
+ NULL, &ei->journal_seq,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL);
+ if (ret < 0 && ret != -EINTR)
+ goto err_unwind;
+
+ bch_btree_iter_unlock(&src);
+ }
+
+ bch_btree_iter_unlock(&src);
+ bch_btree_iter_unlock(&dst);
+
+ ret = bch_inode_truncate(c, inode->i_ino,
+ round_up(new_size, PAGE_SIZE) >> 9,
+ NULL, &ei->journal_seq);
+ if (ret)
+ goto err_unwind;
+
+ mutex_lock(&ei->update_lock);
+
+ /*
+ * Cancel i_size updates > new_size:
+ *
+ * Note: we're also cancelling i_size updates for appends < new_size, and
+ * writing the new i_size before they finish - it would be better to use
+ * an i_size_update here, like truncate does, so we can sequence our i_size
+ * updates with outstanding appends and not have to cancel them:
+ */
+ fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx)
+ u->new_i_size = -1;
+
+ ret = bch_write_inode_size(c, ei, new_size);
+ bch_i_size_write(inode, new_size);
+
+ truncate_pagecache(inode, offset);
+
+ mutex_unlock(&ei->update_lock);
+
+ inode_unlock(inode);
+
+ return ret;
+err_unwind:
+ BUG();
+err:
+ bch_btree_iter_unlock(&src);
+ bch_btree_iter_unlock(&dst);
+ inode_unlock(inode);
+ return ret;
+}
+
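+/*
+ * fallocate/zero range: walk the extents in the affected range and replace
+ * holes (and, for FALLOC_FL_ZERO_RANGE, existing data) with BCH_RESERVATION
+ * keys so the space is guaranteed; unless FALLOC_FL_KEEP_SIZE was given,
+ * extend i_size afterwards.
+ */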
+static long bch_fallocate(struct inode *inode, int mode,
+ loff_t offset, loff_t len)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct btree_iter iter;
+ struct bkey_i reservation;
+ struct bkey_s_c k;
+ struct bpos end;
+ loff_t block_start, block_end;
+ loff_t new_size = offset + len;
+ unsigned sectors;
+ int ret;
+
+ bch_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+
+ inode_lock(inode);
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ new_size > inode->i_size) {
+ ret = inode_newsize_ok(inode, new_size);
+ if (ret)
+ goto err;
+ }
+
+ if (mode & FALLOC_FL_ZERO_RANGE) {
+ /* just for __bch_truncate_page(): */
+ inode_dio_wait(inode);
+
+ ret = __bch_truncate_page(inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ offset, offset + len);
+
+ if (!ret &&
+ offset >> PAGE_SHIFT !=
+ (offset + len) >> PAGE_SHIFT)
+ ret = __bch_truncate_page(inode->i_mapping,
+ (offset + len) >> PAGE_SHIFT,
+ offset, offset + len);
+
+ if (unlikely(ret))
+ goto err;
+
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
+ block_start = round_up(offset, PAGE_SIZE);
+ block_end = round_down(offset + len, PAGE_SIZE);
+ } else {
+ block_start = round_down(offset, PAGE_SIZE);
+ block_end = round_up(offset + len, PAGE_SIZE);
+ }
+
+ bch_btree_iter_set_pos(&iter, POS(inode->i_ino, block_start >> 9));
+ end = POS(inode->i_ino, block_end >> 9);
+
+ while (bkey_cmp(iter.pos, end) < 0) {
+ unsigned flags = 0;
+
+ k = bch_btree_iter_peek_with_holes(&iter);
+ if (!k.k) {
+ ret = bch_btree_iter_unlock(&iter) ?: -EIO;
+ goto err;
+ }
+
+ if (bkey_extent_is_data(k.k)) {
+ if (!(mode & FALLOC_FL_ZERO_RANGE)) {
+ bch_btree_iter_advance_pos(&iter);
+ continue;
+ }
+
+ /* don't check for -ENOSPC if we're deleting data: */
+ flags |= BTREE_INSERT_NOFAIL;
+ }
+
+ bkey_init(&reservation.k);
+ reservation.k.type = BCH_RESERVATION;
+ reservation.k.p = k.k->p;
+ reservation.k.size = k.k->size;
+
+ bch_cut_front(iter.pos, &reservation);
+ bch_cut_back(end, &reservation.k);
+
+ sectors = reservation.k.size;
+
+ ret = reserve_sectors(c, sectors);
+ if (ret)
+ goto err;
+
+ ret = bch_btree_insert_at(&iter,
+ &keylist_single(&reservation),
+ NULL, &ei->journal_seq,
+ BTREE_INSERT_ATOMIC|flags);
+
+ atomic64_sub_bug(sectors, &c->sectors_reserved);
+
+ if (ret < 0 && ret != -EINTR)
+ goto err;
+
+ }
+ bch_btree_iter_unlock(&iter);
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ new_size > inode->i_size) {
+ struct i_size_update *u;
+ unsigned idx;
+
+ mutex_lock(&ei->update_lock);
+ bch_i_size_write(inode, new_size);
+
+ u = i_size_update_new(ei, new_size);
+ idx = u - ei->i_size_updates.data;
+ atomic_long_inc(&u->count);
+ mutex_unlock(&ei->update_lock);
+
+ i_size_update_put(c, ei, idx, 1);
+ }
+
+ inode_unlock(inode);
+
+ return 0;
+err:
+ bch_btree_iter_unlock(&iter);
+ inode_unlock(inode);
+ return ret;
+}
+
+long bch_fallocate_dispatch(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(file);
+
+ if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
+ return bch_fallocate(inode, mode, offset, len);
+
+ if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
+ return bch_fpunch(inode, offset, len);
+
+ if (mode == FALLOC_FL_COLLAPSE_RANGE)
+ return bch_fcollapse(inode, offset, len);
+
+ return -EOPNOTSUPP;
+}
diff --git a/drivers/md/bcache/fs-io.h b/drivers/md/bcache/fs-io.h
new file mode 100644
index 000000000000..cb4574785ca1
--- /dev/null
+++ b/drivers/md/bcache/fs-io.h
@@ -0,0 +1,77 @@
+#ifndef _BCACHE_FS_IO_H
+#define _BCACHE_FS_IO_H
+
+#include <linux/uio.h>
+
+int bch_writepage(struct page *, struct writeback_control *);
+int bch_readpage(struct file *, struct page *);
+
+int bch_writepages(struct address_space *, struct writeback_control *);
+int bch_readpages(struct file *, struct address_space *,
+ struct list_head *, unsigned);
+
+int bch_write_begin(struct file *, struct address_space *, loff_t,
+ unsigned, unsigned, struct page **, void **);
+int bch_write_end(struct file *, struct address_space *, loff_t,
+ unsigned, unsigned, struct page *, void *);
+
+ssize_t bch_direct_IO(struct kiocb *, struct iov_iter *);
+
+ssize_t bch_write_iter(struct kiocb *, struct iov_iter *);
+
+int bch_fsync(struct file *, loff_t, loff_t, int);
+
+int bch_truncate(struct inode *, struct iattr *);
+long bch_fallocate_dispatch(struct file *, int, loff_t, loff_t);
+
+int bch_page_mkwrite(struct vm_area_struct *, struct vm_fault *);
+void bch_invalidatepage(struct page *, unsigned int, unsigned int);
+int bch_releasepage(struct page *, gfp_t);
+int bch_migrate_page(struct address_space *, struct page *,
+ struct page *, enum migrate_mode);
+
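+/*
+ * A bch_writepage_io gathers dirty pages from writepages into a single
+ * bch_write operation; i_size_update_count[] holds the refs transferred from
+ * those pages so their i_size updates aren't written out until the IO
+ * completes.
+ */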
+struct bch_writepage_io {
+ struct closure cl;
+
+ struct bch_inode_info *ei;
+ unsigned long i_size_update_count[I_SIZE_UPDATE_ENTRIES];
+ unsigned long sectors_reserved;
+
+ struct bch_write_op op;
+ /* must come last: */
+ struct bch_write_bio bio;
+};
+
+extern struct bio_set *bch_writepage_bioset;
+
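+/*
+ * State for an O_DIRECT write, which may span several bch_write calls. For
+ * the async case the caller's iovec array is copied into iovec/inline_vecs,
+ * since it may not survive once we return -EIOCBQUEUED.
+ */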
+struct dio_write {
+ struct closure cl;
+ struct kiocb *req;
+ long written;
+ long error;
+ loff_t offset;
+ bool append;
+
+ struct iovec *iovec;
+ struct iovec inline_vecs[UIO_FASTIOV];
+ struct iov_iter iter;
+
+ struct mm_struct *mm;
+
+ struct bch_write_op iop;
+ /* must be last: */
+ struct bch_write_bio bio;
+};
+
+extern struct bio_set *bch_dio_write_bioset;
+
+struct dio_read {
+ struct closure cl;
+ struct kiocb *req;
+ long ret;
+ struct bio bio;
+};
+
+extern struct bio_set *bch_dio_read_bioset;
+
+#endif /* _BCACHE_FS_IO_H */
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index 341a6e2e2cea..19544b5db60f 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -6,8 +6,8 @@
#include "dirent.h"
#include "extents.h"
#include "fs.h"
+#include "fs-io.h"
#include "inode.h"
-#include "io.h"
#include "journal.h"
#include "keylist.h"
#include "super.h"
@@ -16,111 +16,14 @@
#include <linux/aio.h>
#include <linux/backing-dev.h>
#include <linux/compat.h>
-#include <linux/falloc.h>
-#include <linux/migrate.h>
-#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/statfs.h>
-#include <linux/task_io_accounting_ops.h>
-#include <linux/uio.h>
-#include <linux/writeback.h>
#include <linux/xattr.h>
-/*
- * our page flags:
- *
- * allocated - page has space on disk reserved for it (c->sectors_reserved) -
- * -ENOSPC was checked then, shouldn't be checked later
- *
- * append - page is dirty from an append write, new i_size can't be written
- * until after page is written; ref held on ei->i_size_dirty_count
- */
-
-#define PF_ANY(page, enforce) page
-PAGEFLAG(Allocated, private, PF_ANY)
-TESTSCFLAG(Allocated, private, PF_ANY)
-
-PAGEFLAG(Append, private_2, PF_ANY)
-TESTSCFLAG(Append, private_2, PF_ANY)
-#undef PF_ANY
-
-static struct bio_set *bch_writepage_bioset;
static struct kmem_cache *bch_inode_cache;
static void bch_inode_init(struct bch_inode_info *, struct bkey_s_c_inode);
-static int bch_read_single_page(struct page *, struct address_space *);
-
-#define SECTORS_CACHE 1024
-
-static int reserve_sectors(struct cache_set *c, unsigned sectors)
-{
- u64 sectors_to_get = SECTORS_CACHE + sectors;
-
- if (likely(atomic64_sub_return(sectors,
- &c->sectors_reserved_cache) >= 0))
- return 0;
-
- atomic64_add(sectors_to_get, &c->sectors_reserved);
-
- if (likely(!cache_set_full(c))) {
- atomic64_add(sectors_to_get, &c->sectors_reserved_cache);
- return 0;
- }
-
- atomic64_sub_bug(sectors_to_get, &c->sectors_reserved);
- atomic64_add(sectors, &c->sectors_reserved_cache);
- return -ENOSPC;
-}
-
-static void i_size_dirty_put(struct bch_inode_info *ei)
-{
- atomic_long_dec_bug(&ei->i_size_dirty_count);
-}
-
-static void i_size_dirty_get(struct bch_inode_info *ei)
-{
- lockdep_assert_held(&ei->vfs_inode.i_rwsem);
-
- atomic_long_inc(&ei->i_size_dirty_count);
-}
-
-static void i_size_update_put(struct cache_set *,
- struct bch_inode_info *,
- unsigned, unsigned long);
-
-static void bch_clear_page_bits(struct cache_set *c, struct bch_inode_info *ei,
- struct page *page)
-{
- EBUG_ON(!PageLocked(page));
-
- if (PageAllocated(page)) {
- atomic64_sub_bug(PAGE_SECTORS, &c->sectors_reserved);
- ClearPageAllocated(page);
- }
-
- if (PageAppend(page)) {
- struct bch_page_state *s = (void *) &page->private;
-
- i_size_update_put(c, ei, s->idx, 1);
- ClearPageAppend(page);
- }
-}
-
-/*
- * In memory i_size should never be < on disk i_size:
- */
-static void bch_i_size_write(struct inode *inode, loff_t new_i_size)
-{
- struct bch_inode_info *ei = to_bch_ei(inode);
-
- EBUG_ON(new_i_size < ei->i_size);
- i_size_write(inode, new_i_size);
-}
-
-/* returns true if we want to do the update */
-typedef int (*inode_set_fn)(struct bch_inode_info *,
- struct bch_inode *, void *);
/*
* I_SIZE_DIRTY requires special handling:
@@ -184,10 +87,10 @@ static void bch_write_inode_checks(struct cache_set *c,
}
}
-static int __must_check __bch_write_inode(struct cache_set *c,
- struct bch_inode_info *ei,
- inode_set_fn set,
- void *p)
+int __must_check __bch_write_inode(struct cache_set *c,
+ struct bch_inode_info *ei,
+ inode_set_fn set,
+ void *p)
{
struct btree_iter iter;
struct inode *inode = &ei->vfs_inode;
@@ -254,151 +157,6 @@ static int __must_check bch_write_inode(struct cache_set *c,
return __bch_write_inode(c, ei, NULL, NULL);
}
-static int inode_set_size(struct bch_inode_info *ei, struct bch_inode *bi,
- void *p)
-{
- loff_t *new_i_size = p;
- unsigned i_flags = le32_to_cpu(bi->i_flags);
-
- lockdep_assert_held(&ei->update_lock);
-
- bi->i_size = cpu_to_le64(*new_i_size);
-
- if (atomic_long_read(&ei->i_size_dirty_count))
- i_flags |= BCH_INODE_I_SIZE_DIRTY;
- else
- i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
-
- bi->i_flags = cpu_to_le32(i_flags);;
-
- return 0;
-}
-
-static int __must_check bch_write_inode_size(struct cache_set *c,
- struct bch_inode_info *ei,
- loff_t new_size)
-{
- return __bch_write_inode(c, ei, inode_set_size, &new_size);
-}
-
-static int inode_set_dirty(struct bch_inode_info *ei,
- struct bch_inode *bi, void *p)
-{
- bi->i_flags = cpu_to_le32(le32_to_cpu(bi->i_flags)|
- BCH_INODE_I_SIZE_DIRTY);
- return 0;
-}
-
-static int check_make_i_size_dirty(struct bch_inode_info *ei,
- loff_t offset)
-{
- bool need_set_dirty;
- unsigned seq;
- int ret = 0;
-
- do {
- seq = read_seqcount_begin(&ei->shadow_i_size_lock);
- need_set_dirty = offset > ei->i_size &&
- !(ei->i_flags & BCH_INODE_I_SIZE_DIRTY);
- } while (read_seqcount_retry(&ei->shadow_i_size_lock, seq));
-
- if (!need_set_dirty)
- return 0;
-
- mutex_lock(&ei->update_lock);
-
- /* recheck under lock.. */
-
- if (offset > ei->i_size &&
- !(ei->i_flags & BCH_INODE_I_SIZE_DIRTY)) {
- struct cache_set *c = ei->vfs_inode.i_sb->s_fs_info;
-
- ret = __bch_write_inode(c, ei, inode_set_dirty, NULL);
- }
-
- mutex_unlock(&ei->update_lock);
-
- return ret;
-}
-
-static void i_size_update_put(struct cache_set *c,
- struct bch_inode_info *ei,
- unsigned idx,
- unsigned long count)
-{
- struct i_size_update *u = &ei->i_size_updates.data[idx];
- loff_t new_i_size = -1;
- long r;
-
- if (!count)
- return;
-
- r = atomic_long_sub_return(count, &u->count);
- BUG_ON(r < 0);
-
- if (r)
- return;
-
- /*
- * Flush i_size_updates entries in order - from the end of the fifo -
- * if the entry at the end is finished (refcount has gone to 0):
- */
-
- mutex_lock(&ei->update_lock);
-
- while (!fifo_empty(&ei->i_size_updates) &&
- !atomic_long_read(&(u = &fifo_front(&ei->i_size_updates))->count)) {
- struct i_size_update t;
-
- i_size_dirty_put(ei);
-
- if (u->new_i_size != -1) {
- BUG_ON(u->new_i_size < ei->i_size);
- new_i_size = u->new_i_size;
- }
-
- fifo_pop(&ei->i_size_updates, t);
- }
-
- if (new_i_size != -1) {
- int ret = bch_write_inode_size(c, ei, new_i_size);
-
- ret = ret;
- /*
- * XXX: need to pin the inode in memory if the inode update
- * fails
- */
- ret = ret;
- }
-
- mutex_unlock(&ei->update_lock);
-}
-
-static struct i_size_update *i_size_update_new(struct bch_inode_info *ei,
- loff_t new_size)
-{
- struct i_size_update *u;
-
- lockdep_assert_held(&ei->update_lock);
-
- if (fifo_empty(&ei->i_size_updates) ||
- (test_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags) &&
- !fifo_full(&ei->i_size_updates))) {
- clear_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags);
- fifo_push(&ei->i_size_updates,
- (struct i_size_update) { 0 });
-
- u = &fifo_back(&ei->i_size_updates);
- atomic_long_set(&u->count, 0);
- i_size_dirty_get(ei);
- }
-
- u = &fifo_back(&ei->i_size_updates);
- u->new_i_size = new_size;
-
- return u;
-}
-
static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
{
struct cache_set *c = sb->s_fs_info;
@@ -832,79 +590,6 @@ static int bch_rename2(struct inode *old_dir, struct dentry *old_dentry,
return bch_rename(old_dir, old_dentry, new_dir, new_dentry);
}
-static int __bch_truncate_page(struct address_space *mapping,
- pgoff_t index, loff_t start, loff_t end)
-{
- unsigned start_offset = start & (PAGE_SIZE - 1);
- unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
- struct page *page;
- int ret = 0;
-
- /* Page boundary? Nothing to do */
- if (!((index == start >> PAGE_SHIFT && start_offset) ||
- (index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE)))
- return 0;
-
- page = find_lock_page(mapping, index);
- if (!page) {
- struct inode *inode = mapping->host;
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct btree_iter iter;
- struct bkey_s_c k;
-
- /*
- * XXX: we're doing two index lookups when we end up reading the
- * page
- */
- bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
- POS(inode->i_ino,
- index << (PAGE_SHIFT - 9)));
- k = bch_btree_iter_peek(&iter);
- bch_btree_iter_unlock(&iter);
-
- if (!k.k ||
- bkey_cmp(bkey_start_pos(k.k),
- POS(inode->i_ino,
- (index + 1) << (PAGE_SHIFT - 9))) >= 0)
- return 0;
-
- page = find_or_create_page(mapping,
- index,
- GFP_KERNEL);
- if (unlikely(!page)) {
- ret = -ENOMEM;
- goto out;
- }
- }
-
- if (!PageUptodate(page))
- if (bch_read_single_page(page, mapping)) {
- ret = -EIO;
- goto unlock;
- }
-
- if (index == start >> PAGE_SHIFT &&
- index == end >> PAGE_SHIFT)
- zero_user_segment(page, start_offset, end_offset);
- else if (index == start >> PAGE_SHIFT)
- zero_user_segment(page, start_offset, PAGE_SIZE);
- else if (index == end >> PAGE_SHIFT)
- zero_user_segment(page, 0, end_offset);
-
- set_page_dirty(page);
-unlock:
- unlock_page(page);
- put_page(page);
-out:
- return ret;
-}
-
-static int bch_truncate_page(struct address_space *mapping, loff_t from)
-{
- return __bch_truncate_page(mapping, from >> PAGE_SHIFT,
- from, from + PAGE_SIZE);
-}
-
static int bch_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
@@ -922,85 +607,7 @@ static int bch_setattr(struct dentry *dentry, struct iattr *iattr)
return ret;
if (iattr->ia_valid & ATTR_SIZE) {
- bool shrink = iattr->ia_size <= inode->i_size;
- struct i_size_update *u;
- unsigned idx;
-
- inode_dio_wait(inode);
-
- mutex_lock(&ei->update_lock);
-
- /*
- * The new i_size could be bigger or smaller than the current on
- * disk size (ei->i_size):
- *
- * If it's smaller (i.e. we actually are truncating), then in
- * order to make the truncate appear atomic we have to write out
- * the new i_size before discarding the data to be truncated.
- *
- * However, if the new i_size is bigger than the on disk i_size,
- * then we _don't_ want to write the new i_size here - because
- * if there are appends in flight, that would cause us to expose
- * the range between the old and the new i_size before those
- * appends have completed.
- */
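	/*
	 * Concretely (a sketch of the two cases): shrinking a 1MB file to
	 * 100k writes the 100k i_size to the inode _before_ discarding
	 * extents past it, so a crash in between never exposes stale data;
	 * extending it to 2MB must not write the 2MB i_size yet, since that
	 * would expose the range between the old and new i_size before in
	 * flight appends have completed - hence the i_size_update taken below.
	 */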
-
- /*
- * First, cancel i_size_updates that extend past the new
- * i_size, so the i_size we write here doesn't get
- * stomped on:
- */
- fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx)
- if (u->new_i_size > iattr->ia_size)
- u->new_i_size = -1;
-
- set_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags);
- u = i_size_update_new(ei, iattr->ia_size);
-
- atomic_long_inc(&u->count);
- idx = u - ei->i_size_updates.data;
-
- if (iattr->ia_size < ei->i_size)
- ret = bch_write_inode_size(c, ei, iattr->ia_size);
-
- mutex_unlock(&ei->update_lock);
-
- /*
- * XXX: if we error, we leak i_size_dirty count - and we can't
- * just put it, because it actually is still dirty
- */
- if (unlikely(ret))
- return ret;
-
- /*
- * truncate_setsize() does the i_size_write(), can't use
- * bch_i_size_write()
- */
- EBUG_ON(iattr->ia_size < ei->i_size);
- truncate_setsize(inode, iattr->ia_size);
-
- /*
- * There might be persistent reservations (from fallocate())
- * above i_size, which bch_inode_truncate() will discard - we're
- * only supposed to discard them if we're doing a real truncate
- * here (new i_size < current i_size):
- */
- if (shrink) {
- ret = bch_truncate_page(inode->i_mapping, iattr->ia_size);
- if (unlikely(ret))
- return ret;
-
- ret = bch_inode_truncate(c, inode->i_ino,
- round_up(iattr->ia_size, PAGE_SIZE) >> 9,
- NULL, &ei->journal_seq);
- if (unlikely(ret))
- return ret;
- }
-
- setattr_copy(inode, iattr);
-
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- i_size_update_put(c, ei, idx, 1);
+ ret = bch_truncate(inode, iattr);
} else {
mutex_lock(&ei->update_lock);
setattr_copy(inode, iattr);
@@ -1112,482 +719,12 @@ out:
return ret < 0 ? ret : 0;
}
-static long bch_fpunch(struct inode *inode, loff_t offset, loff_t len)
-{
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
- u64 ino = inode->i_ino;
- u64 discard_start = round_up(offset, PAGE_SIZE) >> 9;
- u64 discard_end = round_down(offset + len, PAGE_SIZE) >> 9;
- int ret = 0;
-
- inode_lock(inode);
- ret = __bch_truncate_page(inode->i_mapping,
- offset >> PAGE_SHIFT,
- offset, offset + len);
- if (unlikely(ret))
- goto out;
-
- if (offset >> PAGE_SHIFT !=
- (offset + len) >> PAGE_SHIFT) {
- ret = __bch_truncate_page(inode->i_mapping,
- (offset + len) >> PAGE_SHIFT,
- offset, offset + len);
- if (unlikely(ret))
- goto out;
- }
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
-
- if (discard_start < discard_end)
- ret = bch_discard(c,
- POS(ino, discard_start),
- POS(ino, discard_end),
- 0, NULL, &ei->journal_seq);
-out:
- inode_unlock(inode);
-
- return ret;
-}
-
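A worked example of the rounding above, assuming 4 KiB pages: punching a hole at offset 5000 of length 20000 zeroes the partial pages at each end via __bch_truncate_page() and drops the page cache for the range, but only discards the fully covered pages: discard_start = round_up(5000, 4096) >> 9 = 16 and discard_end = round_down(25000, 4096) >> 9 = 48, so bytes 8192..24575 are discarded from the extents btree while the partial head and tail remain as zeroed data.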
-static long bch_fcollapse(struct inode *inode, loff_t offset, loff_t len)
-{
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct btree_iter src;
- struct btree_iter dst;
- BKEY_PADDED(k) copy;
- struct bkey_s_c k;
- struct i_size_update *u;
- loff_t new_size;
- unsigned idx;
- int ret;
-
- if ((offset | len) & (PAGE_SIZE - 1))
- return -EINVAL;
-
- bch_btree_iter_init_intent(&dst, c, BTREE_ID_EXTENTS,
- POS(inode->i_ino, offset >> 9));
- /* position will be set from dst iter's position: */
- bch_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN);
- bch_btree_iter_link(&src, &dst);
-
- /*
- * We need i_mutex to keep the page cache consistent with the extents
- * btree, and the btree consistent with i_size - we don't need outside
- * locking for the extents btree itself, because we're using linked
- * iterators
- *
- * XXX: hmm, need to prevent reads adding things to the pagecache until
- * we're done?
- */
- inode_lock(inode);
-
- ret = -EINVAL;
- if (offset + len >= inode->i_size)
- goto err;
-
- if (inode->i_size < len)
- goto err;
-
- new_size = inode->i_size - len;
-
- inode_dio_wait(inode);
-
- do {
- ret = filemap_write_and_wait_range(inode->i_mapping,
- offset, LLONG_MAX);
- if (ret)
- goto err;
-
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- offset >> PAGE_SHIFT,
- ULONG_MAX);
- } while (ret == -EBUSY);
-
- if (ret)
- goto err;
-
- while (bkey_cmp(dst.pos,
- POS(inode->i_ino,
- round_up(new_size, PAGE_SIZE) >> 9)) < 0) {
- bch_btree_iter_set_pos(&src,
- POS(dst.pos.inode, dst.pos.offset + (len >> 9)));
-
- /* Have to take intent locks before read locks: */
- ret = bch_btree_iter_traverse(&dst);
- if (ret)
- goto err_unwind;
-
- k = bch_btree_iter_peek_with_holes(&src);
- if (!k.k) {
- ret = -EIO;
- goto err_unwind;
- }
-
- bkey_reassemble(&copy.k, k);
-
- if (bkey_deleted(&copy.k.k))
- copy.k.k.type = KEY_TYPE_DISCARD;
-
- bch_cut_front(src.pos, &copy.k);
- copy.k.k.p.offset -= len >> 9;
-
- BUG_ON(bkey_cmp(dst.pos, bkey_start_pos(&copy.k.k)));
-
- ret = bch_btree_insert_at(&dst,
- &keylist_single(&copy.k),
- NULL, &ei->journal_seq,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL);
- if (ret < 0 && ret != -EINTR)
- goto err_unwind;
-
- bch_btree_iter_unlock(&src);
- }
-
- bch_btree_iter_unlock(&src);
- bch_btree_iter_unlock(&dst);
-
- ret = bch_inode_truncate(c, inode->i_ino,
- round_up(new_size, PAGE_SIZE) >> 9,
- NULL, &ei->journal_seq);
- if (ret)
- goto err_unwind;
-
- mutex_lock(&ei->update_lock);
-
- /*
- * Cancel i_size updates > new_size:
- *
-	 * Note: we're also cancelling i_size updates for appends < new_size, and
-	 * writing the new i_size before they finish - it would be better to use
-	 * an i_size_update here like truncate does, so we could sequence our
-	 * i_size updates with outstanding appends and not have to cancel them:
- */
- fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx)
- u->new_i_size = -1;
-
- ret = bch_write_inode_size(c, ei, new_size);
- bch_i_size_write(inode, new_size);
-
- truncate_pagecache(inode, offset);
-
- mutex_unlock(&ei->update_lock);
-
- inode_unlock(inode);
-
- return ret;
-err_unwind:
- BUG();
-err:
- bch_btree_iter_unlock(&src);
- bch_btree_iter_unlock(&dst);
- inode_unlock(inode);
- return ret;
-}
-
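A worked example, assuming 4 KiB pages: collapsing 64 KiB at offset 128 KiB out of a 1 MiB file starts dst at sector 256; each iteration peeks the key 128 sectors ahead (holes become KEY_TYPE_DISCARD keys), shifts it back by 128 sectors and re-inserts it at dst, until dst reaches sector 1920. The inode is then truncated to 1920 sectors, pending i_size updates are cancelled, and the on-disk and in-memory i_size become 983040 (1 MiB - 64 KiB).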
-static long bch_fallocate_fallocate(struct inode *inode, int mode,
- loff_t offset, loff_t len)
-{
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct btree_iter iter;
- struct bkey_i reservation;
- struct bkey_s_c k;
- struct bpos end;
- loff_t block_start, block_end;
- loff_t new_size = offset + len;
- unsigned sectors;
- int ret;
-
- bch_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
-
- inode_lock(inode);
-
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- new_size > inode->i_size) {
- ret = inode_newsize_ok(inode, new_size);
- if (ret)
- goto err;
- }
-
- if (mode & FALLOC_FL_ZERO_RANGE) {
- /* just for __bch_truncate_page(): */
- inode_dio_wait(inode);
-
- ret = __bch_truncate_page(inode->i_mapping,
- offset >> PAGE_SHIFT,
- offset, offset + len);
-
- if (!ret &&
- offset >> PAGE_SHIFT !=
- (offset + len) >> PAGE_SHIFT)
- ret = __bch_truncate_page(inode->i_mapping,
- (offset + len) >> PAGE_SHIFT,
- offset, offset + len);
-
- if (unlikely(ret))
- goto err;
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
-
- block_start = round_up(offset, PAGE_SIZE);
- block_end = round_down(offset + len, PAGE_SIZE);
- } else {
- block_start = round_down(offset, PAGE_SIZE);
- block_end = round_up(offset + len, PAGE_SIZE);
- }
-
- bch_btree_iter_set_pos(&iter, POS(inode->i_ino, block_start >> 9));
- end = POS(inode->i_ino, block_end >> 9);
-
- while (bkey_cmp(iter.pos, end) < 0) {
- unsigned flags = 0;
-
- k = bch_btree_iter_peek_with_holes(&iter);
- if (!k.k) {
- ret = bch_btree_iter_unlock(&iter) ?: -EIO;
- goto err;
- }
-
- if (bkey_extent_is_data(k.k)) {
- if (!(mode & FALLOC_FL_ZERO_RANGE)) {
- bch_btree_iter_advance_pos(&iter);
- continue;
- }
-
- /* don't check for -ENOSPC if we're deleting data: */
- flags |= BTREE_INSERT_NOFAIL;
- }
-
- bkey_init(&reservation.k);
- reservation.k.type = BCH_RESERVATION;
- reservation.k.p = k.k->p;
- reservation.k.size = k.k->size;
-
- bch_cut_front(iter.pos, &reservation);
- bch_cut_back(end, &reservation.k);
-
- sectors = reservation.k.size;
-
- ret = reserve_sectors(c, sectors);
- if (ret)
- goto err;
-
- ret = bch_btree_insert_at(&iter,
- &keylist_single(&reservation),
- NULL, &ei->journal_seq,
- BTREE_INSERT_ATOMIC|flags);
-
- atomic64_sub_bug(sectors, &c->sectors_reserved);
-
- if (ret < 0 && ret != -EINTR)
- goto err;
-
- }
- bch_btree_iter_unlock(&iter);
-
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- new_size > inode->i_size) {
- struct i_size_update *u;
- unsigned idx;
-
- mutex_lock(&ei->update_lock);
- bch_i_size_write(inode, new_size);
-
- u = i_size_update_new(ei, new_size);
- idx = u - ei->i_size_updates.data;
- atomic_long_inc(&u->count);
- mutex_unlock(&ei->update_lock);
-
- i_size_update_put(c, ei, idx, 1);
- }
-
- inode_unlock(inode);
-
- return 0;
-err:
- bch_btree_iter_unlock(&iter);
- inode_unlock(inode);
- return ret;
-}
-
-static long bch_fallocate(struct file *file, int mode,
- loff_t offset, loff_t len)
-{
- struct inode *inode = file_inode(file);
-
- if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
- return bch_fallocate_fallocate(inode, mode, offset, len);
-
- if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
- return bch_fpunch(inode, offset, len);
-
- if (mode == FALLOC_FL_COLLAPSE_RANGE)
- return bch_fcollapse(inode, offset, len);
-
- return -EOPNOTSUPP;
-}
-
-static int bch_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
- struct address_space *mapping = inode->i_mapping;
- struct cache_set *c = inode->i_sb->s_fs_info;
- int ret = VM_FAULT_LOCKED;
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
-
- /*
-	 * i_mutex is required for synchronizing with fcollapse() and O_DIRECT
-	 * writes
- */
- inode_lock(inode);
-
- lock_page(page);
- if (page->mapping != mapping ||
- page_offset(page) > i_size_read(inode)) {
- unlock_page(page);
- ret = VM_FAULT_NOPAGE;
- goto out;
- }
-
- if (!PageAllocated(page)) {
- if (reserve_sectors(c, PAGE_SECTORS)) {
- unlock_page(page);
- ret = VM_FAULT_SIGBUS;
- goto out;
- }
-
- SetPageAllocated(page);
- }
-
- set_page_dirty(page);
- wait_for_stable_page(page);
-out:
- inode_unlock(inode);
- sb_end_pagefault(inode->i_sb);
- return ret;
-}
-
static const struct vm_operations_struct bch_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = bch_page_mkwrite,
};
-static ssize_t
-bch_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- ssize_t written;
- size_t write_len;
- pgoff_t end;
-
- write_len = iov_iter_count(from);
- end = (pos + write_len - 1) >> PAGE_SHIFT;
-
- written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
- if (written)
- goto out;
-
- /*
- * After a write we want buffered reads to be sure to go to disk to get
-	 * the new data. We invalidate clean cached pages from the region we're
- * about to write. We do this *before* the write so that we can return
- * without clobbering -EIOCBQUEUED from ->direct_IO().
- */
- if (mapping->nrpages) {
- written = invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);
- /*
- * If a page can not be invalidated, return 0 to fall back
- * to buffered write.
- */
- if (written) {
- if (written == -EBUSY)
- return 0;
- goto out;
- }
- }
-
- written = mapping->a_ops->direct_IO(iocb, from);
-
- /*
- * Finally, try again to invalidate clean pages which might have been
- * cached by non-direct readahead, or faulted in by get_user_pages()
- * if the source of the write was an mmap'ed region of the file
- * we're writing. Either one is a pretty crazy thing to do,
- * so we don't support it 100%. If this invalidation
- * fails, tough, the write still worked...
- *
-	 * Augh: this makes no sense for async writes - the second invalidate
-	 * has to come after the new data is visible. But we can't just move it
-	 * to the end of the dio write path - for async writes we don't have
-	 * i_mutex held anymore.
- */
- if (mapping->nrpages) {
- invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);
- }
-out:
- return written;
-}
-
-static ssize_t __bch_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
- ret = file_remove_privs(file);
- if (ret)
- goto out;
-
- ret = file_update_time(file);
- if (ret)
- goto out;
-
- ret = iocb->ki_flags & IOCB_DIRECT
- ? bch_direct_write(iocb, from, iocb->ki_pos)
- : generic_perform_write(file, from, iocb->ki_pos);
-
- if (likely(ret > 0))
- iocb->ki_pos += ret;
-out:
- current->backing_dev_info = NULL;
- return ret;
-}
-
-static ssize_t bch_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- ssize_t ret;
-
- inode_lock(inode);
- ret = generic_write_checks(iocb, from);
- if (ret > 0)
- ret = __bch_write_iter(iocb, from);
- inode_unlock(inode);
-
- if (ret > 0) {
- ssize_t err;
-
- err = generic_write_sync(iocb, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-
static int bch_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
@@ -1596,65 +733,6 @@ static int bch_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static int bch_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct inode *inode = file->f_mapping->host;
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
- int ret;
-
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret)
- return ret;
-
- inode_lock(inode);
- if (datasync && end <= ei->i_size)
- goto out;
-
- /*
-	 * If there are still outstanding appends, we may not yet have written an
- * i_size that exposes the data we just fsynced - however, we can
- * advance the i_size on disk up to the end of what we just explicitly
- * wrote:
- */
-
- mutex_lock(&ei->update_lock);
-
- if (end > ei->i_size &&
- ei->i_size < inode->i_size) {
- struct i_size_update *u;
- unsigned idx;
- loff_t new_i_size = min_t(u64, inode->i_size,
- roundup(end, PAGE_SIZE));
-
- BUG_ON(fifo_empty(&ei->i_size_updates));
- BUG_ON(new_i_size < ei->i_size);
-
- /*
- * There can still be a pending i_size update < the size we're
- * writing, because it may have been shared with pages > the
- * size we fsynced to:
- */
- fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx)
- if (u->new_i_size < new_i_size)
- u->new_i_size = -1;
-
- ret = bch_write_inode_size(c, ei, new_i_size);
- }
-
- mutex_unlock(&ei->update_lock);
-out:
- inode_unlock(inode);
-
- if (ret)
- return ret;
-
- if (c->opts.journal_flush_disabled)
- return 0;
-
- return bch_journal_flush_seq(&c->journal, ei->journal_seq);
-}
-
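For example: if a 4 KiB file has just been extended to 12 KiB by buffered appends and only bytes 4096..8191 are fsynced, ei->i_size may still be 4096 while inode->i_size is 12288; the code above advances the on-disk size to min(12288, roundup(8192, PAGE_SIZE)) = 8192, cancelling any smaller pending i_size updates, without exposing the not-yet-flushed 8 KiB..12 KiB range.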
/* Inode flags: */
static const unsigned bch_inode_flags_to_vfs_flags_map[] = {
@@ -1838,8 +916,7 @@ static const struct file_operations bch_file_operations = {
.fsync = bch_fsync,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
- .fallocate = bch_fallocate,
-
+ .fallocate = bch_fallocate_dispatch,
.unlocked_ioctl = bch_fs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = bch_compat_fs_ioctl,
@@ -1876,7 +953,6 @@ static const struct file_operations bch_dir_file_operations = {
.read = generic_read_dir,
.iterate = bch_readdir,
.fsync = bch_fsync,
-
.unlocked_ioctl = bch_fs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = bch_compat_fs_ioctl,
@@ -1887,7 +963,6 @@ static const struct inode_operations bch_symlink_inode_operations = {
.readlink = generic_readlink,
.get_link = page_get_link,
.setattr = bch_setattr,
-
.listxattr = bch_xattr_list,
.get_acl = bch_get_acl,
.set_acl = bch_set_acl,
@@ -1900,957 +975,21 @@ static const struct inode_operations bch_special_inode_operations = {
.set_acl = bch_set_acl,
};
-static int bch_bio_add_page(struct bio *bio, struct page *page)
-{
- sector_t offset = (sector_t) page->index << (PAGE_SHIFT - 9);
-
- BUG_ON(!bio->bi_max_vecs);
-
- if (!bio->bi_vcnt)
- bio->bi_iter.bi_sector = offset;
- else if (bio_end_sector(bio) != offset ||
- bio->bi_vcnt == bio->bi_max_vecs)
- return -1;
-
- bio->bi_io_vec[bio->bi_vcnt++] = (struct bio_vec) {
- .bv_page = page,
- .bv_len = PAGE_SIZE,
- .bv_offset = 0,
- };
-
- bio->bi_iter.bi_size += PAGE_SIZE;
-
- return 0;
-}
-
-static void bch_readpages_end_io(struct bio *bio)
-{
- struct bio_vec *bv;
- int i;
-
- bio_for_each_segment_all(bv, bio, i) {
- struct page *page = bv->bv_page;
-
- if (!bio->bi_error) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
- }
-
- bio_put(bio);
-}
-
-static inline struct page *__readpage_next_page(struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
-{
- struct page *page;
- int ret;
-
- while (*nr_pages) {
- page = list_entry(pages->prev, struct page, lru);
- prefetchw(&page->flags);
- list_del(&page->lru);
-
- ret = add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS);
-
- /* if add_to_page_cache_lru() succeeded, page is locked: */
- put_page(page);
-
- if (!ret)
- return page;
-
- (*nr_pages)--;
- }
-
- return NULL;
-}
-
-#define for_each_readpage_page(_mapping, _pages, _nr_pages, _page) \
- for (; \
- ((_page) = __readpage_next_page(_mapping, _pages, &(_nr_pages)));\
- (_nr_pages)--)
-
-static int bch_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
-{
- struct inode *inode = mapping->host;
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct bio *bio = NULL;
- struct page *page;
-
- pr_debug("reading %u pages", nr_pages);
-
- for_each_readpage_page(mapping, pages, nr_pages, page) {
-again:
- if (!bio) {
- bio = bio_alloc(GFP_NOFS,
- min_t(unsigned, nr_pages,
- BIO_MAX_PAGES));
-
- bio->bi_end_io = bch_readpages_end_io;
- }
-
- if (bch_bio_add_page(bio, page)) {
- bch_read(c, bio, inode->i_ino);
- bio = NULL;
- goto again;
- }
- }
-
- if (bio)
- bch_read(c, bio, inode->i_ino);
-
- pr_debug("success");
- return 0;
-}
-
-static int bch_readpage(struct file *file, struct page *page)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct bio *bio;
-
- bio = bio_alloc(GFP_NOFS, 1);
- bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
- bio->bi_end_io = bch_readpages_end_io;
-
- bch_bio_add_page(bio, page);
- bch_read(c, bio, inode->i_ino);
-
- return 0;
-}
-
-struct bch_writepage_io {
- struct closure cl;
-
- struct bch_inode_info *ei;
- unsigned long i_size_update_count[I_SIZE_UPDATE_ENTRIES];
- unsigned long sectors_reserved;
-
- struct bch_write_op op;
- /* must come last: */
- struct bch_write_bio bio;
-};
-
-struct bch_writepage {
- struct cache_set *c;
- u64 inum;
- struct bch_writepage_io *io;
-};
-
-static void bch_writepage_io_free(struct closure *cl)
-{
- struct bch_writepage_io *io = container_of(cl,
- struct bch_writepage_io, cl);
- struct bio *bio = &io->bio.bio.bio;
-
- bio_put(bio);
-}
-
-static void bch_writepage_io_done(struct closure *cl)
-{
- struct bch_writepage_io *io = container_of(cl,
- struct bch_writepage_io, cl);
- struct cache_set *c = io->op.c;
- struct bio *bio = &io->bio.bio.bio;
- struct bch_inode_info *ei = io->ei;
- struct bio_vec *bvec;
- unsigned i;
-
- atomic64_sub_bug(io->sectors_reserved, &c->sectors_reserved);
-
- for (i = 0; i < ARRAY_SIZE(io->i_size_update_count); i++)
- i_size_update_put(c, ei, i, io->i_size_update_count[i]);
-
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
-
- BUG_ON(!PageWriteback(page));
-
- if (io->bio.bio.bio.bi_error) {
- SetPageError(page);
- if (page->mapping)
- set_bit(AS_EIO, &page->mapping->flags);
- }
-
- end_page_writeback(page);
- }
-
- closure_return_with_destructor(&io->cl, bch_writepage_io_free);
-}
-
-static void bch_writepage_do_io(struct bch_writepage_io *io)
-{
- pr_debug("writing %u sectors to %llu:%llu",
- bio_sectors(&io->bio.bio.bio),
- io->op.insert_key.k.p.inode,
- (u64) io->bio.bio.bio.bi_iter.bi_sector);
-
- closure_call(&io->op.cl, bch_write, NULL, &io->cl);
- continue_at(&io->cl, bch_writepage_io_done, io->op.c->wq);
-}
-
-/*
- * Get a bch_writepage_io and add @page to it - appending to an existing one if
- * possible, else allocating a new one:
- */
-static void bch_writepage_io_alloc(struct bch_writepage *w,
- struct bch_inode_info *ei,
- struct page *page)
-{
-alloc_io:
- if (!w->io) {
- struct bio *bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES,
- bch_writepage_bioset);
- w->io = container_of(bio, struct bch_writepage_io, bio.bio.bio);
-
- closure_init(&w->io->cl, NULL);
- w->io->ei = ei;
- memset(w->io->i_size_update_count, 0,
- sizeof(w->io->i_size_update_count));
- w->io->sectors_reserved = 0;
-
- bch_write_op_init(&w->io->op, w->c, &w->io->bio, NULL,
- bkey_to_s_c(&KEY(w->inum, 0, 0)),
- NULL,
- &ei->journal_seq, 0);
- }
-
- if (bch_bio_add_page(&w->io->bio.bio.bio, page)) {
- bch_writepage_do_io(w->io);
- w->io = NULL;
- goto alloc_io;
- }
-
- /*
- * We shouldn't ever be handed pages for multiple inodes in a single
- * pass - right?
- */
- BUG_ON(ei != w->io->ei);
-}
-
-static int __bch_writepage(struct page *page, struct writeback_control *wbc,
- void *data)
-{
- struct inode *inode = page->mapping->host;
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct bch_writepage *w = data;
- unsigned offset;
- loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_SHIFT;
-
- /* Is the page fully inside i_size? */
- if (page->index < end_index)
- goto do_io;
-
- /* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_SIZE - 1);
- if (page->index > end_index || !offset) {
- unlock_page(page);
- return 0;
- }
-
- /*
- * The page straddles i_size. It must be zeroed out on each and every
- * writepage invocation because it may be mmapped. "A file is mapped
- * in multiples of the page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when mapped, and
- * writes to that region are not written out to the file."
- */
- zero_user_segment(page, offset, PAGE_SIZE);
-do_io:
- if (check_make_i_size_dirty(ei, page_offset(page) + PAGE_SIZE)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
-
- bch_writepage_io_alloc(w, ei, page);
-
- /*
- * Before unlocking the page, transfer refcounts to w->io:
- */
- if (PageAppend(page)) {
- struct bch_page_state *s = (void *) &page->private;
-
- /*
- * i_size won't get updated and this write's data made visible
- * until the i_size_update this page points to completes - so
- * tell the write path to start a new one:
- */
- if (&ei->i_size_updates.data[s->idx] ==
- &fifo_back(&ei->i_size_updates))
- set_bit(BCH_INODE_WANT_NEW_APPEND, &ei->flags);
-
- w->io->i_size_update_count[s->idx]++;
- ClearPageAppend(page);
- }
-
- if (PageAllocated(page)) {
- w->io->sectors_reserved += PAGE_SECTORS;
- ClearPageAllocated(page);
- }
-
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
-
- return 0;
-}
-
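A worked example of the i_size straddle handling above, assuming 4 KiB pages and i_size = 10000: end_index = 2 and offset = 10000 & (PAGE_SIZE - 1) = 1808. Pages 0 and 1 are fully inside i_size and written as-is; any page with index > 2 is skipped as wholly past EOF (as page 2 itself would be if i_size were page aligned); page 2 straddles EOF, so bytes 1808..4095 are zeroed on every pass before the page is added to the writepage io, since an mmap write may have dirtied the region past EOF.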
-static int bch_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- int ret;
- struct bch_writepage w = {
- .c = mapping->host->i_sb->s_fs_info,
- .inum = mapping->host->i_ino,
- .io = NULL,
- };
-
- ret = write_cache_pages(mapping, wbc, __bch_writepage, &w);
-
- if (w.io)
- bch_writepage_do_io(w.io);
-
- return ret;
-}
-
-static int bch_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- int ret;
- struct bch_writepage w = {
- .c = inode->i_sb->s_fs_info,
- .inum = inode->i_ino,
- .io = NULL,
- };
-
- ret = __bch_writepage(page, NULL, &w);
- if (ret)
- return ret;
-
- if (w.io)
- bch_writepage_do_io(w.io);
-
- return 0;
-}
-
-static void bch_read_single_page_end_io(struct bio *bio)
-{
- complete(bio->bi_private);
-}
-
-static int bch_read_single_page(struct page *page,
- struct address_space *mapping)
-{
- struct inode *inode = mapping->host;
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct bio *bio;
- int ret = 0;
- DECLARE_COMPLETION_ONSTACK(done);
-
- bio = bio_alloc(GFP_NOFS, 1);
- bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
- bio->bi_private = &done;
- bio->bi_end_io = bch_read_single_page_end_io;
- bch_bio_add_page(bio, page);
-
- bch_read(c, bio, inode->i_ino);
- wait_for_completion(&done);
-
- if (!ret)
- ret = bio->bi_error;
- bio_put(bio);
-
- if (ret < 0)
- return ret;
-
- SetPageUptodate(page);
-
- return 0;
-}
-
-static int bch_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- struct inode *inode = mapping->host;
- struct cache_set *c = inode->i_sb->s_fs_info;
- pgoff_t index = pos >> PAGE_SHIFT;
- unsigned offset = pos & (PAGE_SIZE - 1);
- struct page *page;
- int ret = 0;
-
- BUG_ON(inode_unhashed(mapping->host));
-
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
-
- if (!PageAllocated(page)) {
- if (reserve_sectors(c, PAGE_SECTORS)) {
- ret = -ENOSPC;
- goto err;
- }
-
- SetPageAllocated(page);
- }
-
- if (PageUptodate(page))
- goto out;
-
-	/* If we're writing the entire page, we don't need to read it in first: */
- if (len == PAGE_SIZE)
- goto out;
-
- if (!offset && pos + len >= inode->i_size) {
- zero_user_segment(page, len, PAGE_SIZE);
- flush_dcache_page(page);
- goto out;
- }
-
- if (index > inode->i_size >> PAGE_SHIFT) {
- zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
- flush_dcache_page(page);
- goto out;
- }
-
- ret = bch_read_single_page(page, mapping);
- if (ret)
- goto err;
-out:
- *pagep = page;
- return ret;
-err:
- unlock_page(page);
- put_page(page);
- page = NULL;
- goto out;
-}
-
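For example, assuming 4 KiB pages and i_size = 6000: a 50-byte write at pos 8292 lands in page index 2 with offset 100; offset is nonzero and index 2 > i_size >> PAGE_SHIFT = 1, so the page is never read - zero_user_segments() just clears everything around bytes 100..149. A 50-byte write at pos 5000 lands in page 1, the page containing EOF, so it does go through bch_read_single_page() first.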
-static int bch_write_end(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = page->mapping->host;
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
-
- lockdep_assert_held(&inode->i_rwsem);
-
- if (unlikely(copied < len && !PageUptodate(page))) {
- /*
- * The page needs to be read in, but that would destroy
- * our partial write - simplest thing is to just force
- * userspace to redo the write:
- *
- * userspace doesn't _have_ to redo the write, so clear
- * PageAllocated:
- */
- copied = 0;
- zero_user(page, 0, PAGE_SIZE);
- flush_dcache_page(page);
- bch_clear_page_bits(c, ei, page);
- goto out;
- }
-
- if (!PageUptodate(page))
- SetPageUptodate(page);
- if (!PageDirty(page))
- set_page_dirty(page);
-
- if (pos + copied > inode->i_size) {
- struct i_size_update *u;
-
- /*
- * if page already has a ref on a i_size_update, even if it's an
- * older one, leave it - they have to be flushed in order so
- * that's just as good as taking a ref on a newer one, if we're
- * adding a newer one now
- *
- * - if there's no current i_size_update, or if we want to
- * create a new one and there's room for a new one, create it
- *
- * - set current i_size_update's i_size to new i_size
- *
- * - if !PageAppend, take a ref on the current i_size_update
- */
-
- /* XXX: locking */
- mutex_lock(&ei->update_lock);
- u = i_size_update_new(ei, pos + copied);
-
- if (!PageAppend(page)) {
- struct bch_page_state *s = (void *) &page->private;
-
- s->idx = u - ei->i_size_updates.data;
- atomic_long_inc(&u->count);
-
- SetPageAppend(page);
- }
-
- bch_i_size_write(inode, pos + copied);
- mutex_unlock(&ei->update_lock);
- }
-out:
- unlock_page(page);
- put_page(page);
-
- return copied;
-}
-
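For example, a buffered append copying 100 bytes at pos == i_size == 8192: i_size_update_new() extends (or pushes) the back FIFO entry to new_i_size 8292, and since the page is not yet PageAppend its bch_page_state (stored in page->private) records that entry's index and takes a ref on it; __bch_writepage() above later transfers the ref to the writepage io, and bch_writepage_io_done() drops it once the write completes, at which point the i_size update can reach disk.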
-static void bch_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
-{
- struct inode *inode = page->mapping->host;
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
-
- if (offset || length < PAGE_SIZE)
- return;
-
- bch_clear_page_bits(c, ei, page);
-}
-
-static int bch_releasepage(struct page *page, gfp_t gfp_mask)
-{
- struct inode *inode = page->mapping->host;
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
-
- bch_clear_page_bits(c, ei, page);
-
- if (PageDirty(page)) {
- ClearPageDirty(page);
- cancel_dirty_page(page);
- }
-
- return 1;
-}
-
-/* O_DIRECT */
-
-static struct bio_set *bch_dio_read_bioset;
-static struct bio_set *bch_dio_write_bioset;
-
-struct dio_read {
- struct closure cl;
- struct kiocb *req;
- long ret;
- struct bio bio;
-};
-
-static void bch_dio_read_complete(struct closure *cl)
-{
- struct dio_read *dio = container_of(cl, struct dio_read, cl);
-
- dio->req->ki_complete(dio->req, dio->ret, 0);
- bio_put(&dio->bio);
-}
-
-static void bch_direct_IO_read_endio(struct bio *bio)
-{
- struct dio_read *dio = bio->bi_private;
-
- if (bio->bi_error)
- dio->ret = bio->bi_error;
-
- closure_put(&dio->cl);
- bio_check_pages_dirty(bio); /* transfers ownership */
-}
-
-static int bch_direct_IO_read(struct cache_set *c, struct kiocb *req,
- struct file *file, struct inode *inode,
- struct iov_iter *iter, loff_t offset)
-{
- struct dio_read *dio;
- struct bio *bio;
- unsigned long inum = inode->i_ino;
- ssize_t ret = 0;
- size_t pages = iov_iter_npages(iter, BIO_MAX_PAGES);
- bool sync = is_sync_kiocb(req);
- loff_t i_size;
-
- bio = bio_alloc_bioset(GFP_KERNEL, pages, bch_dio_read_bioset);
- bio_get(bio);
-
- dio = container_of(bio, struct dio_read, bio);
- closure_init(&dio->cl, NULL);
-
- /*
- * this is a _really_ horrible hack just to avoid an atomic sub at the
- * end:
- */
- if (!sync) {
- set_closure_fn(&dio->cl, bch_dio_read_complete, NULL);
- atomic_set(&dio->cl.remaining,
- CLOSURE_REMAINING_INITIALIZER -
- CLOSURE_RUNNING +
- CLOSURE_DESTRUCTOR);
- } else {
- atomic_set(&dio->cl.remaining,
- CLOSURE_REMAINING_INITIALIZER + 1);
- }
-
- dio->req = req;
- dio->ret = iter->count;
-
- i_size = i_size_read(inode);
- if (offset + dio->ret > i_size) {
- dio->ret = max_t(loff_t, 0, i_size - offset);
- iter->count = round_up(dio->ret, PAGE_SIZE);
- }
-
- if (!dio->ret) {
- closure_put(&dio->cl);
- goto out;
- }
-
- goto start;
- while (iter->count) {
- pages = iov_iter_npages(iter, BIO_MAX_PAGES);
- bio = bio_alloc(GFP_KERNEL, pages);
-start:
- bio->bi_iter.bi_sector = offset >> 9;
- bio->bi_end_io = bch_direct_IO_read_endio;
- bio->bi_private = dio;
-
- ret = bio_get_user_pages(bio, iter, 1);
- if (ret < 0) {
- /* XXX: fault inject this path */
- bio->bi_error = ret;
- bio_endio(bio);
- break;
- }
-
- offset += bio->bi_iter.bi_size;
- bio_set_pages_dirty(bio);
-
- if (iter->count)
- closure_get(&dio->cl);
-
- bch_read(c, bio, inum);
- }
-out:
- if (sync) {
- closure_sync(&dio->cl);
- closure_debug_destroy(&dio->cl);
- ret = dio->ret;
- bio_put(&dio->bio);
- return ret;
- } else {
- return -EIOCBQUEUED;
- }
-}
-
-struct dio_write {
- struct closure cl;
- struct kiocb *req;
- long written;
- long error;
- loff_t offset;
- bool append;
-
- struct iovec *iovec;
- struct iovec inline_vecs[UIO_FASTIOV];
- struct iov_iter iter;
-
- struct mm_struct *mm;
-
- struct bch_write_op iop;
- /* must be last: */
- struct bch_write_bio bio;
-};
-
-static void __bch_dio_write_complete(struct dio_write *dio)
-{
- inode_dio_end(dio->req->ki_filp->f_inode);
-
- if (dio->iovec && dio->iovec != dio->inline_vecs)
- kfree(dio->iovec);
-
- bio_put(&dio->bio.bio.bio);
-}
-
-static void bch_dio_write_complete(struct closure *cl)
-{
- struct dio_write *dio = container_of(cl, struct dio_write, cl);
- struct kiocb *req = dio->req;
- long ret = dio->written ?: dio->error;
-
- __bch_dio_write_complete(dio);
- req->ki_complete(req, ret, 0);
-}
-
-static void bch_dio_write_done(struct dio_write *dio)
-{
- struct bio_vec *bv;
- int i;
-
- dio->written += dio->iop.written << 9;
-
- if (dio->iop.error)
- dio->error = dio->iop.error;
-
- bio_for_each_segment_all(bv, &dio->bio.bio.bio, i)
- put_page(bv->bv_page);
-
- if (dio->iter.count)
- bio_reset(&dio->bio.bio.bio);
-}
-
-static void bch_do_direct_IO_write(struct dio_write *dio, bool sync)
-{
- struct file *file = dio->req->ki_filp;
- struct inode *inode = file->f_inode;
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct cache_set *c = inode->i_sb->s_fs_info;
- struct bio *bio = &dio->bio.bio.bio;
- unsigned flags = BCH_WRITE_CHECK_ENOSPC;
- int ret;
-
- if (file->f_flags & O_DSYNC || IS_SYNC(file->f_mapping->host))
- flags |= BCH_WRITE_FLUSH;
-
- while (dio->iter.count) {
- bio->bi_iter.bi_sector = (dio->offset + dio->written) >> 9;
-
- ret = bio_get_user_pages(bio, &dio->iter, 0);
- if (ret < 0) {
- dio->error = ret;
- break;
- }
-
- bch_write_op_init(&dio->iop, c, &dio->bio, NULL,
- bkey_to_s_c(&KEY(inode->i_ino,
- bio_end_sector(bio),
- bio_sectors(bio))),
- NULL,
- &ei->journal_seq, flags);
-
- task_io_account_write(bio->bi_iter.bi_size);
-
- closure_call(&dio->iop.cl, bch_write, NULL, &dio->cl);
-
- if (!sync)
- break;
-
- closure_sync(&dio->cl);
- bch_dio_write_done(dio);
- }
-}
-
-static void bch_dio_write_loop_async(struct closure *cl)
-{
- struct dio_write *dio =
- container_of(cl, struct dio_write, cl);
-
- bch_dio_write_done(dio);
-
- if (dio->iter.count && !dio->error) {
- use_mm(dio->mm);
- bch_do_direct_IO_write(dio, false);
- unuse_mm(dio->mm);
-
- continue_at(&dio->cl,
- bch_dio_write_loop_async,
- dio->iter.count ? system_wq : NULL);
- } else {
-#if 0
- closure_return_with_destructor(cl, bch_dio_write_complete);
-#else
- closure_debug_destroy(cl);
- bch_dio_write_complete(cl);
-#endif
- }
-}
-
-static int bch_direct_IO_write(struct cache_set *c, struct kiocb *req,
- struct file *file, struct inode *inode,
- struct iov_iter *iter, loff_t offset)
-{
- struct bch_inode_info *ei = to_bch_ei(inode);
- struct dio_write *dio;
- struct bio *bio;
- size_t pages = iov_iter_npages(iter, BIO_MAX_PAGES);
- ssize_t ret;
- bool sync;
-
- lockdep_assert_held(&inode->i_rwsem);
-
- bio = bio_alloc_bioset(GFP_KERNEL, pages, bch_dio_write_bioset);
-
- dio = container_of(bio, struct dio_write, bio.bio.bio);
- dio->req = req;
- dio->written = 0;
- dio->error = 0;
- dio->offset = offset;
- dio->append = false;
- dio->iovec = NULL;
- dio->iter = *iter;
- dio->mm = current->mm;
-
- if (offset + iter->count > inode->i_size) {
- /*
- * XXX: try and convert this to i_size_update_new(), and maybe
- * make async O_DIRECT appends work
- */
-
- dio->append = true;
- i_size_dirty_get(ei);
- }
-
- ret = check_make_i_size_dirty(ei, offset + iter->count);
- if (ret) {
- if (dio->append)
- i_size_dirty_put(ei);
- bio_put(bio);
- return ret;
- }
-
- closure_init(&dio->cl, NULL);
-
- inode_dio_begin(inode);
-
- /*
- * appends are sync in order to do the i_size update under
- * i_mutex, after we know the write has completed successfully
- */
- sync = is_sync_kiocb(req) || dio->append;
-
- bch_do_direct_IO_write(dio, sync);
-
- if (sync) {
- closure_debug_destroy(&dio->cl);
- ret = dio->written ?: dio->error;
-
- if (dio->append) {
- loff_t new_i_size = offset + dio->written;
- int ret2 = 0;
-
- if (dio->written &&
- new_i_size > inode->i_size) {
- struct i_size_update *u;
- unsigned idx;
-
- mutex_lock(&ei->update_lock);
-
- bch_i_size_write(inode, new_i_size);
-
- fifo_for_each_entry_ptr(u, &ei->i_size_updates, idx) {
- if (u->new_i_size < new_i_size)
- u->new_i_size = -1;
- else
- BUG();
- }
-
- i_size_dirty_put(ei);
- ret2 = bch_write_inode_size(c, ei, new_i_size);
-
- mutex_unlock(&ei->update_lock);
- } else {
- i_size_dirty_put(ei);
- }
- }
-
- __bch_dio_write_complete(dio);
- return ret;
- } else {
- if (dio->iter.count) {
- if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
- dio->iovec = kmalloc(dio->iter.nr_segs *
- sizeof(struct iovec),
- GFP_KERNEL);
- if (!dio->iovec)
- dio->error = -ENOMEM;
- } else {
- dio->iovec = dio->inline_vecs;
- }
-
- memcpy(dio->iovec,
- dio->iter.iov,
- dio->iter.nr_segs * sizeof(struct iovec));
- dio->iter.iov = dio->iovec;
- }
-
- continue_at_noreturn(&dio->cl,
- bch_dio_write_loop_async,
- dio->iter.count ? system_wq : NULL);
- return -EIOCBQUEUED;
- }
-}
-
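For example, an O_DIRECT write of 8 KiB at offset == i_size is forced synchronous (sync is true above even for an aio request); only after bch_write completes does it take update_lock, bump the in-memory i_size with bch_i_size_write(), cancel any smaller pending i_size updates, drop the i_size_dirty ref and write the new size to the inode, so the extended range only becomes visible once the data covering it has been written.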
-static ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter)
-{
- struct file *file = req->ki_filp;
- struct inode *inode = file->f_inode;
- struct cache_set *c = inode->i_sb->s_fs_info;
-
- if ((req->ki_pos|iter->count) & (block_bytes(c) - 1))
- return -EINVAL;
-
- return ((iov_iter_rw(iter) == WRITE)
- ? bch_direct_IO_write
- : bch_direct_IO_read)(c, req, file, inode, iter, req->ki_pos);
-}
-
-#ifdef CONFIG_MIGRATION
-static int bch_migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page,
- enum migrate_mode mode)
-{
- int ret;
-
- ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
- if (ret != MIGRATEPAGE_SUCCESS)
- return ret;
-
- if (PageAllocated(page)) {
- ClearPageAllocated(page);
- SetPageAllocated(newpage);
- }
-
- if (PageAppend(page)) {
- ClearPageAppend(page);
- SetPageAppend(newpage);
- }
-
- migrate_page_copy(newpage, page);
- return MIGRATEPAGE_SUCCESS;
-}
-#endif
-
static const struct address_space_operations bch_address_space_operations = {
- .writepage = bch_writepage,
- .readpage = bch_readpage,
- .writepages = bch_writepages,
- .readpages = bch_readpages,
-
- .set_page_dirty = __set_page_dirty_nobuffers,
-
- .write_begin = bch_write_begin,
- .write_end = bch_write_end,
- .invalidatepage = bch_invalidatepage,
- .releasepage = bch_releasepage,
-
- .direct_IO = bch_direct_IO,
-
+ .writepage = bch_writepage,
+ .readpage = bch_readpage,
+ .writepages = bch_writepages,
+ .readpages = bch_readpages,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .write_begin = bch_write_begin,
+ .write_end = bch_write_end,
+ .invalidatepage = bch_invalidatepage,
+ .releasepage = bch_releasepage,
+ .direct_IO = bch_direct_IO,
#ifdef CONFIG_MIGRATION
- .migratepage = bch_migrate_page,
+ .migratepage = bch_migrate_page,
#endif
- .error_remove_page = generic_error_remove_page,
+ .error_remove_page = generic_error_remove_page,
};
static void bch_inode_init(struct bch_inode_info *ei,
diff --git a/drivers/md/bcache/fs.h b/drivers/md/bcache/fs.h
index 6b08a8895d93..8972d2e360fb 100644
--- a/drivers/md/bcache/fs.h
+++ b/drivers/md/bcache/fs.h
@@ -42,11 +42,6 @@ enum {
BCH_INODE_WANT_NEW_APPEND,
};
-/* stored in page->private: */
-struct bch_page_state {
- u8 idx;
-};
-
#define to_bch_ei(_inode) \
container_of(_inode, struct bch_inode_info, vfs_inode)
@@ -55,4 +50,11 @@ static inline u8 mode_to_type(umode_t mode)
return (mode >> 12) & 15;
}
+/* returns 0 to proceed with the update; a nonzero error aborts and is passed up */
+typedef int (*inode_set_fn)(struct bch_inode_info *,
+ struct bch_inode *, void *);
+
+int __must_check __bch_write_inode(struct cache_set *, struct bch_inode_info *,
+ inode_set_fn, void *);
+
#endif /* _BCACHE_FS_H */