summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2018-06-02 17:53:40 -0400
committerKent Overstreet <kent.overstreet@gmail.com>2018-06-03 21:01:03 -0400
commit98e46637b68323d1b82473c5eacba6d521f7a7d1 (patch)
tree4ed47a60e4b0aacf962e6eae6d835b08c990e8eb
parent815710daa06e043f65675201a7f779a7a6391c90 (diff)
bcachefs: fix a fun truncate bug
truncate was leaving extents past the end of i_size. Turns out, it was doing so because it thought it wasn't shrinking the file when it was, and it thought it wasn't shrinking because i_size had gotten screwed up - the in memory i_size was smaller than the on disk i_size, which is never supposed to happen. Also turns out, the thing that was screwing up i_size was truncate - specifically, the error path when the filemap_write_and_wait_range() call fails. Besides fixing truncate itself, this patch also fixes and makes rigorous a lot of the locking pertaining to i_size and ei_inode (the cached on disk inode in bch_inode_info). Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--fs/bcachefs/fs-io.c115
-rw-r--r--fs/bcachefs/fs.c12
2 files changed, 88 insertions, 39 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index d7b17195ee84..40af7ea47279 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -257,12 +257,12 @@ static int i_sectors_dirty_finish(struct bch_fs *c, struct i_sectors_hook *h)
int ret;
mutex_lock(&h->inode->ei_update_lock);
- if (h->new_i_size != U64_MAX)
- i_size_write(&h->inode->v, h->new_i_size);
-
i_sectors_acct(c, h->inode, &h->quota_res, h->sectors);
ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h);
+
+ if (!ret && h->new_i_size != U64_MAX)
+ i_size_write(&h->inode->v, h->new_i_size);
mutex_unlock(&h->inode->ei_update_lock);
bch2_quota_reservation_put(c, h->inode, &h->quota_res);
@@ -348,17 +348,25 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
return BTREE_INSERT_NEED_TRAVERSE;
}
- BUG_ON(h->inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY);
+ /* truncate in progress? */
+ if (h->inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY)
+ goto no_i_size_update;
h->inode_u.bi_size = offset;
do_pack = true;
inode->ei_inode.bi_size = offset;
- if (h->op->is_dio)
- i_size_write(&inode->v, offset);
+ spin_lock(&inode->v.i_lock);
+ if (offset > inode->v.i_size) {
+ if (h->op->is_dio)
+ i_size_write(&inode->v, offset);
+ else
+ BUG();
+ }
+ spin_unlock(&inode->v.i_lock);
}
-
+no_i_size_update:
if (sectors) {
if (!h->need_inode_update) {
h->need_inode_update = true;
@@ -1457,8 +1465,10 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
copied = 0;
}
+ spin_lock(&inode->v.i_lock);
if (pos + copied > inode->v.i_size)
i_size_write(&inode->v, pos + copied);
+ spin_unlock(&inode->v.i_lock);
if (copied) {
if (!PageUptodate(page))
@@ -1563,8 +1573,10 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE);
inode->ei_last_dirtied = (unsigned long) current;
+ spin_lock(&inode->v.i_lock);
if (pos + copied > inode->v.i_size)
i_size_write(&inode->v, pos + copied);
+ spin_unlock(&inode->v.i_lock);
if (copied < len &&
((offset + copied) & (PAGE_SIZE - 1))) {
@@ -2149,25 +2161,61 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
from, from + PAGE_SIZE);
}
+static int bch2_extend(struct bch_inode_info *inode, struct iattr *iattr)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct address_space *mapping = inode->v.i_mapping;
+ int ret;
+
+ ret = filemap_write_and_wait_range(mapping,
+ inode->ei_inode.bi_size, S64_MAX);
+ if (ret)
+ return ret;
+
+ truncate_setsize(&inode->v, iattr->ia_size);
+ setattr_copy(&inode->v, iattr);
+
+ mutex_lock(&inode->ei_update_lock);
+ inode->v.i_mtime = inode->v.i_ctime = current_time(&inode->v);
+ ret = bch2_write_inode_size(c, inode, inode->v.i_size);
+ mutex_unlock(&inode->ei_update_lock);
+
+ return ret;
+}
+
int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
- bool shrink = iattr->ia_size <= inode->v.i_size;
struct i_sectors_hook i_sectors_hook =
i_sectors_hook_init(inode, BCH_INODE_I_SIZE_DIRTY);
+ bool shrink;
int ret = 0;
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
- truncate_setsize(&inode->v, iattr->ia_size);
+ BUG_ON(inode->v.i_size < inode->ei_inode.bi_size);
+
+ shrink = iattr->ia_size <= inode->v.i_size;
+
+ if (!shrink) {
+ ret = bch2_extend(inode, iattr);
+ goto err_put_pagecache;
+ }
+
+ ret = bch2_truncate_page(inode, iattr->ia_size);
+ if (unlikely(ret))
+ goto err_put_pagecache;
- /* sync appends.. */
- /* XXX what protects inode->i_size? */
if (iattr->ia_size > inode->ei_inode.bi_size)
ret = filemap_write_and_wait_range(mapping,
- inode->ei_inode.bi_size, S64_MAX);
+ inode->ei_inode.bi_size,
+ iattr->ia_size - 1);
+ else if (iattr->ia_size & (PAGE_SIZE - 1))
+ ret = filemap_write_and_wait_range(mapping,
+ round_down(iattr->ia_size, PAGE_SIZE),
+ iattr->ia_size - 1);
if (ret)
goto err_put_pagecache;
@@ -2175,41 +2223,31 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
ret = i_sectors_dirty_start(c, &i_sectors_hook);
if (unlikely(ret))
- goto err;
+ goto err_put_pagecache;
- /*
- * There might be persistent reservations (from fallocate())
- * above i_size, which bch2_inode_truncate() will discard - we're
- * only supposed to discard them if we're doing a real truncate
- * here (new i_size < current i_size):
- */
- if (shrink) {
- ret = bch2_truncate_page(inode, iattr->ia_size);
- if (unlikely(ret))
- goto err;
+ truncate_setsize(&inode->v, iattr->ia_size);
- ret = bch2_inode_truncate(c, inode->v.i_ino,
- round_up(iattr->ia_size, PAGE_SIZE) >> 9,
- &i_sectors_hook.hook,
- &inode->ei_journal_seq);
- if (unlikely(ret))
- goto err;
- }
+ ret = bch2_inode_truncate(c, inode->v.i_ino,
+ round_up(iattr->ia_size, PAGE_SIZE) >> 9,
+ &i_sectors_hook.hook,
+ &inode->ei_journal_seq);
+ if (unlikely(ret))
+ goto err_put_sectors_dirty;
setattr_copy(&inode->v, iattr);
inode->v.i_mtime = inode->v.i_ctime = current_time(&inode->v);
-err:
- /*
- * On error - in particular, bch2_truncate_page() error - don't clear
- * I_SIZE_DIRTY, as we've left data above i_size!:
- */
- if (ret)
- i_sectors_hook.flags &= ~BCH_INODE_I_SIZE_DIRTY;
-
+out:
ret = i_sectors_dirty_finish(c, &i_sectors_hook) ?: ret;
err_put_pagecache:
pagecache_block_put(&mapping->add_lock);
return ret;
+err_put_sectors_dirty:
+ /*
+ * On error - in particular, bch2_truncate_page() error - don't clear
+ * I_SIZE_DIRTY, as we've left data above i_size!:
+ */
+ i_sectors_hook.flags &= ~BCH_INODE_I_SIZE_DIRTY;
+ goto out;
}
/* fallocate: */
@@ -2389,7 +2427,6 @@ btree_iter_err:
if (ret)
goto err_put_sectors_dirty;
- i_size_write(&inode->v, new_size);
i_sectors_hook.new_i_size = new_size;
err_put_sectors_dirty:
ret = i_sectors_dirty_finish(c, &i_sectors_hook) ?: ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index dc6c651df2f3..19f3ba007e64 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -106,6 +106,8 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
break;
}
+ BUG_ON(inode_u.bi_size != inode->ei_inode.bi_size);
+
if (set) {
ret = set(inode, &inode_u, p);
if (ret)
@@ -114,6 +116,10 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
BUG_ON(i_nlink < nlink_bias(inode->v.i_mode));
+ BUG_ON(inode_u.bi_size != inode->ei_inode.bi_size &&
+ !(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+ inode_u.bi_size > i_size_read(&inode->v));
+
inode_u.bi_mode = inode->v.i_mode;
inode_u.bi_uid = i_uid_read(&inode->v);
inode_u.bi_gid = i_gid_read(&inode->v);
@@ -129,11 +135,17 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
ret = bch2_btree_insert_at(c, NULL, NULL,
&inode->ei_journal_seq,
BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
} while (ret == -EINTR);
if (!ret) {
+ /*
+ * the btree node lock protects inode->ei_inode, not
+ * ei_update_lock; this is important for inode updates via
+ * bchfs_write_index_update
+ */
inode->ei_inode = inode_u;
inode->ei_qid = bch_qid(&inode_u);
}