author     Kent Overstreet <kent.overstreet@gmail.com>   2018-01-19 13:11:22 -0500
committer  Kent Overstreet <kent.overstreet@gmail.com>   2018-01-19 18:52:31 -0500
commit     ae6e8a59d33008f46bb801850840dbd0a7608bbc (patch)
tree       3e7a2512e46cdd69bededdb8337e5b2ba3619e2f
parent     1dcfa0ef09dce9d6215142040016dd7d3fc7bfbc (diff)
bcachefs: quota limit enforcement
-rw-r--r--  fs/bcachefs/fs-io.c     | 246
-rw-r--r--  fs/bcachefs/fs-ioctl.c  |   3
-rw-r--r--  fs/bcachefs/fs.c        |  24
-rw-r--r--  fs/bcachefs/fs.h        |   1
-rw-r--r--  fs/bcachefs/quota.c     |   8
5 files changed, 211 insertions, 71 deletions
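
The bulk of the change is in fs-io.c: a small struct quota_res is threaded through buffered writes, direct writes and fallocate so that quota space is reserved up front (and can fail against the hard limit) and is later either consumed into i_blocks or released. As an illustration only, here is a minimal userspace model of that lifecycle; the mock_inode type, the HARD_LIMIT_SECTORS constant and the simplified function bodies are stand-ins for bch_fs/bch_inode_info and the real bch2_quota_reservation_add()/bch2_quota_reservation_put()/__i_sectors_acct(), not the kernel code itself:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures touched by the patch. */
struct quota_res { uint64_t sectors; };

struct mock_inode {
	uint64_t i_blocks;           /* sectors already charged to the quota */
	uint64_t ei_quota_reserved;  /* sectors reserved but not yet written */
};

/* Hypothetical hard limit, in 512-byte sectors. */
#define HARD_LIMIT_SECTORS 1024

/* In the spirit of bch2_quota_reservation_add(): refuse the reservation if
 * current usage plus outstanding reservations would cross the hard limit. */
static int quota_reservation_add(struct mock_inode *inode,
				 struct quota_res *res, uint64_t sectors)
{
	if (inode->i_blocks + inode->ei_quota_reserved + sectors > HARD_LIMIT_SECTORS)
		return -1;
	inode->ei_quota_reserved += sectors;
	res->sectors += sectors;
	return 0;
}

/* In the spirit of __i_sectors_acct() when a reservation is held: move the
 * sectors from the reservation into i_blocks instead of charging the quota
 * a second time. */
static void i_sectors_acct(struct mock_inode *inode,
			   struct quota_res *res, uint64_t sectors)
{
	res->sectors -= sectors;
	inode->ei_quota_reserved -= sectors;
	inode->i_blocks += sectors;
}

/* In the spirit of bch2_quota_reservation_put(): release whatever is left. */
static void quota_reservation_put(struct mock_inode *inode, struct quota_res *res)
{
	inode->ei_quota_reserved -= res->sectors;
	res->sectors = 0;
}

int main(void)
{
	struct mock_inode inode = { 0, 0 };
	struct quota_res res = { 0 };

	if (quota_reservation_add(&inode, &res, 8))	/* reserve one 4K page */
		return 1;
	i_sectors_acct(&inode, &res, 8);		/* page written back */
	quota_reservation_put(&inode, &res);		/* nothing left to drop */

	printf("i_blocks=%llu reserved=%llu\n",
	       (unsigned long long) inode.i_blocks,
	       (unsigned long long) inode.ei_quota_reserved);
	return 0;
}
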
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index bd9a46d9b2f1..7693520d4675 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -27,9 +27,14 @@
#include <trace/events/bcachefs.h>
#include <trace/events/writeback.h>
+struct quota_res {
+ u64 sectors;
+};
+
struct i_sectors_hook {
struct extent_insert_hook hook;
struct bch_inode_info *inode;
+ struct quota_res quota_res;
s64 sectors;
u64 new_i_size;
unsigned flags;
@@ -62,6 +67,7 @@ struct dio_write {
unsigned loop:1,
sync:1,
free_iov:1;
+ struct quota_res quota_res;
struct iov_iter iter;
struct iovec inline_vecs[2];
@@ -107,6 +113,67 @@ static int write_invalidate_inode_pages_range(struct address_space *mapping,
return ret;
}
+/* quotas */
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+static void bch2_quota_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res)
+{
+ if (!res->sectors)
+ return;
+
+ mutex_lock(&inode->ei_update_lock);
+ BUG_ON(res->sectors > inode->ei_quota_reserved);
+
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC,
+ -((s64) res->sectors), BCH_QUOTA_PREALLOC);
+ inode->ei_quota_reserved -= res->sectors;
+ mutex_unlock(&inode->ei_update_lock);
+
+ res->sectors = 0;
+}
+
+static int bch2_quota_reservation_add(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res,
+ unsigned sectors,
+ bool check_enospc)
+{
+ int ret;
+
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
+ check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK);
+ if (likely(!ret)) {
+ inode->ei_quota_reserved += sectors;
+ res->sectors += sectors;
+ }
+ mutex_unlock(&inode->ei_update_lock);
+
+ return ret;
+}
+
+#else
+
+static void bch2_quota_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res)
+{
+}
+
+static int bch2_quota_reservation_add(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res,
+ unsigned sectors,
+ bool check_enospc)
+{
+ return 0;
+}
+
+#endif
+
/* i_size updates: */
static int inode_set_size(struct bch_inode_info *inode,
@@ -128,16 +195,28 @@ static int __must_check bch2_write_inode_size(struct bch_fs *c,
return __bch2_write_inode(c, inode, inode_set_size, &new_size);
}
-static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors)
+static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
+ struct quota_res *quota_res, int sectors)
{
+#ifdef CONFIG_BCACHEFS_QUOTA
+ if (quota_res && sectors > 0) {
+ BUG_ON(sectors > quota_res->sectors);
+ BUG_ON(sectors > inode->ei_quota_reserved);
+
+ quota_res->sectors -= sectors;
+ inode->ei_quota_reserved -= sectors;
+ } else {
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN);
+ }
+#endif
inode->v.i_blocks += sectors;
- bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN);
}
-static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors)
+static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
+ struct quota_res *quota_res, int sectors)
{
mutex_lock(&inode->ei_update_lock);
- __i_sectors_acct(c, inode, sectors);
+ __i_sectors_acct(c, inode, quota_res, sectors);
mutex_unlock(&inode->ei_update_lock);
}
@@ -186,11 +265,13 @@ static int i_sectors_dirty_finish(struct bch_fs *c, struct i_sectors_hook *h)
if (h->new_i_size != U64_MAX)
i_size_write(&h->inode->v, h->new_i_size);
- __i_sectors_acct(c, h->inode, h->sectors);
+ __i_sectors_acct(c, h->inode, &h->quota_res, h->sectors);
ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h);
mutex_unlock(&h->inode->ei_update_lock);
+ bch2_quota_reservation_put(c, h->inode, &h->quota_res);
+
h->sectors = 0;
return ret;
@@ -401,9 +482,12 @@ err:
bch2_btree_iter_unlock(&extent_iter);
bch2_btree_iter_unlock(&inode_iter);
- if (op->is_dio)
- i_sectors_acct(wop->c, op->inode,
+ if (op->is_dio) {
+ struct dio_write *dio = container_of(op, struct dio_write, iop);
+
+ i_sectors_acct(wop->c, op->inode, &dio->quota_res,
op->sectors_added - orig_sectors_added);
+ }
return ret;
}
@@ -456,6 +540,9 @@ union { struct {
unsigned reserved:1;
unsigned reservation_replicas:4;
+ /* Owns PAGE_SECTORS sized quota reservation: */
+ unsigned quota_reserved:1;
+
/*
* Number of sectors on disk - for i_blocks
* Uncompressed size, not compressed size:
@@ -495,67 +582,95 @@ static inline struct bch_page_state *page_state(struct page *page)
return s;
}
-static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_page_state s)
+static inline unsigned page_res_sectors(struct bch_page_state s)
{
- struct disk_reservation res = {
- .sectors = s.reserved ? PAGE_SECTORS * s.reservation_replicas : 0
- };
+ return s.reserved ? s.reservation_replicas * PAGE_SECTORS : 0;
+}
+
+static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
+ struct bch_page_state s)
+{
+ struct disk_reservation res = { .sectors = page_res_sectors(s) };
+ struct quota_res quota_res = { .sectors = s.quota_reserved ? PAGE_SECTORS : 0 };
+
+ bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &res);
}
-static void bch2_put_page_reservation(struct bch_fs *c, struct page *page)
+static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
+ struct page *page)
{
struct bch_page_state s;
s = page_state_cmpxchg(page_state(page), s, {
- if (!s.reserved)
- return;
- s.reserved = 0;
+ s.reserved = 0;
+ s.quota_reserved = 0;
});
- __bch2_put_page_reservation(c, s);
+ __bch2_put_page_reservation(c, inode, s);
}
-static int bch2_get_page_reservation(struct bch_fs *c, struct page *page,
- bool check_enospc)
+static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
+ struct page *page, bool check_enospc)
{
struct bch_page_state *s = page_state(page), new, old;
- unsigned replicas = READ_ONCE(c->opts.data_replicas);
- struct disk_reservation res;
+ struct disk_reservation disk_res = bch2_disk_reservation_init(c,
+ READ_ONCE(c->opts.data_replicas));
+ struct quota_res quota_res = { 0 };
int ret = 0;
- if (s->reserved) {
- if (s->reservation_replicas >= replicas)
- return 0;
+ /*
+ * XXX: this could likely be quite a bit simpler, page reservations
+ * _should_ only be manipulated with page locked:
+ */
- } else {
- if (s->sectors == PAGE_SECTORS &&
- s->nr_replicas >= replicas &&
- !s->compressed)
- return 0;
- }
+ old = page_state_cmpxchg(s, new, {
+ if (new.reserved
+ ? (new.reservation_replicas < disk_res.nr_replicas)
+ : (new.sectors < PAGE_SECTORS ||
+ new.nr_replicas < disk_res.nr_replicas ||
+ new.compressed)) {
+ int sectors = (disk_res.nr_replicas * PAGE_SECTORS -
+ page_res_sectors(new) -
+ disk_res.sectors);
+
+ if (sectors > 0) {
+ ret = bch2_disk_reservation_add(c, &disk_res, sectors,
+ !check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
+ if (unlikely(ret))
+ goto err;
+ }
- ret = bch2_disk_reservation_get(c, &res, PAGE_SECTORS, replicas,
- !check_enospc
- ? BCH_DISK_RESERVATION_NOFAIL : 0);
- if (ret)
- return ret;
+ new.reserved = 1;
+ new.reservation_replicas = disk_res.nr_replicas;
+ }
- old = page_state_cmpxchg(s, new, {
- new.reserved = 1;
- new.reservation_replicas = res.nr_replicas;
+ if (!new.quota_reserved &&
+ new.sectors + new.dirty_sectors < PAGE_SECTORS) {
+ ret = bch2_quota_reservation_add(c, inode, &quota_res,
+ PAGE_SECTORS - quota_res.sectors,
+ check_enospc);
+ if (unlikely(ret))
+ goto err;
+
+ new.quota_reserved = 1;
+ }
});
- __bch2_put_page_reservation(c, old);
- return 0;
+ quota_res.sectors -= (new.quota_reserved - old.quota_reserved) * PAGE_SECTORS;
+ disk_res.sectors -= page_res_sectors(new) - page_res_sectors(old);
+err:
+ bch2_quota_reservation_put(c, inode, &quota_res);
+ bch2_disk_reservation_put(c, &disk_res);
+ return ret;
}
static void bch2_clear_page_bits(struct page *page)
{
struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct disk_reservation res = { .sectors = PAGE_SECTORS };
struct bch_page_state s;
if (!PagePrivate(page))
@@ -565,24 +680,29 @@ static void bch2_clear_page_bits(struct page *page)
ClearPagePrivate(page);
if (s.dirty_sectors)
- i_sectors_acct(c, inode, -s.dirty_sectors);
+ i_sectors_acct(c, inode, NULL, -s.dirty_sectors);
- if (s.reserved)
- bch2_disk_reservation_put(c, &res);
+ __bch2_put_page_reservation(c, inode, s);
}
int bch2_set_page_dirty(struct page *page)
{
struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct quota_res quota_res = { 0 };
struct bch_page_state old, new;
old = page_state_cmpxchg(page_state(page), new,
new.dirty_sectors = PAGE_SECTORS - new.sectors;
+ new.quota_reserved = 0;
);
+ quota_res.sectors += old.quota_reserved * PAGE_SECTORS;
+
if (old.dirty_sectors != new.dirty_sectors)
- i_sectors_acct(c, inode, new.dirty_sectors - old.dirty_sectors);
+ i_sectors_acct(c, inode, &quota_res,
+ new.dirty_sectors - old.dirty_sectors);
+ bch2_quota_reservation_put(c, inode, &quota_res);
return __set_page_dirty_nobuffers(page);
}
@@ -616,7 +736,7 @@ int bch2_page_mkwrite(struct vm_fault *vmf)
goto out;
}
- if (bch2_get_page_reservation(c, page, true)) {
+ if (bch2_get_page_reservation(c, inode, page, true)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
goto out;
@@ -701,7 +821,7 @@ static int bio_add_page_contig(struct bio *bio, struct page *page)
{
sector_t offset = (sector_t) page->index << PAGE_SECTOR_SHIFT;
- BUG_ON(!bio->bi_max_vecs);
+ EBUG_ON(!bio->bi_max_vecs);
if (!bio->bi_vcnt)
bio->bi_iter.bi_sector = offset;
@@ -1094,7 +1214,7 @@ static void bch2_writepage_io_done(struct closure *cl)
* before calling end_page_writeback:
*/
if (io->op.sectors_added != io->new_sectors)
- i_sectors_acct(c, io->op.inode,
+ i_sectors_acct(c, io->op.inode, NULL,
io->op.sectors_added - (s64) io->new_sectors);
bio_for_each_segment_all(bvec, bio, i)
@@ -1429,7 +1549,7 @@ readpage:
if (ret)
goto err;
out:
- ret = bch2_get_page_reservation(c, page, true);
+ ret = bch2_get_page_reservation(c, inode, page, true);
if (ret) {
if (!PageUptodate(page)) {
/*
@@ -1455,11 +1575,11 @@ err_unlock:
return ret;
}
-int bch2_write_end(struct file *filp, struct address_space *mapping,
+int bch2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
lockdep_assert_held(&inode->v.i_rwsem);
@@ -1486,7 +1606,7 @@ int bch2_write_end(struct file *filp, struct address_space *mapping,
inode->ei_last_dirtied = (unsigned long) current;
} else {
- bch2_put_page_reservation(c, page);
+ bch2_put_page_reservation(c, inode, page);
}
unlock_page(page);
@@ -1697,8 +1817,8 @@ loop:
ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
err:
__pagecache_block_put(&mapping->add_lock);
- inode_dio_end(&inode->v);
bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res);
+ bch2_quota_reservation_put(dio->iop.op.c, inode, &dio->quota_res);
if (dio->free_iov)
kfree(dio->iter.iov);
@@ -1708,6 +1828,9 @@ err:
sync = dio->sync;
bio_put(bio);
+ /* inode->i_dio_count is our ref on inode and thus bch_fs */
+ inode_dio_end(&inode->v);
+
if (!sync) {
req->ki_complete(req, ret, 0);
ret = -EIOCBQUEUED;
@@ -1753,6 +1876,7 @@ static int bch2_direct_IO_write(struct kiocb *req,
dio->sync = is_sync_kiocb(req) ||
offset + iter->count > inode->v.i_size;
dio->free_iov = false;
+ dio->quota_res.sectors = 0;
dio->iter = *iter;
bch2_fswrite_op_init(&dio->iop, c, inode, io_opts(c, inode), true);
dio->iop.op.write_point = writepoint_hashed((unsigned long) dio->task);
@@ -1762,6 +1886,11 @@ static int bch2_direct_IO_write(struct kiocb *req,
!c->opts.journal_flush_disabled)
dio->iop.op.flags |= BCH_WRITE_FLUSH;
+ ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
+ iter->count >> 9, true);
+ if (unlikely(ret))
+ goto err;
+
ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9,
c->opts.data_replicas, 0);
if (unlikely(ret)) {
@@ -1778,6 +1907,7 @@ static int bch2_direct_IO_write(struct kiocb *req,
return bch2_dio_write_loop(dio);
err:
bch2_disk_reservation_put(c, &dio->iop.op.res);
+ bch2_quota_reservation_put(c, inode, &dio->quota_res);
closure_debug_destroy(&dio->cl);
bio_put(bio);
return ret;
@@ -1932,7 +2062,7 @@ create:
* XXX: because we aren't currently tracking whether the page has actual
* data in it (vs. just 0s, or only partially written) this wrong. ick.
*/
- ret = bch2_get_page_reservation(c, page, false);
+ ret = bch2_get_page_reservation(c, inode, page, false);
BUG_ON(ret);
if (index == start >> PAGE_SHIFT &&
@@ -2302,11 +2432,19 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
sectors = reservation.k.size;
reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k);
+ if (!bkey_extent_is_allocation(k.k)) {
+ ret = bch2_quota_reservation_add(c, inode,
+ &i_sectors_hook.quota_res,
+ sectors, true);
+ if (unlikely(ret))
+ goto err_put_sectors_dirty;
+ }
+
if (reservation.v.nr_replicas < replicas ||
bch2_extent_is_compressed(k)) {
ret = bch2_disk_reservation_get(c, &disk_res, sectors,
replicas, 0);
- if (ret)
+ if (unlikely(ret))
goto err_put_sectors_dirty;
reservation.v.nr_replicas = disk_res.nr_replicas;
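
Within fs-io.c, the per-page bookkeeping (the new quota_reserved bit next to reserved/reservation_replicas) is updated with the page_state_cmpxchg() compare-and-exchange loop, and bch2_set_page_dirty() is where a page's quota reservation is consumed and handed to i_sectors_acct(). A stripped-down sketch of that update pattern, using C11 atomics and an invented two-field state rather than the real bch_page_state bitfields:

#include <stdatomic.h>
#include <stdint.h>

/* Invented, simplified stand-in for bch_page_state: two fields packed into
 * one atomically-updated 64-bit word. */
struct page_state {
	uint32_t dirty_sectors;
	uint32_t quota_reserved;	/* 0 or 1 in the real code */
};

union page_state_word {
	struct page_state s;
	uint64_t v;
};

/* cmpxchg loop in the spirit of page_state_cmpxchg(): recompute the new
 * state from the old one until the swap succeeds. */
static struct page_state set_page_dirty(_Atomic uint64_t *word,
					uint32_t page_sectors)
{
	union page_state_word old, new;

	old.v = atomic_load(word);
	do {
		new = old;
		new.s.dirty_sectors  = page_sectors;
		new.s.quota_reserved = 0;	/* reservation consumed here */
	} while (!atomic_compare_exchange_weak(word, &old.v, new.v));

	return old.s;	/* caller transfers old.quota_reserved to i_sectors_acct() */
}

int main(void)
{
	_Atomic uint64_t word = 0;
	struct page_state old = set_page_dirty(&word, 8);

	return (int) old.quota_reserved;	/* 0: no reservation was held */
}

The real bch_page_state packs more fields into the word; the point here is only the retry-until-swap-succeeds structure that the patch relies on when it clears quota_reserved.
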
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 6ae67f92dd34..bfbe574dd697 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -173,7 +173,8 @@ static int bch2_set_projid(struct bch_fs *c,
qid.q[QTYP_PRJ] = projid;
ret = bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid,
- inode->v.i_blocks);
+ inode->v.i_blocks +
+ inode->ei_quota_reserved);
if (ret)
return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 8869ba0fa38e..aba845b2d966 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -217,10 +217,8 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl);
- if (ret) {
- make_bad_inode(&inode->v);
+ if (ret)
goto err_make_bad;
- }
#endif
bch2_inode_init(c, &inode_u,
@@ -232,20 +230,17 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
inode_u.bi_project = dir->ei_qid.q[QTYP_PRJ];
ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC);
- if (ret) {
- make_bad_inode(&inode->v);
+ if (ret)
goto err_make_bad;
- }
ret = bch2_inode_create(c, &inode_u,
BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
- if (unlikely(ret)) {
- bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN);
- goto err_make_bad;
- }
+ if (unlikely(ret))
+ goto err_acct_quota;
bch2_vfs_inode_init(c, inode, &inode_u);
+ atomic_long_inc(&c->nr_inodes);
if (default_acl) {
ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT);
@@ -260,11 +255,12 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
}
insert_inode_hash(&inode->v);
- atomic_long_inc(&c->nr_inodes);
out:
posix_acl_release(default_acl);
posix_acl_release(acl);
return inode;
+err_acct_quota:
+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN);
err_make_bad:
/*
* indicate to bch_evict_inode that the inode was never actually
@@ -643,7 +639,8 @@ static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iatt
if (qtypes) {
ret = bch2_quota_transfer(c, qtypes, qid, inode->ei_qid,
- inode->v.i_blocks);
+ inode->v.i_blocks +
+ inode->ei_quota_reserved);
if (ret)
goto out_unlock;
}
@@ -953,6 +950,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c,
inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
inode->ei_journal_seq = 0;
+ inode->ei_quota_reserved = 0;
inode->ei_qid = bch_qid(bi);
inode->ei_str_hash = bch2_hash_info_init(c, bi);
inode->ei_inode = *bi;
@@ -1038,6 +1036,8 @@ static void bch2_evict_inode(struct inode *vinode)
clear_inode(&inode->v);
+ BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
+
if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
BCH_QUOTA_WARN);
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index dd0bd4ef6c7e..fddfb2d2fcaf 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -13,6 +13,7 @@ struct bch_inode_info {
struct mutex ei_update_lock;
u64 ei_journal_seq;
+ u64 ei_quota_reserved;
unsigned long ei_last_dirtied;
struct bch_qid ei_qid;
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index c550fd9e2729..854f7f551cb7 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -749,14 +749,14 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
}
if (qdq->d_fieldmask & QC_SPC_SOFT)
- new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit);
+ new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
if (qdq->d_fieldmask & QC_SPC_HARD)
- new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit);
+ new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
if (qdq->d_fieldmask & QC_INO_SOFT)
- new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_spc_softlimit);
+ new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
if (qdq->d_fieldmask & QC_INO_HARD)
- new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit);
+ new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &new_quota.k_i));
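
The quota.c hunk corrects two things in bch2_set_quota(): the VFS hands over d_spc_softlimit/d_spc_hardlimit in bytes while bcachefs stores space limits in 512-byte sectors (hence the new >> 9), and the inode limits had been copy-pasted from the space fields instead of d_ino_softlimit/d_ino_hardlimit. A quick standalone sanity check of the byte-to-sector conversion, with a made-up 1 GiB limit:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical 1 GiB hard limit as passed in via struct qc_dqblk (bytes). */
	uint64_t d_spc_hardlimit = 1ULL << 30;

	/* Stored in 512-byte sectors, as the patch now does. */
	uint64_t hardlimit_sectors = d_spc_hardlimit >> 9;

	printf("%llu bytes -> %llu sectors\n",
	       (unsigned long long) d_spc_hardlimit,
	       (unsigned long long) hardlimit_sectors);
	return 0;	/* prints: 1073741824 bytes -> 2097152 sectors */
}
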