diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-01-19 13:11:22 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-22 00:44:18 -0400 |
commit | 29c8f4d9157a87b41c5af37d348d96a1822cc6d1 (patch) | |
tree | c7a5d47c5c72d63f78aacf9fe675a0fea6d4409f | |
parent | cccad9983f45ab1bace7d5c12af0be5f8dd7bcdb (diff) |
bcachefs: quota limit enforcement
-rw-r--r-- | fs/bcachefs/fs-io.c | 246 | ||||
-rw-r--r-- | fs/bcachefs/fs-ioctl.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 24 | ||||
-rw-r--r-- | fs/bcachefs/fs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/quota.c | 8 |
5 files changed, 211 insertions, 71 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 8dbd8a7418bb..dd9d868e604a 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -27,9 +27,14 @@ #include <trace/events/bcachefs.h> #include <trace/events/writeback.h> +struct quota_res { + u64 sectors; +}; + struct i_sectors_hook { struct extent_insert_hook hook; struct bch_inode_info *inode; + struct quota_res quota_res; s64 sectors; u64 new_i_size; unsigned flags; @@ -62,6 +67,7 @@ struct dio_write { unsigned loop:1, sync:1, free_iov:1; + struct quota_res quota_res; struct iov_iter iter; struct iovec inline_vecs[2]; @@ -107,6 +113,67 @@ static int write_invalidate_inode_pages_range(struct address_space *mapping, return ret; } +/* quotas */ + +#ifdef CONFIG_BCACHEFS_QUOTA + +static void bch2_quota_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res) +{ + if (!res->sectors) + return; + + mutex_lock(&inode->ei_update_lock); + BUG_ON(res->sectors > inode->ei_quota_reserved); + + bch2_quota_acct(c, inode->ei_qid, Q_SPC, + -((s64) res->sectors), BCH_QUOTA_PREALLOC); + inode->ei_quota_reserved -= res->sectors; + mutex_unlock(&inode->ei_update_lock); + + res->sectors = 0; +} + +static int bch2_quota_reservation_add(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res, + unsigned sectors, + bool check_enospc) +{ + int ret; + + mutex_lock(&inode->ei_update_lock); + ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, + check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK); + if (likely(!ret)) { + inode->ei_quota_reserved += sectors; + res->sectors += sectors; + } + mutex_unlock(&inode->ei_update_lock); + + return ret; +} + +#else + +static void bch2_quota_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res) +{ +} + +static int bch2_quota_reservation_add(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res, + unsigned sectors, + bool check_enospc) +{ + return 0; +} + +#endif + /* i_size updates: */ static int inode_set_size(struct bch_inode_info *inode, @@ -128,16 +195,28 @@ static int __must_check bch2_write_inode_size(struct bch_fs *c, return __bch2_write_inode(c, inode, inode_set_size, &new_size); } -static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors) +static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, + struct quota_res *quota_res, int sectors) { +#ifdef CONFIG_BCACHEFS_QUOTA + if (quota_res && sectors > 0) { + BUG_ON(sectors > quota_res->sectors); + BUG_ON(sectors > inode->ei_quota_reserved); + + quota_res->sectors -= sectors; + inode->ei_quota_reserved -= sectors; + } else { + bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN); + } +#endif inode->v.i_blocks += sectors; - bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN); } -static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors) +static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, + struct quota_res *quota_res, int sectors) { mutex_lock(&inode->ei_update_lock); - __i_sectors_acct(c, inode, sectors); + __i_sectors_acct(c, inode, quota_res, sectors); mutex_unlock(&inode->ei_update_lock); } @@ -186,11 +265,13 @@ static int i_sectors_dirty_finish(struct bch_fs *c, struct i_sectors_hook *h) if (h->new_i_size != U64_MAX) i_size_write(&h->inode->v, h->new_i_size); - __i_sectors_acct(c, h->inode, h->sectors); + __i_sectors_acct(c, h->inode, &h->quota_res, h->sectors); ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h); mutex_unlock(&h->inode->ei_update_lock); + bch2_quota_reservation_put(c, h->inode, &h->quota_res); + h->sectors = 0; return ret; @@ -401,9 +482,12 @@ err: bch2_btree_iter_unlock(&extent_iter); bch2_btree_iter_unlock(&inode_iter); - if (op->is_dio) - i_sectors_acct(wop->c, op->inode, + if (op->is_dio) { + struct dio_write *dio = container_of(op, struct dio_write, iop); + + i_sectors_acct(wop->c, op->inode, &dio->quota_res, op->sectors_added - orig_sectors_added); + } return ret; } @@ -456,6 +540,9 @@ union { struct { unsigned reserved:1; unsigned reservation_replicas:4; + /* Owns PAGE_SECTORS sized quota reservation: */ + unsigned quota_reserved:1; + /* * Number of sectors on disk - for i_blocks * Uncompressed size, not compressed size: @@ -495,67 +582,95 @@ static inline struct bch_page_state *page_state(struct page *page) return s; } -static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_page_state s) +static inline unsigned page_res_sectors(struct bch_page_state s) { - struct disk_reservation res = { - .sectors = s.reserved ? PAGE_SECTORS * s.reservation_replicas : 0 - }; + return s.reserved ? s.reservation_replicas * PAGE_SECTORS : 0; +} + +static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, + struct bch_page_state s) +{ + struct disk_reservation res = { .sectors = page_res_sectors(s) }; + struct quota_res quota_res = { .sectors = s.quota_reserved ? PAGE_SECTORS : 0 }; + + bch2_quota_reservation_put(c, inode, "a_res); bch2_disk_reservation_put(c, &res); } -static void bch2_put_page_reservation(struct bch_fs *c, struct page *page) +static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, + struct page *page) { struct bch_page_state s; s = page_state_cmpxchg(page_state(page), s, { - if (!s.reserved) - return; - s.reserved = 0; + s.reserved = 0; + s.quota_reserved = 0; }); - __bch2_put_page_reservation(c, s); + __bch2_put_page_reservation(c, inode, s); } -static int bch2_get_page_reservation(struct bch_fs *c, struct page *page, - bool check_enospc) +static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, + struct page *page, bool check_enospc) { struct bch_page_state *s = page_state(page), new, old; - unsigned replicas = READ_ONCE(c->opts.data_replicas); - struct disk_reservation res; + struct disk_reservation disk_res = bch2_disk_reservation_init(c, + READ_ONCE(c->opts.data_replicas)); + struct quota_res quota_res = { 0 }; int ret = 0; - if (s->reserved) { - if (s->reservation_replicas >= replicas) - return 0; + /* + * XXX: this could likely be quite a bit simpler, page reservations + * _should_ only be manipulated with page locked: + */ - } else { - if (s->sectors == PAGE_SECTORS && - s->nr_replicas >= replicas && - !s->compressed) - return 0; - } + old = page_state_cmpxchg(s, new, { + if (new.reserved + ? (new.reservation_replicas < disk_res.nr_replicas) + : (new.sectors < PAGE_SECTORS || + new.nr_replicas < disk_res.nr_replicas || + new.compressed)) { + int sectors = (disk_res.nr_replicas * PAGE_SECTORS - + page_res_sectors(new) - + disk_res.sectors); + + if (sectors > 0) { + ret = bch2_disk_reservation_add(c, &disk_res, sectors, + !check_enospc + ? BCH_DISK_RESERVATION_NOFAIL : 0); + if (unlikely(ret)) + goto err; + } - ret = bch2_disk_reservation_get(c, &res, PAGE_SECTORS, replicas, - !check_enospc - ? BCH_DISK_RESERVATION_NOFAIL : 0); - if (ret) - return ret; + new.reserved = 1; + new.reservation_replicas = disk_res.nr_replicas; + } - old = page_state_cmpxchg(s, new, { - new.reserved = 1; - new.reservation_replicas = res.nr_replicas; + if (!new.quota_reserved && + new.sectors + new.dirty_sectors < PAGE_SECTORS) { + ret = bch2_quota_reservation_add(c, inode, "a_res, + PAGE_SECTORS - quota_res.sectors, + check_enospc); + if (unlikely(ret)) + goto err; + + new.quota_reserved = 1; + } }); - __bch2_put_page_reservation(c, old); - return 0; + quota_res.sectors -= (new.quota_reserved - old.quota_reserved) * PAGE_SECTORS; + disk_res.sectors -= page_res_sectors(new) - page_res_sectors(old); +err: + bch2_quota_reservation_put(c, inode, "a_res); + bch2_disk_reservation_put(c, &disk_res); + return ret; } static void bch2_clear_page_bits(struct page *page) { struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct disk_reservation res = { .sectors = PAGE_SECTORS }; struct bch_page_state s; if (!PagePrivate(page)) @@ -565,24 +680,29 @@ static void bch2_clear_page_bits(struct page *page) ClearPagePrivate(page); if (s.dirty_sectors) - i_sectors_acct(c, inode, -s.dirty_sectors); + i_sectors_acct(c, inode, NULL, -s.dirty_sectors); - if (s.reserved) - bch2_disk_reservation_put(c, &res); + __bch2_put_page_reservation(c, inode, s); } int bch2_set_page_dirty(struct page *page) { struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct quota_res quota_res = { 0 }; struct bch_page_state old, new; old = page_state_cmpxchg(page_state(page), new, new.dirty_sectors = PAGE_SECTORS - new.sectors; + new.quota_reserved = 0; ); + quota_res.sectors += old.quota_reserved * PAGE_SECTORS; + if (old.dirty_sectors != new.dirty_sectors) - i_sectors_acct(c, inode, new.dirty_sectors - old.dirty_sectors); + i_sectors_acct(c, inode, "a_res, + new.dirty_sectors - old.dirty_sectors); + bch2_quota_reservation_put(c, inode, "a_res); return __set_page_dirty_nobuffers(page); } @@ -616,7 +736,7 @@ int bch2_page_mkwrite(struct vm_fault *vmf) goto out; } - if (bch2_get_page_reservation(c, page, true)) { + if (bch2_get_page_reservation(c, inode, page, true)) { unlock_page(page); ret = VM_FAULT_SIGBUS; goto out; @@ -701,7 +821,7 @@ static int bio_add_page_contig(struct bio *bio, struct page *page) { sector_t offset = (sector_t) page->index << PAGE_SECTOR_SHIFT; - BUG_ON(!bio->bi_max_vecs); + EBUG_ON(!bio->bi_max_vecs); if (!bio->bi_vcnt) bio->bi_iter.bi_sector = offset; @@ -1094,7 +1214,7 @@ static void bch2_writepage_io_done(struct closure *cl) * before calling end_page_writeback: */ if (io->op.sectors_added != io->new_sectors) - i_sectors_acct(c, io->op.inode, + i_sectors_acct(c, io->op.inode, NULL, io->op.sectors_added - (s64) io->new_sectors); bio_for_each_segment_all(bvec, bio, i) @@ -1429,7 +1549,7 @@ readpage: if (ret) goto err; out: - ret = bch2_get_page_reservation(c, page, true); + ret = bch2_get_page_reservation(c, inode, page, true); if (ret) { if (!PageUptodate(page)) { /* @@ -1455,11 +1575,11 @@ err_unlock: return ret; } -int bch2_write_end(struct file *filp, struct address_space *mapping, +int bch2_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct bch_inode_info *inode = to_bch_ei(page->mapping->host); + struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; lockdep_assert_held(&inode->v.i_rwsem); @@ -1486,7 +1606,7 @@ int bch2_write_end(struct file *filp, struct address_space *mapping, inode->ei_last_dirtied = (unsigned long) current; } else { - bch2_put_page_reservation(c, page); + bch2_put_page_reservation(c, inode, page); } unlock_page(page); @@ -1696,8 +1816,8 @@ loop: ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9); err: __pagecache_block_put(&mapping->add_lock); - inode_dio_end(&inode->v); bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res); + bch2_quota_reservation_put(dio->iop.op.c, inode, &dio->quota_res); if (dio->free_iov) kfree(dio->iter.iov); @@ -1707,6 +1827,9 @@ err: sync = dio->sync; bio_put(bio); + /* inode->i_dio_count is our ref on inode and thus bch_fs */ + inode_dio_end(&inode->v); + if (!sync) { req->ki_complete(req, ret, 0); ret = -EIOCBQUEUED; @@ -1752,6 +1875,7 @@ static int bch2_direct_IO_write(struct kiocb *req, dio->sync = is_sync_kiocb(req) || offset + iter->count > inode->v.i_size; dio->free_iov = false; + dio->quota_res.sectors = 0; dio->iter = *iter; bch2_fswrite_op_init(&dio->iop, c, inode, io_opts(c, inode), true); dio->iop.op.write_point = writepoint_hashed((unsigned long) dio->task); @@ -1761,6 +1885,11 @@ static int bch2_direct_IO_write(struct kiocb *req, !c->opts.journal_flush_disabled) dio->iop.op.flags |= BCH_WRITE_FLUSH; + ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, + iter->count >> 9, true); + if (unlikely(ret)) + goto err; + ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9, c->opts.data_replicas, 0); if (unlikely(ret)) { @@ -1777,6 +1906,7 @@ static int bch2_direct_IO_write(struct kiocb *req, return bch2_dio_write_loop(dio); err: bch2_disk_reservation_put(c, &dio->iop.op.res); + bch2_quota_reservation_put(c, inode, &dio->quota_res); closure_debug_destroy(&dio->cl); bio_put(bio); return ret; @@ -1931,7 +2061,7 @@ create: * XXX: because we aren't currently tracking whether the page has actual * data in it (vs. just 0s, or only partially written) this wrong. ick. */ - ret = bch2_get_page_reservation(c, page, false); + ret = bch2_get_page_reservation(c, inode, page, false); BUG_ON(ret); if (index == start >> PAGE_SHIFT && @@ -2301,11 +2431,19 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, sectors = reservation.k.size; reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k); + if (!bkey_extent_is_allocation(k.k)) { + ret = bch2_quota_reservation_add(c, inode, + &i_sectors_hook.quota_res, + sectors, true); + if (unlikely(ret)) + goto err_put_sectors_dirty; + } + if (reservation.v.nr_replicas < replicas || bch2_extent_is_compressed(k)) { ret = bch2_disk_reservation_get(c, &disk_res, sectors, replicas, 0); - if (ret) + if (unlikely(ret)) goto err_put_sectors_dirty; reservation.v.nr_replicas = disk_res.nr_replicas; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 6ae67f92dd34..bfbe574dd697 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -173,7 +173,8 @@ static int bch2_set_projid(struct bch_fs *c, qid.q[QTYP_PRJ] = projid; ret = bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid, - inode->v.i_blocks); + inode->v.i_blocks + + inode->ei_quota_reserved); if (ret) return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 8869ba0fa38e..aba845b2d966 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -217,10 +217,8 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c, #ifdef CONFIG_BCACHEFS_POSIX_ACL ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl); - if (ret) { - make_bad_inode(&inode->v); + if (ret) goto err_make_bad; - } #endif bch2_inode_init(c, &inode_u, @@ -232,20 +230,17 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c, inode_u.bi_project = dir->ei_qid.q[QTYP_PRJ]; ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC); - if (ret) { - make_bad_inode(&inode->v); + if (ret) goto err_make_bad; - } ret = bch2_inode_create(c, &inode_u, BLOCKDEV_INODE_MAX, 0, &c->unused_inode_hint); - if (unlikely(ret)) { - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN); - goto err_make_bad; - } + if (unlikely(ret)) + goto err_acct_quota; bch2_vfs_inode_init(c, inode, &inode_u); + atomic_long_inc(&c->nr_inodes); if (default_acl) { ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT); @@ -260,11 +255,12 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c, } insert_inode_hash(&inode->v); - atomic_long_inc(&c->nr_inodes); out: posix_acl_release(default_acl); posix_acl_release(acl); return inode; +err_acct_quota: + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN); err_make_bad: /* * indicate to bch_evict_inode that the inode was never actually @@ -643,7 +639,8 @@ static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iatt if (qtypes) { ret = bch2_quota_transfer(c, qtypes, qid, inode->ei_qid, - inode->v.i_blocks); + inode->v.i_blocks + + inode->ei_quota_reserved); if (ret) goto out_unlock; } @@ -953,6 +950,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); inode->ei_journal_seq = 0; + inode->ei_quota_reserved = 0; inode->ei_qid = bch_qid(bi); inode->ei_str_hash = bch2_hash_info_init(c, bi); inode->ei_inode = *bi; @@ -1038,6 +1036,8 @@ static void bch2_evict_inode(struct inode *vinode) clear_inode(&inode->v); + BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); + if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), BCH_QUOTA_WARN); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index dd0bd4ef6c7e..fddfb2d2fcaf 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -13,6 +13,7 @@ struct bch_inode_info { struct mutex ei_update_lock; u64 ei_journal_seq; + u64 ei_quota_reserved; unsigned long ei_last_dirtied; struct bch_qid ei_qid; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index c550fd9e2729..854f7f551cb7 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -749,14 +749,14 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, } if (qdq->d_fieldmask & QC_SPC_SOFT) - new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit); + new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9); if (qdq->d_fieldmask & QC_SPC_HARD) - new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit); + new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9); if (qdq->d_fieldmask & QC_INO_SOFT) - new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_spc_softlimit); + new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit); if (qdq->d_fieldmask & QC_INO_HARD) - new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit); + new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0, BTREE_INSERT_ENTRY(&iter, &new_quota.k_i)); |