summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2017-10-12 18:19:35 -0800
committerKent Overstreet <kent.overstreet@gmail.com>2018-05-22 00:44:18 -0400
commit67e5a2061eb2a8eceecb63ac810fd9d84d2b7f07 (patch)
tree4db88c32add88ece582ba12c308d1aadd6833d56
parent6f7e262cb716fbfffe3ba96c8de4384b9b2fd2f5 (diff)
bcachefs: Quota accounting
-rw-r--r--fs/bcachefs/Kconfig5
-rw-r--r--fs/bcachefs/Makefile1
-rw-r--r--fs/bcachefs/bcachefs.h4
-rw-r--r--fs/bcachefs/bcachefs_format.h64
-rw-r--r--fs/bcachefs/bkey.h2
-rw-r--r--fs/bcachefs/bkey_methods.c2
-rw-r--r--fs/bcachefs/fs-io.c2
-rw-r--r--fs/bcachefs/fs-ioctl.c28
-rw-r--r--fs/bcachefs/fs.c102
-rw-r--r--fs/bcachefs/fs.h2
-rw-r--r--fs/bcachefs/opts.c32
-rw-r--r--fs/bcachefs/opts.h9
-rw-r--r--fs/bcachefs/quota.c786
-rw-r--r--fs/bcachefs/quota.h48
-rw-r--r--fs/bcachefs/quota_types.h36
-rw-r--r--fs/bcachefs/super-io.c13
-rw-r--r--fs/bcachefs/super.c17
17 files changed, 1121 insertions, 32 deletions
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index 6401122ea1fb..415861fac070 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -20,6 +20,11 @@ config BCACHEFS_FS
The bcachefs filesystem - a modern, copy on write filesystem, with
support for multiple devices, compression, checksumming, etc.
+config BCACHEFS_QUOTA
+ bool "bcachefs quota support"
+ depends on BCACHEFS_FS
+ select QUOTACTL
+
config BCACHEFS_POSIX_ACL
bool "bcachefs POSIX ACL support"
depends on BCACHEFS_FS
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index dd82231dd620..36136ec0525a 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -35,6 +35,7 @@ bcachefs-y := \
move.o \
movinggc.o \
opts.o \
+ quota.o \
siphash.o \
six.o \
super.o \
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f161d83160fe..4ae78e785c7b 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -281,6 +281,7 @@ do { \
#include "clock_types.h"
#include "journal_types.h"
#include "keylist_types.h"
+#include "quota_types.h"
#include "super_types.h"
/*
@@ -670,6 +671,9 @@ struct bch_fs {
unsigned writeback_pages_max;
atomic_long_t nr_inodes;
+ /* QUOTAS */
+ struct bch_memquota_type quotas[QTYP_NR];
+
/* DEBUG JUNK */
struct dentry *debug;
struct btree_debug btree_debug[BTREE_ID_NR];
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index d65b5e6656ba..cb9e450ba286 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -606,11 +606,13 @@ BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION);
BCH_INODE_FIELD(bi_generation, 32) \
BCH_INODE_FIELD(bi_dev, 32) \
BCH_INODE_FIELD(bi_data_checksum, 8) \
- BCH_INODE_FIELD(bi_compression, 8)
+ BCH_INODE_FIELD(bi_compression, 8) \
+ BCH_INODE_FIELD(bi_project, 32)
#define BCH_INODE_FIELDS_INHERIT() \
BCH_INODE_FIELD(bi_data_checksum) \
- BCH_INODE_FIELD(bi_compression)
+ BCH_INODE_FIELD(bi_compression) \
+ BCH_INODE_FIELD(bi_project)
enum {
/*
@@ -737,6 +739,36 @@ struct bch_alloc {
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(alloc, BCH_ALLOC);
+/* Quotas: */
+
+/* Key type of the values stored in the quotas btree (struct bch_quota): */
+enum {
+	BCH_QUOTA		= 128,
+};
+
+/*
+ * Kinds of quota that are tracked. The value is used as an index into
+ * bch_fs.quotas[] and bch_qid.q[], and as the inode field of the key position
+ * in the quotas btree.
+ */
+enum quota_types {
+	QTYP_USR		= 0,	/* per user id */
+	QTYP_GRP		= 1,	/* per group id */
+	QTYP_PRJ		= 2,	/* per project id */
+	QTYP_NR			= 3,	/* number of quota types */
+};
+
+/* Resources accounted per quota id; indexes the c[] counter arrays. */
+enum quota_counters {
+	Q_SPC			= 0,	/* space, accounted from inode sector counts */
+	Q_INO			= 1,	/* number of inodes */
+	Q_COUNTERS		= 2,	/* number of counters */
+};
+
+/*
+ * On disk limits for one counter, little endian. The limit checking code
+ * treats a value of 0 as "no limit".
+ */
+struct bch_quota_counter {
+	__le64			hardlimit;
+	__le64			softlimit;
+};
+
+/*
+ * Value of a BCH_QUOTA key: the limits for a single id of a single quota type,
+ * one entry per counter (indexed by enum quota_counters).
+ */
+struct bch_quota {
+	struct bch_val		v;
+	struct bch_quota_counter c[Q_COUNTERS];
+} __attribute__((packed, aligned(8)));
+BKEY_VAL_TYPE(quota,	BCH_QUOTA);
+
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@@ -749,7 +781,8 @@ struct bch_sb_field {
x(journal, 0) \
x(members, 1) \
x(crypt, 2) \
- x(replicas, 3)
+ x(replicas, 3) \
+ x(quota, 4)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@@ -883,6 +916,23 @@ struct bch_sb_field_replicas {
struct bch_replicas_entry entries[0];
};
+/* BCH_SB_FIELD_quota: */
+
+/* Per counter settings kept in the superblock, little endian: */
+struct bch_sb_quota_counter {
+	__le32				timelimit;
+	__le32				warnlimit;
+};
+
+/*
+ * Superblock settings for one quota type: one bch_sb_quota_counter per
+ * counter (indexed by enum quota_counters).
+ * NOTE(review): nothing visible in this patch reads or writes @flags.
+ */
+struct bch_sb_quota_type {
+	__le64				flags;
+	struct bch_sb_quota_counter	c[Q_COUNTERS];
+};
+
+/* Optional superblock section holding the settings of every quota type: */
+struct bch_sb_field_quota {
+	struct bch_sb_field		field;
+	struct bch_sb_quota_type	q[QTYP_NR];	/* indexed by enum quota_types */
+} __attribute__((packed, aligned(8)));
+
/* Superblock: */
/*
@@ -986,6 +1036,11 @@ LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57);
+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58);
+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59);
+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
+
+/* 60-64 unused */
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
@@ -1181,7 +1236,8 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
DEF_BTREE_ID(INODES, 1, "inodes") \
DEF_BTREE_ID(DIRENTS, 2, "dirents") \
DEF_BTREE_ID(XATTRS, 3, "xattrs") \
- DEF_BTREE_ID(ALLOC, 4, "alloc")
+ DEF_BTREE_ID(ALLOC, 4, "alloc") \
+ DEF_BTREE_ID(QUOTAS, 5, "quotas")
#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index 42ce39c9f747..f665e2e138bf 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -586,6 +586,8 @@ BKEY_VAL_ACCESSORS(xattr, BCH_XATTR);
BKEY_VAL_ACCESSORS(alloc, BCH_ALLOC);
+BKEY_VAL_ACCESSORS(quota, BCH_QUOTA);
+
/* byte order helpers */
#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 1736a483b7cf..3b3a09eb1cce 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -7,6 +7,7 @@
#include "error.h"
#include "extents.h"
#include "inode.h"
+#include "quota.h"
#include "xattr.h"
const struct bkey_ops *bch2_bkey_ops[] = {
@@ -15,6 +16,7 @@ const struct bkey_ops *bch2_bkey_ops[] = {
[BKEY_TYPE_DIRENTS] = &bch2_bkey_dirent_ops,
[BKEY_TYPE_XATTRS] = &bch2_bkey_xattr_ops,
[BKEY_TYPE_ALLOC] = &bch2_bkey_alloc_ops,
+ [BKEY_TYPE_QUOTAS] = &bch2_bkey_quota_ops,
[BKEY_TYPE_BTREE] = &bch2_bkey_btree_ops,
};
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 28b23ac3f7fd..20273fdd78b1 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -12,6 +12,7 @@
#include "journal.h"
#include "io.h"
#include "keylist.h"
+#include "quota.h"
#include <linux/aio.h>
#include <linux/backing-dev.h>
@@ -129,6 +130,7 @@ static int __must_check bch2_write_inode_size(struct bch_fs *c,
static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors)
{
inode->v.i_blocks += sectors;
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN);
}
static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors)
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 24228c8eb2d7..6ae67f92dd34 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -4,6 +4,7 @@
#include "chardev.h"
#include "fs.h"
#include "fs-ioctl.h"
+#include "quota.h"
#include <linux/compat.h>
#include <linux/mount.h>
@@ -154,10 +155,32 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
struct fsxattr fa = { 0 };
fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
+ fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
return copy_to_user(arg, &fa, sizeof(fa));
}
+/*
+ * Move @inode to project @projid: its usage (i_blocks of space, plus the
+ * inode itself) is transferred from the old project quota to the new one, and
+ * the in-memory qid is only updated once the transfer succeeded.
+ *
+ * Returns 0 on success (including when the project id is unchanged), or the
+ * error returned by bch2_quota_transfer().
+ */
+static int bch2_set_projid(struct bch_fs *c,
+			   struct bch_inode_info *inode,
+			   u32 projid)
+{
+	struct bch_qid new_qid;
+	int err;
+
+	if (inode->ei_qid.q[QTYP_PRJ] == projid)
+		return 0;
+
+	new_qid = inode->ei_qid;
+	new_qid.q[QTYP_PRJ] = projid;
+
+	err = bch2_quota_transfer(c, 1 << QTYP_PRJ, new_qid, inode->ei_qid,
+				  inode->v.i_blocks);
+	if (!err)
+		inode->ei_qid.q[QTYP_PRJ] = projid;
+
+	return err;
+}
+
+
static int bch2_ioc_fssetxattr(struct bch_fs *c,
struct file *file,
struct bch_inode_info *inode,
@@ -185,9 +208,14 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
+ ret = bch2_set_projid(c, inode, fa.fsx_projid);
+ if (ret)
+ goto err_unlock;
+
ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags);
if (!ret)
bch2_inode_flags_to_vfs(inode);
+err_unlock:
mutex_unlock(&inode->ei_update_lock);
err:
inode_unlock(&inode->v);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 2ab2efc050ce..8869ba0fa38e 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -15,6 +15,7 @@
#include "io.h"
#include "journal.h"
#include "keylist.h"
+#include "quota.h"
#include "super.h"
#include "xattr.h"
@@ -116,6 +117,7 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
inode_u.bi_mode = inode->v.i_mode;
inode_u.bi_uid = i_uid_read(&inode->v);
inode_u.bi_gid = i_gid_read(&inode->v);
+ inode_u.bi_project = inode->ei_qid.q[QTYP_PRJ];
inode_u.bi_nlink= i_nlink - nlink_bias(inode->v.i_mode);
inode_u.bi_dev = inode->v.i_rdev;
inode_u.bi_atime= timespec_to_bch2_time(c, inode->v.i_atime);
@@ -131,8 +133,10 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
} while (ret == -EINTR);
- if (!ret)
+ if (!ret) {
inode->ei_inode = inode_u;
+ inode->ei_qid = bch_qid(&inode_u);
+ }
out:
bch2_btree_iter_unlock(&iter);
@@ -215,7 +219,7 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl);
if (ret) {
make_bad_inode(&inode->v);
- goto err;
+ goto err_make_bad;
}
#endif
@@ -225,16 +229,20 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
inode->v.i_mode, rdev,
&dir->ei_inode);
+ inode_u.bi_project = dir->ei_qid.q[QTYP_PRJ];
+
+ ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC);
+ if (ret) {
+ make_bad_inode(&inode->v);
+ goto err_make_bad;
+ }
+
ret = bch2_inode_create(c, &inode_u,
BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
if (unlikely(ret)) {
- /*
- * indicate to bch_evict_inode that the inode was never actually
- * created:
- */
- make_bad_inode(&inode->v);
- goto err;
+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN);
+ goto err_make_bad;
}
bch2_vfs_inode_init(c, inode, &inode_u);
@@ -257,6 +265,12 @@ out:
posix_acl_release(default_acl);
posix_acl_release(acl);
return inode;
+err_make_bad:
+ /*
+ * indicate to bch_evict_inode that the inode was never actually
+ * created:
+ */
+ make_bad_inode(&inode->v);
err:
clear_nlink(&inode->v);
iput(&inode->v);
@@ -604,36 +618,65 @@ static int bch2_rename2(struct inode *old_vdir, struct dentry *old_dentry,
return bch2_rename(c, old_dir, old_dentry, new_dir, new_dentry);
}
-static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
+static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iattr)
{
- struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- int ret = 0;
+ struct bch_qid qid = inode->ei_qid;
+ unsigned qtypes = 0;
+ int ret;
- lockdep_assert_held(&inode->v.i_rwsem);
+ mutex_lock(&inode->ei_update_lock);
- ret = setattr_prepare(dentry, iattr);
- if (ret)
- return ret;
+ if (c->opts.usrquota &&
+ (iattr->ia_valid & ATTR_UID) &&
+ !uid_eq(iattr->ia_uid, inode->v.i_uid)) {
+ qid.q[QTYP_USR] = from_kuid(&init_user_ns, iattr->ia_uid),
+ qtypes |= 1 << QTYP_USR;
+ }
- if (iattr->ia_valid & ATTR_SIZE) {
- ret = bch2_truncate(inode, iattr);
- } else {
- mutex_lock(&inode->ei_update_lock);
- setattr_copy(&inode->v, iattr);
- ret = bch2_write_inode(c, inode);
- mutex_unlock(&inode->ei_update_lock);
+ if (c->opts.grpquota &&
+ (iattr->ia_valid & ATTR_GID) &&
+ !gid_eq(iattr->ia_gid, inode->v.i_gid)) {
+ qid.q[QTYP_GRP] = from_kgid(&init_user_ns, iattr->ia_gid);
+ qtypes |= 1 << QTYP_GRP;
}
- if (unlikely(ret))
- return ret;
+ if (qtypes) {
+ ret = bch2_quota_transfer(c, qtypes, qid, inode->ei_qid,
+ inode->v.i_blocks);
+ if (ret)
+ goto out_unlock;
+ }
+
+ setattr_copy(&inode->v, iattr);
- if (iattr->ia_valid & ATTR_MODE)
+ ret = bch2_write_inode(c, inode);
+out_unlock:
+ mutex_unlock(&inode->ei_update_lock);
+
+ if (!ret &&
+ iattr->ia_valid & ATTR_MODE)
ret = posix_acl_chmod(&inode->v, inode->v.i_mode);
return ret;
}
+/*
+ * setattr entry point: after the generic permission/validity checks, size
+ * changes are handled by bch2_truncate() and everything else by
+ * bch2_setattr_nonsize(). The caller must hold the inode's i_rwsem.
+ */
+static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+	int ret;
+
+	lockdep_assert_held(&inode->v.i_rwsem);
+
+	ret = setattr_prepare(dentry, iattr);
+	if (ret)
+		return ret;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		return bch2_truncate(inode, iattr);
+
+	return bch2_setattr_nonsize(inode, iattr);
+}
+
+
static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
{
struct bch_fs *c = vdir->i_sb->s_fs_info;
@@ -910,6 +953,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c,
inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
inode->ei_journal_seq = 0;
+ inode->ei_qid = bch_qid(bi);
inode->ei_str_hash = bch2_hash_info_init(c, bi);
inode->ei_inode = *bi;
@@ -995,6 +1039,10 @@ static void bch2_evict_inode(struct inode *vinode)
clear_inode(&inode->v);
if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
+ BCH_QUOTA_WARN);
+ bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
+ BCH_QUOTA_WARN);
bch2_inode_rm(c, inode->v.i_ino);
atomic_long_dec(&c->nr_inodes);
}
@@ -1279,6 +1327,10 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_op = &bch_super_operations;
sb->s_export_op = &bch_export_ops;
+#ifdef CONFIG_BCACHEFS_QUOTA
+ sb->s_qcop = &bch2_quotactl_operations;
+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
+#endif
sb->s_xattr = bch2_xattr_handlers;
sb->s_magic = BCACHEFS_STATFS_MAGIC;
sb->s_time_gran = c->sb.time_precision;
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index 652105fb6ac8..dd0bd4ef6c7e 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -3,6 +3,7 @@
#include "opts.h"
#include "str_hash.h"
+#include "quota_types.h"
#include <linux/seqlock.h>
#include <linux/stat.h>
@@ -13,6 +14,7 @@ struct bch_inode_info {
struct mutex ei_update_lock;
u64 ei_journal_seq;
unsigned long ei_last_dirtied;
+ struct bch_qid ei_qid;
struct bch_hash_info ei_str_hash;
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index e6833d95f0ed..eae63cf82c40 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -167,6 +167,27 @@ int bch2_opt_lookup(const char *name)
return -1;
}
+/* An alternative spelling @s1 of the option whose real name is @s2: */
+struct synonym {
+	const char	*s1, *s2;
+};
+
+/* Mount option aliases, consulted by bch2_mount_opt_lookup(): */
+static const struct synonym bch_opt_synonyms[] = {
+	{ "quota",	"usrquota" },
+};
+
+/*
+ * Like bch2_opt_lookup(), but first maps any alias listed in
+ * bch_opt_synonyms[] to the real option name.
+ */
+static int bch2_mount_opt_lookup(const char *name)
+{
+	unsigned idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(bch_opt_synonyms); idx++) {
+		const struct synonym *syn = &bch_opt_synonyms[idx];
+
+		if (strcmp(name, syn->s1) == 0)
+			name = syn->s2;
+	}
+
+	return bch2_opt_lookup(name);
+}
+
+
int bch2_opt_parse(const struct bch_option *opt, const char *val, u64 *res)
{
ssize_t ret;
@@ -211,7 +232,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
val = opt;
if (val) {
- id = bch2_opt_lookup(name);
+ id = bch2_mount_opt_lookup(name);
if (id < 0)
goto bad_opt;
@@ -219,12 +240,12 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
if (ret < 0)
goto bad_val;
} else {
- id = bch2_opt_lookup(name);
+ id = bch2_mount_opt_lookup(name);
v = 1;
if (id < 0 &&
!strncmp("no", name, 2)) {
- id = bch2_opt_lookup(name + 2);
+ id = bch2_mount_opt_lookup(name + 2);
v = 0;
}
@@ -242,6 +263,11 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
!IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL))
goto bad_opt;
+ if ((id == Opt_usrquota ||
+ id == Opt_grpquota) &&
+ !IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
+ goto bad_opt;
+
bch2_opt_set_by_id(opts, id, v);
}
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index bc8fd46aeb23..5d42dd5f570d 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -112,6 +112,15 @@ enum opt_type {
BCH_OPT(acl, u8, OPT_MOUNT, \
OPT_BOOL(), \
BCH_SB_POSIX_ACL, true) \
+ BCH_OPT(usrquota, u8, OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_USRQUOTA, false) \
+ BCH_OPT(grpquota, u8, OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_GRPQUOTA, false) \
+ BCH_OPT(prjquota, u8, OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_PRJQUOTA, false) \
BCH_OPT(degraded, u8, OPT_MOUNT, \
OPT_BOOL(), \
NO_SB_OPT, false) \
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
new file mode 100644
index 000000000000..c550fd9e2729
--- /dev/null
+++ b/fs/bcachefs/quota.c
@@ -0,0 +1,786 @@
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "inode.h"
+#include "quota.h"
+#include "super-io.h"
+
+/*
+ * Validate a key from the quotas btree.
+ *
+ * Returns NULL if the key is acceptable, otherwise a static string describing
+ * what is wrong with it.
+ */
+static const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+	/* the inode field of the position holds the quota type */
+	if (k.k->p.inode >= QTYP_NR)
+		return "invalid quota type";
+
+	switch (k.k->type) {
+	case BCH_QUOTA:
+		if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
+			return "incorrect value size";
+
+		return NULL;
+	default:
+		return "invalid type";
+	}
+}
+
+/* Printable counter names, indexed by enum quota_counters: */
+static const char * const bch2_quota_counters[] = {
+	[Q_SPC]	= "space",
+	[Q_INO]	= "inodes",
+};
+
+/*
+ * Format the limits of a BCH_QUOTA key into @buf (at most @size bytes,
+ * truncated by scnprintf()). Keys of any other type produce no output.
+ */
+static void bch2_quota_to_text(struct bch_fs *c, char *buf,
+			       size_t size, struct bkey_s_c k)
+{
+	char *out = buf, *end = buf + size;
+	struct bkey_s_c_quota dq;
+	unsigned i;
+
+	if (k.k->type != BCH_QUOTA)
+		return;
+
+	dq = bkey_s_c_to_quota(k);
+
+	/* separate the counters with a space so they don't run together */
+	for (i = 0; i < Q_COUNTERS; i++)
+		out += scnprintf(out, end - out,
+				 "%s%s hardlimit %llu softlimit %llu",
+				 i ? " " : "",
+				 bch2_quota_counters[i],
+				 le64_to_cpu(dq.v->c[i].hardlimit),
+				 le64_to_cpu(dq.v->c[i].softlimit));
+}
+
+/* bkey methods for the quotas btree (registered in bch2_bkey_ops[]): */
+const struct bkey_ops bch2_bkey_quota_ops = {
+	.key_invalid	= bch2_quota_invalid,
+	.val_to_text	= bch2_quota_to_text,
+};
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+#include <linux/cred.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+
+/*
+ * Returns the index of the lowest bit set in @qtypes at position @i or above,
+ * or QTYP_NR if there is none.
+ */
+static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
+{
+	unsigned remaining = qtypes >> i;
+
+	if (!remaining)
+		return QTYP_NR;
+
+	return i + __ffs(remaining);
+}
+
+/*
+ * Iterate over the quota types whose bit is set in @_qtypes, in ascending
+ * order: @_i is the quota type and @_q points at (_c)->quotas[_i].
+ *
+ * @_q is assigned before the loop condition is tested, so once the loop has
+ * terminated it must not be dereferenced.
+ */
+#define for_each_set_qtype(_c, _i, _q, _qtypes)				\
+	for (_i = 0;							\
+	     (_i = __next_qtype(_i, _qtypes),				\
+	      _q = &(_c)->quotas[_i],					\
+	      _i < QTYP_NR);						\
+	     _i++)
+
+/*
+ * Bitmask (1 << QTYP_*) of the quota types enabled via the usrquota, grpquota
+ * and prjquota options.
+ */
+static inline unsigned enabled_qtypes(struct bch_fs *c)
+{
+	unsigned mask = 0;
+
+	mask |= c->opts.usrquota << QTYP_USR;
+	mask |= c->opts.grpquota << QTYP_GRP;
+	mask |= c->opts.prjquota << QTYP_PRJ;
+
+	return mask;
+}
+
+/*
+ * Should limits be ignored for the current task? True when the task has
+ * CAP_SYS_RESOURCE. @q is currently unused.
+ */
+static bool ignore_hardlimit(struct bch_memquota_type *q)
+{
+	if (capable(CAP_SYS_RESOURCE))
+		return true;
+#if 0
+	/*
+	 * Disabled reference code: it uses dquot/mem_dqinfo, which don't exist
+	 * here - presumably kept as a reminder of the generic quota code's
+	 * root squash handling (TODO confirm).
+	 */
+	struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
+
+	return capable(CAP_SYS_RESOURCE) &&
+	       (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
+		!(info->dqi_flags & DQF_ROOT_SQUASH));
+#endif
+	return false;
+}
+
+/*
+ * Kinds of quota warning. Used as the first index of quota_nl[], which maps
+ * each of them to the netlink message for the space or the inode counter, and
+ * as bit numbers in memquota_counter.warning_issued.
+ */
+enum quota_msg {
+	SOFTWARN,	/* Softlimit reached */
+	SOFTLONGWARN,	/* Grace time expired */
+	HARDWARN,	/* Hardlimit reached */
+
+	HARDBELOW,	/* Usage got below hardlimit */
+	SOFTBELOW,	/* Usage got below softlimit */
+};
+
+/*
+ * Netlink warning code for each (enum quota_msg, enum quota_counters) pair.
+ * Read-only lookup table, hence const.
+ */
+static const int quota_nl[][Q_COUNTERS] = {
+	[HARDWARN][Q_SPC]	= QUOTA_NL_BHARDWARN,
+	[SOFTLONGWARN][Q_SPC]	= QUOTA_NL_BSOFTLONGWARN,
+	[SOFTWARN][Q_SPC]	= QUOTA_NL_BSOFTWARN,
+	[HARDBELOW][Q_SPC]	= QUOTA_NL_BHARDBELOW,
+	[SOFTBELOW][Q_SPC]	= QUOTA_NL_BSOFTBELOW,
+
+	[HARDWARN][Q_INO]	= QUOTA_NL_IHARDWARN,
+	[SOFTLONGWARN][Q_INO]	= QUOTA_NL_ISOFTLONGWARN,
+	[SOFTWARN][Q_INO]	= QUOTA_NL_ISOFTWARN,
+	[HARDBELOW][Q_INO]	= QUOTA_NL_IHARDBELOW,
+	[SOFTBELOW][Q_INO]	= QUOTA_NL_ISOFTBELOW,
+};
+
+/*
+ * Warnings collected while the quota locks are held; they are sent by
+ * flush_warnings() after the locks have been dropped.
+ */
+struct quota_msgs {
+	u8		nr;		/* number of valid entries in m[] */
+	struct {
+		u8	qtype;		/* enum quota_types the warning is for */
+		u8	msg;		/* netlink code taken from quota_nl[] */
+	}		m[QTYP_NR * Q_COUNTERS];
+};
+
+/*
+ * Append the netlink message for (@msg_type, @counter) on quota type @qtype to
+ * @msgs. It is a BUG if @msgs is already full.
+ */
+static void prepare_msg(unsigned qtype,
+			enum quota_counters counter,
+			struct quota_msgs *msgs,
+			enum quota_msg msg_type)
+{
+	unsigned slot = msgs->nr;
+
+	BUG_ON(slot >= ARRAY_SIZE(msgs->m));
+
+	msgs->m[slot].qtype	= qtype;
+	msgs->m[slot].msg	= quota_nl[msg_type][counter];
+	msgs->nr		= slot + 1;
+}
+
+/*
+ * Queue warning @msg_type for @counter, unless the corresponding bit in
+ * @qc->warning_issued says it has already been issued.
+ */
+static void prepare_warning(struct memquota_counter *qc,
+			    unsigned qtype,
+			    enum quota_counters counter,
+			    struct quota_msgs *msgs,
+			    enum quota_msg msg_type)
+{
+	if (!(qc->warning_issued & (1 << msg_type)))
+		prepare_msg(qtype, counter, msgs, msg_type);
+}
+
+/*
+ * Send every warning queued in @msgs; called after the quota locks have been
+ * dropped.
+ *
+ * @qid:  the ids the accounting was done for, indexed by quota type
+ * @sb:   superblock whose device is reported in the warning
+ * @msgs: warnings collected by prepare_msg()
+ */
+static void flush_warnings(struct bch_qid qid,
+			   struct super_block *sb,
+			   struct quota_msgs *msgs)
+{
+	unsigned i;
+
+	for (i = 0; i < msgs->nr; i++) {
+		/*
+		 * qid.q[] is indexed by quota type, not by message number:
+		 * there can be up to QTYP_NR * Q_COUNTERS messages.
+		 */
+		unsigned qtype = msgs->m[i].qtype;
+
+		quota_send_warning(make_kqid(&init_user_ns, qtype, qid.q[qtype]),
+				   sb->s_dev, msgs->m[i].msg);
+	}
+}
+
+/*
+ * Check whether changing counter @counter of @mq by the delta @v is allowed,
+ * and queue the warnings the change triggers in @msgs. The counter itself is
+ * not modified here.
+ *
+ * @qtype: quota type @mq belongs to
+ * @v:     signed change that is about to be applied to the counter
+ * @mode:  BCH_QUOTA_NOCHECK skips all checks; BCH_QUOTA_PREALLOC fails with
+ *         -EDQUOT where the other modes would only queue a warning
+ *
+ * Returns 0 if the change may go ahead, -EDQUOT otherwise.
+ */
+static int bch2_quota_check_limit(struct bch_fs *c,
+				  unsigned qtype,
+				  struct bch_memquota *mq,
+				  struct quota_msgs *msgs,
+				  enum quota_counters counter,
+				  s64 v,
+				  enum quota_acct_mode mode)
+{
+	struct bch_memquota_type *q = &c->quotas[qtype];
+	struct memquota_counter *qc = &mq->c[counter];
+	/* value the counter will have after the change */
+	u64 n = qc->v + v;
+
+	BUG_ON((s64) n < 0);
+
+	if (mode == BCH_QUOTA_NOCHECK)
+		return 0;
+
+	/* Usage isn't growing: never fails, only "back below limit" messages */
+	if (v <= 0) {
+		if (n < qc->hardlimit &&
+		    (qc->warning_issued & (1 << HARDWARN))) {
+			qc->warning_issued &= ~(1 << HARDWARN);
+			prepare_msg(qtype, counter, msgs, HARDBELOW);
+		}
+
+		if (n < qc->softlimit &&
+		    (qc->warning_issued & (1 << SOFTWARN))) {
+			qc->warning_issued &= ~(1 << SOFTWARN);
+			prepare_msg(qtype, counter, msgs, SOFTBELOW);
+		}
+
+		/*
+		 * NOTE(review): this clears every bit, including ones for
+		 * limits that are still exceeded. Also, nothing visible in
+		 * this file ever sets a bit in warning_issued, so the two
+		 * branches above look unreachable - confirm the intent.
+		 */
+		qc->warning_issued = 0;
+		return 0;
+	}
+
+	/* A limit of 0 means "no limit" */
+	if (qc->hardlimit &&
+	    qc->hardlimit < n &&
+	    !ignore_hardlimit(q)) {
+		if (mode == BCH_QUOTA_PREALLOC)
+			return -EDQUOT;
+
+		prepare_warning(qc, qtype, counter, msgs, HARDWARN);
+	}
+
+	/* Over the soft limit, and the timer started below has expired: */
+	if (qc->softlimit &&
+	    qc->softlimit < n &&
+	    qc->timer &&
+	    ktime_get_real_seconds() >= qc->timer &&
+	    !ignore_hardlimit(q)) {
+		if (mode == BCH_QUOTA_PREALLOC)
+			return -EDQUOT;
+
+		prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
+	}
+
+	/* Over the soft limit for the first time: warn and start the timer */
+	if (qc->softlimit &&
+	    qc->softlimit < n &&
+	    qc->timer == 0) {
+		if (mode == BCH_QUOTA_PREALLOC)
+			return -EDQUOT;
+
+		prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
+
+		/* XXX is this the right one? */
+		qc->timer = ktime_get_real_seconds() +
+			q->limits[counter].warnlimit;
+	}
+
+	return 0;
+}
+
+/*
+ * Add @v (which may be negative) to counter @counter of every enabled quota
+ * type, for the ids in @qid.
+ *
+ * The change is applied to all enabled types or to none: every type is first
+ * checked against its limits according to @mode, and the counters are only
+ * updated if all checks passed. Warnings queued by the checks are sent after
+ * the locks have been dropped, whether or not the update went ahead.
+ *
+ * Returns 0 on success, -ENOMEM if a quota entry couldn't be allocated, or
+ * the error from bch2_quota_check_limit().
+ */
+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
+		    enum quota_counters counter, s64 v,
+		    enum quota_acct_mode mode)
+{
+	struct bch_memquota *entry[QTYP_NR];
+	struct bch_memquota_type *type;
+	struct quota_msgs msgs;
+	unsigned active = enabled_qtypes(c);
+	unsigned t;
+	int ret = 0;
+
+	memset(&msgs, 0, sizeof(msgs));
+
+	for_each_set_qtype(c, t, type, active)
+		mutex_lock(&type->lock);
+
+	for_each_set_qtype(c, t, type, active) {
+		entry[t] = genradix_ptr_alloc(&type->table, qid.q[t], GFP_NOFS);
+
+		ret = entry[t]
+			? bch2_quota_check_limit(c, t, entry[t], &msgs,
+						 counter, v, mode)
+			: -ENOMEM;
+		if (ret)
+			goto unlock;
+	}
+
+	for_each_set_qtype(c, t, type, active)
+		entry[t]->c[counter].v += v;
+unlock:
+	for_each_set_qtype(c, t, type, active)
+		mutex_unlock(&type->lock);
+
+	flush_warnings(qid, c->vfs_sb, &msgs);
+
+	return ret;
+}
+
+/*
+ * Move @v units of @counter from @src_q to @dst_q. It is a BUG if the source
+ * holds less than @v, or if the destination counter would overflow.
+ */
+static void __bch2_quota_transfer(struct bch_memquota *src_q,
+				  struct bch_memquota *dst_q,
+				  enum quota_counters counter, s64 v)
+{
+	struct memquota_counter *from	= &src_q->c[counter];
+	struct memquota_counter *to	= &dst_q->c[counter];
+
+	BUG_ON(v > from->v);
+	BUG_ON(v + to->v < v);
+
+	from->v	-= v;
+	to->v	+= v;
+}
+
+/*
+ * Move the usage of one inode (@space units of space plus one inode) from the
+ * ids in @src to the ids in @dst.
+ *
+ * @qtypes: bitmask (1 << QTYP_*) of the quota types to transfer; types that
+ *          aren't enabled are ignored
+ * @dst:    ids that take over the usage
+ * @src:    ids that currently own the usage
+ * @space:  amount of space to move, in the units of the Q_SPC counter
+ *
+ * All destination limits are checked in BCH_QUOTA_PREALLOC mode before any
+ * counter is touched, so the transfer happens for every requested type or for
+ * none of them.
+ *
+ * Returns 0 on success, -ENOMEM if a quota entry couldn't be allocated, or
+ * -EDQUOT if the destination would go over one of its limits.
+ */
+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
+			struct bch_qid dst,
+			struct bch_qid src, u64 space)
+{
+	struct bch_memquota_type *q;
+	struct bch_memquota *src_q[QTYP_NR], *dst_q[QTYP_NR];
+	struct quota_msgs msgs;
+	unsigned i;
+	int ret = 0;
+
+	qtypes &= enabled_qtypes(c);
+
+	memset(&msgs, 0, sizeof(msgs));
+
+	for_each_set_qtype(c, i, q, qtypes)
+		mutex_lock(&q->lock);
+
+	for_each_set_qtype(c, i, q, qtypes) {
+		src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_NOFS);
+		dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_NOFS);
+
+		if (!src_q[i] || !dst_q[i]) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		/*
+		 * bch2_quota_check_limit() takes the change, not the new
+		 * total - it adds the current counter value itself:
+		 */
+		ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
+					     space,
+					     BCH_QUOTA_PREALLOC);
+		if (ret)
+			goto err;
+
+		ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
+					     1,
+					     BCH_QUOTA_PREALLOC);
+		if (ret)
+			goto err;
+	}
+
+	for_each_set_qtype(c, i, q, qtypes) {
+		__bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space);
+		__bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1);
+	}
+
+err:
+	for_each_set_qtype(c, i, q, qtypes)
+		mutex_unlock(&q->lock);
+
+	flush_warnings(dst, c->vfs_sb, &msgs);
+
+	return ret;
+}
+
+/*
+ * Load the limits stored in quota key @k into the in-memory table. The quota
+ * type and id are taken from the key's position (inode and offset fields).
+ * Keys that aren't of type BCH_QUOTA are ignored.
+ *
+ * Returns 0 on success, -ENOMEM if the table entry couldn't be allocated.
+ */
+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct bkey_s_c_quota dq;
+	struct bch_memquota_type *q;
+	struct bch_memquota *mq;
+	unsigned i;
+	int ret = 0;
+
+	BUG_ON(k.k->p.inode >= QTYP_NR);
+
+	if (k.k->type != BCH_QUOTA)
+		return 0;
+
+	dq = bkey_s_c_to_quota(k);
+	q = &c->quotas[k.k->p.inode];
+
+	mutex_lock(&q->lock);
+
+	mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL);
+	if (mq) {
+		for (i = 0; i < Q_COUNTERS; i++) {
+			mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit);
+			mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit);
+		}
+	} else {
+		ret = -ENOMEM;
+	}
+
+	mutex_unlock(&q->lock);
+
+	return ret;
+}
+
+/*
+ * Load the limits of every id of quota type @type from the quotas btree into
+ * the in-memory table.
+ *
+ * Returns 0 on success, otherwise the error from unlocking the iterator or,
+ * failing that, the error from __bch2_quota_set().
+ */
+static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret = 0;
+
+	for_each_btree_key(&iter, c, BTREE_ID_QUOTAS, POS(type, 0),
+			   BTREE_ITER_PREFETCH, k) {
+		/* iteration started at (type, 0): stop at the next type's keys */
+		if (k.k->p.inode != type)
+			break;
+
+		ret = __bch2_quota_set(c, k);
+		if (ret)
+			break;
+	}
+
+	return bch2_btree_iter_unlock(&iter) ?: ret;
+}
+
+/* Free the in-memory quota table of every quota type. */
+void bch2_fs_quota_exit(struct bch_fs *c)
+{
+	struct bch_memquota_type *q;
+
+	for (q = c->quotas; q < c->quotas + ARRAY_SIZE(c->quotas); q++)
+		genradix_free(&q->table);
+}
+
+/* Initialize the lock of every quota type. */
+void bch2_fs_quota_init(struct bch_fs *c)
+{
+	struct bch_memquota_type *q;
+
+	for (q = c->quotas; q < c->quotas + ARRAY_SIZE(c->quotas); q++)
+		mutex_init(&q->lock);
+}
+
+/*
+ * Copy the time and warning limits of every quota type from the superblock's
+ * quota section into c->quotas[]. Does nothing if the superblock has no quota
+ * section. Both callers in this file hold c->sb_lock.
+ */
+static void bch2_sb_quota_read(struct bch_fs *c)
+{
+	struct bch_sb_field_quota *sb_quota = bch2_sb_get_quota(c->disk_sb);
+	unsigned type, counter;
+
+	if (!sb_quota)
+		return;
+
+	for (type = 0; type < QTYP_NR; type++)
+		for (counter = 0; counter < Q_COUNTERS; counter++) {
+			struct quota_limit *lim =
+				&c->quotas[type].limits[counter];
+
+			lim->timelimit =
+				le32_to_cpu(sb_quota->q[type].c[counter].timelimit);
+			lim->warnlimit =
+				le32_to_cpu(sb_quota->q[type].c[counter].warnlimit);
+		}
+}
+
+/*
+ * Bring the in-memory quota state up to date:
+ *  - read the per type limits from the superblock,
+ *  - load the per id limits of every enabled quota type from the quotas btree,
+ *  - walk all inodes and account their space and inode usage.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int bch2_fs_quota_read(struct bch_fs *c)
+{
+	unsigned i, qtypes = enabled_qtypes(c);
+	struct bch_memquota_type *q;
+	struct btree_iter iter;
+	struct bch_inode_unpacked u;
+	struct bkey_s_c k;
+	/* must start at 0: both loops below may run zero times */
+	int ret = 0;
+
+	mutex_lock(&c->sb_lock);
+	bch2_sb_quota_read(c);
+	mutex_unlock(&c->sb_lock);
+
+	for_each_set_qtype(c, i, q, qtypes) {
+		ret = bch2_quota_init_type(c, i);
+		if (ret)
+			return ret;
+	}
+
+	for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN,
+			   BTREE_ITER_PREFETCH, k) {
+		switch (k.k->type) {
+		case BCH_INODE_FS:
+			/* errors leave via "out" so the iterator is unlocked */
+			ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
+			if (ret)
+				goto out;
+
+			ret = bch2_quota_acct(c, bch_qid(&u), Q_SPC,
+					      u.bi_sectors,
+					      BCH_QUOTA_NOCHECK);
+			if (ret)
+				goto out;
+
+			ret = bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
+					      BCH_QUOTA_NOCHECK);
+			if (ret)
+				goto out;
+		}
+	}
+out:
+	return bch2_btree_iter_unlock(&iter) ?: ret;
+}
+
+/* Enable/disable/delete quotas for an entire filesystem: */
+
+/*
+ * quotactl ->quota_enable: turn on enforcement for user and/or group quotas
+ * by setting the corresponding superblock flags.
+ *
+ * Returns -EROFS on a read-only mount and -EINVAL if accounting is requested
+ * (it can only be enabled at mount time), if enforcement is requested for a
+ * type whose accounting isn't enabled, or if project quota enforcement is
+ * requested.
+ */
+static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	bool want_usr = uflags & FS_QUOTA_UDQ_ENFD;
+	bool want_grp = uflags & FS_QUOTA_GDQ_ENFD;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	/* Accounting must be enabled at mount time: */
+	if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT))
+		return -EINVAL;
+
+	/*
+	 * Can't enable enforcement without accounting, and enforcement of
+	 * project quotas can't be enabled here at all:
+	 */
+	if ((want_usr && !c->opts.usrquota) ||
+	    (want_grp && !c->opts.grpquota) ||
+	    (uflags & FS_QUOTA_PDQ_ENFD))
+		return -EINVAL;
+
+	mutex_lock(&c->sb_lock);
+
+	if (want_usr)
+		SET_BCH_SB_USRQUOTA(c->disk_sb, true);
+	if (want_grp)
+		SET_BCH_SB_GRPQUOTA(c->disk_sb, true);
+
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
+	return 0;
+}
+
+/*
+ * quotactl ->quota_disable: clear the superblock quota flag of every quota
+ * type whose enforcement bit is set in @uflags, then write the superblock.
+ *
+ * Returns 0, or -EROFS on a read-only mount.
+ */
+static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	bool drop_usr = uflags & FS_QUOTA_UDQ_ENFD;
+	bool drop_grp = uflags & FS_QUOTA_GDQ_ENFD;
+	bool drop_prj = uflags & FS_QUOTA_PDQ_ENFD;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	mutex_lock(&c->sb_lock);
+
+	if (drop_usr)
+		SET_BCH_SB_USRQUOTA(c->disk_sb, false);
+	if (drop_grp)
+		SET_BCH_SB_GRPQUOTA(c->disk_sb, false);
+	if (drop_prj)
+		SET_BCH_SB_PRJQUOTA(c->disk_sb, false);
+
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
+	return 0;
+}
+
+/*
+ * Delete every key of quota type @type from the quotas btree. Refused with
+ * -EINVAL while that quota type is enabled.
+ */
+static int bch2_quota_remove_type(struct bch_fs *c, enum quota_types type,
+				  bool enabled)
+{
+	if (enabled)
+		return -EINVAL;
+
+	return bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
+				       POS(type, 0),
+				       POS(type + 1, 0),
+				       ZERO_VERSION, NULL, NULL, NULL);
+}
+
+/*
+ * quotactl ->rm_xquota: delete the stored limits of the quota types selected
+ * in @uflags (FS_USER_QUOTA, FS_GROUP_QUOTA, FS_PROJ_QUOTA), in that order,
+ * stopping at the first failure.
+ *
+ * Returns 0 on success, -EROFS on a read-only mount, -EINVAL if a selected
+ * type is still enabled, or the error from deleting the keys.
+ */
+static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	int ret;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	if (uflags & FS_USER_QUOTA) {
+		ret = bch2_quota_remove_type(c, QTYP_USR, c->opts.usrquota);
+		if (ret)
+			return ret;
+	}
+
+	if (uflags & FS_GROUP_QUOTA) {
+		ret = bch2_quota_remove_type(c, QTYP_GRP, c->opts.grpquota);
+		if (ret)
+			return ret;
+	}
+
+	if (uflags & FS_PROJ_QUOTA) {
+		ret = bch2_quota_remove_type(c, QTYP_PRJ, c->opts.prjquota);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * quotactl ->get_state: report, for every quota type, whether accounting is
+ * enabled and what its time and warning limits are. Every type is flagged
+ * QCI_SYSFILE; limits are only filled in for enabled types.
+ */
+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	unsigned enabled = enabled_qtypes(c);
+	unsigned type;
+
+	memset(state, 0, sizeof(*state));
+
+	for (type = 0; type < QTYP_NR; type++) {
+		struct qc_type_state *ts = &state->s_state[type];
+		const struct quota_limit *lim = c->quotas[type].limits;
+
+		ts->flags |= QCI_SYSFILE;
+
+		if (enabled & (1 << type)) {
+			ts->flags |= QCI_ACCT_ENABLED;
+
+			ts->spc_timelimit = lim[Q_SPC].timelimit;
+			ts->spc_warnlimit = lim[Q_SPC].warnlimit;
+
+			ts->ino_timelimit = lim[Q_INO].timelimit;
+			ts->ino_warnlimit = lim[Q_INO].warnlimit;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * quotactl ->set_info: adjust quota timers & warnings.
+ *
+ * Stores the time/warning limits selected by @info->i_fieldmask for quota
+ * type @type in the superblock's quota section (creating the section if
+ * necessary), refreshes the in-memory copy and writes the superblock.
+ *
+ * Returns 0 on success, -EROFS on a read-only mount, -EINVAL for an unknown
+ * type or unsupported field, -ESRCH if the quota type isn't enabled, or
+ * -ENOSPC if the superblock section couldn't be allocated.
+ */
+static int bch2_quota_set_info(struct super_block *sb, int type,
+			       struct qc_info *info)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	struct bch_sb_field_quota *sb_quota;
+	int ret = 0;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	if (type >= QTYP_NR)
+		return -EINVAL;
+
+	if (!((1 << type) & enabled_qtypes(c)))
+		return -ESRCH;
+
+	if (info->i_fieldmask &
+	    ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
+		return -EINVAL;
+
+	mutex_lock(&c->sb_lock);
+	sb_quota = bch2_sb_get_quota(c->disk_sb);
+	if (!sb_quota) {
+		sb_quota = bch2_fs_sb_resize_quota(c, sizeof(*sb_quota) / sizeof(u64));
+		if (!sb_quota) {
+			/* don't return with sb_lock held */
+			ret = -ENOSPC;
+			goto unlock;
+		}
+	}
+
+	if (info->i_fieldmask & QC_SPC_TIMER)
+		sb_quota->q[type].c[Q_SPC].timelimit =
+			cpu_to_le32(info->i_spc_timelimit);
+
+	if (info->i_fieldmask & QC_SPC_WARNS)
+		sb_quota->q[type].c[Q_SPC].warnlimit =
+			cpu_to_le32(info->i_spc_warnlimit);
+
+	if (info->i_fieldmask & QC_INO_TIMER)
+		sb_quota->q[type].c[Q_INO].timelimit =
+			cpu_to_le32(info->i_ino_timelimit);
+
+	if (info->i_fieldmask & QC_INO_WARNS)
+		sb_quota->q[type].c[Q_INO].warnlimit =
+			cpu_to_le32(info->i_ino_warnlimit);
+
+	/* refresh c->quotas[].limits from what was just stored */
+	bch2_sb_quota_read(c);
+
+	bch2_write_super(c);
+unlock:
+	mutex_unlock(&c->sb_lock);
+
+	return ret;
+}
+
+/* Get/set individual quotas: */
+
+/*
+ * Translate an in-memory quota entry into the VFS representation. The space
+ * counter and its limits are shifted left by 9 on the way out; the inode
+ * values are copied unchanged. Only the fields assigned below are written.
+ */
+static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src)
+{
+	const struct memquota_counter *spc = &src->c[Q_SPC];
+	const struct memquota_counter *ino = &src->c[Q_INO];
+
+	dst->d_space		= spc->v << 9;
+	dst->d_spc_hardlimit	= spc->hardlimit << 9;
+	dst->d_spc_softlimit	= spc->softlimit << 9;
+	dst->d_spc_timer	= spc->timer;
+	dst->d_spc_warns	= spc->warns;
+
+	dst->d_ino_count	= ino->v;
+	dst->d_ino_hardlimit	= ino->hardlimit;
+	dst->d_ino_softlimit	= ino->softlimit;
+	dst->d_ino_timer	= ino->timer;
+	dst->d_ino_warns	= ino->warns;
+}
+
+/*
+ * quotactl ->get_dqblk: report usage and limits of the id @kqid. An id without
+ * an entry in the in-memory table is reported as all zeroes. Always returns 0.
+ */
+static int bch2_get_quota(struct super_block *sb, struct kqid kqid,
+			  struct qc_dqblk *qdq)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	struct bch_memquota_type *type = &c->quotas[kqid.type];
+	qid_t id = from_kqid(&init_user_ns, kqid);
+	struct bch_memquota *entry;
+
+	memset(qdq, 0, sizeof(*qdq));
+
+	mutex_lock(&type->lock);
+
+	entry = genradix_ptr(&type->table, id);
+	if (entry)
+		__bch2_quota_get(qdq, entry);
+
+	mutex_unlock(&type->lock);
+
+	return 0;
+}
+
+/*
+ * quotactl ->get_nextdqblk: starting the search at the id in *@kqid, find the
+ * next table entry that isn't entirely zero, report it in @qdq and store its
+ * id back into *@kqid.
+ *
+ * Returns 0 if an entry was found, -ENOENT otherwise.
+ *
+ * NOTE(review): unlike bch2_get_quota(), @qdq isn't zeroed before it is
+ * filled in, and the id is decoded with init_user_ns but encoded with
+ * current_user_ns() - confirm both are intended.
+ */
+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
+			       struct qc_dqblk *qdq)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	struct bch_memquota_type *q = &c->quotas[kqid->type];
+	qid_t qid = from_kqid(&init_user_ns, *kqid);
+	struct genradix_iter iter = genradix_iter_init(&q->table, qid);
+	struct bch_memquota *mq;
+	int ret = 0;
+
+	mutex_lock(&q->lock);
+
+	while ((mq = genradix_iter_peek(&iter, &q->table))) {
+		/* skip entries that compare equal to zeroed memory */
+		if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
+			__bch2_quota_get(qdq, mq);
+			*kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
+			goto found;
+		}
+
+		genradix_iter_advance(&iter, &q->table);
+	}
+
+	ret = -ENOENT;
+found:
+	mutex_unlock(&q->lock);
+	return ret;
+}
+
+/*
+ * quotactl ->set_dqblk: update the limits of the id @qid.
+ *
+ * The limits currently stored in the quotas btree (if any) are used as the
+ * starting point, the fields selected by @qdq->d_fieldmask are replaced, and
+ * the result is written back to the btree and then loaded into the in-memory
+ * table.
+ *
+ * Space limits are shifted right by 9 before being stored, mirroring the
+ * left shift in __bch2_quota_get(); inode limits are stored as given.
+ *
+ * Returns 0 on success, -EROFS on a read-only mount, or the error from the
+ * btree lookup/update or from __bch2_quota_set().
+ */
+static int bch2_set_quota(struct super_block *sb, struct kqid qid,
+			  struct qc_dqblk *qdq)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct bkey_i_quota new_quota;
+	int ret;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	bkey_quota_init(&new_quota.k_i);
+	new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
+
+	bch2_btree_iter_init(&iter, c, BTREE_ID_QUOTAS, new_quota.k.p,
+			     BTREE_ITER_WITH_HOLES|BTREE_ITER_INTENT);
+	k = bch2_btree_iter_peek_with_holes(&iter);
+
+	ret = btree_iter_err(k);
+	if (unlikely(ret)) {
+		/* don't leave the iterator locked on the error path */
+		bch2_btree_iter_unlock(&iter);
+		return ret;
+	}
+
+	switch (k.k->type) {
+	case BCH_QUOTA:
+		/* start from the existing limits */
+		new_quota.v = *bkey_s_c_to_quota(k).v;
+		break;
+	}
+
+	if (qdq->d_fieldmask & QC_SPC_SOFT)
+		new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
+	if (qdq->d_fieldmask & QC_SPC_HARD)
+		new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
+
+	if (qdq->d_fieldmask & QC_INO_SOFT)
+		new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
+	if (qdq->d_fieldmask & QC_INO_HARD)
+		new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+
+	ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+				   BTREE_INSERT_ENTRY(&iter, &new_quota.k_i));
+	bch2_btree_iter_unlock(&iter);
+
+	if (ret)
+		return ret;
+
+	return __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
+}
+
+/* quotactl operations, installed as sb->s_qcop when the filesystem is mounted: */
+const struct quotactl_ops bch2_quotactl_operations = {
+	/* Enable/disable/delete quotas for an entire filesystem: */
+	.quota_enable		= bch2_quota_enable,
+	.quota_disable		= bch2_quota_disable,
+	.rm_xquota		= bch2_quota_remove,
+
+	/* Per quota type state, timers & warnings: */
+	.get_state		= bch2_quota_get_state,
+	.set_info		= bch2_quota_set_info,
+
+	/* Get/set individual quotas: */
+	.get_dqblk		= bch2_get_quota,
+	.get_nextdqblk		= bch2_get_next_quota,
+	.set_dqblk		= bch2_set_quota,
+};
+
+#endif /* CONFIG_BCACHEFS_QUOTA */
diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h
new file mode 100644
index 000000000000..09d51a839c9d
--- /dev/null
+++ b/fs/bcachefs/quota.h
@@ -0,0 +1,48 @@
+#ifndef _BCACHEFS_QUOTA_H
+#define _BCACHEFS_QUOTA_H
+
+#include "quota_types.h"
+
+extern const struct bkey_ops bch2_bkey_quota_ops;
+
+/*
+ * Accounting mode, passed as the last argument of bch2_quota_acct().
+ * NOTE(review): the exact behaviour of each mode is decided inside
+ * bch2_quota_acct() (quota.c) and is not visible from this header - confirm
+ * there before relying on the names alone.
+ */
+enum quota_acct_mode {
+ BCH_QUOTA_PREALLOC,
+ BCH_QUOTA_WARN,
+ BCH_QUOTA_NOCHECK,
+};
+
+/* Gather the user, group and project ids of inode @u into one bch_qid. */
+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
+{
+	struct bch_qid qid = { .q = { 0 } };
+
+	qid.q[QTYP_USR] = u->bi_uid;
+	qid.q[QTYP_GRP] = u->bi_gid;
+	qid.q[QTYP_PRJ] = u->bi_project;
+
+	return qid;
+}
+
+/*
+ * Quota entry points. With CONFIG_BCACHEFS_QUOTA enabled these are the real
+ * functions and the quotactl_ops table; otherwise the #else branch below
+ * provides stubs.
+ */
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,
+ s64, enum quota_acct_mode);
+
+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid,
+ struct bch_qid, u64);
+
+void bch2_fs_quota_exit(struct bch_fs *);
+void bch2_fs_quota_init(struct bch_fs *);
+int bch2_fs_quota_read(struct bch_fs *);
+
+extern const struct quotactl_ops bch2_quotactl_operations;
+
+#else
+
+/*
+ * Quota support compiled out: accounting and transfer expand to a constant 0
+ * without evaluating their arguments, and the init/exit/read hooks do
+ * nothing. The macro parameter names (_uid, _gid, ...) do not mirror the
+ * prototypes above; only the argument count (five each) matches.
+ * bch2_quotactl_operations is not declared in this configuration.
+ */
+#define bch2_quota_acct(_c, _uid, _gid, _counter, _v) (0)
+#define bch2_quota_transfer(_c, _type, _src, _dst, _v) (0)
+
+static inline void bch2_fs_quota_exit(struct bch_fs *c) {}
+static inline void bch2_fs_quota_init(struct bch_fs *c) {}
+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; }
+
+#endif
+
+#endif /* _BCACHEFS_QUOTA_H */
diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h
new file mode 100644
index 000000000000..bcaed4ea8345
--- /dev/null
+++ b/fs/bcachefs/quota_types.h
@@ -0,0 +1,36 @@
+#ifndef _BCACHEFS_QUOTA_TYPES_H
+#define _BCACHEFS_QUOTA_TYPES_H
+
+#include <linux/generic-radix-tree.h>
+
+/*
+ * One id per quota type, indexed by QTYP_*. The bch_qid() helper in quota.h
+ * fills the QTYP_USR, QTYP_GRP and QTYP_PRJ slots from an inode's uid, gid
+ * and project id.
+ */
+struct bch_qid {
+ u32 q[QTYP_NR];
+};
+
+/*
+ * In-memory state for a single quota counter.
+ * NOTE(review): no user of this struct is visible here; going by the field
+ * names, v is presumably the current usage, hardlimit/softlimit the
+ * configured limits, and timer/warns/warning_issued grace-period and warning
+ * bookkeeping - confirm against quota.c.
+ */
+struct memquota_counter {
+ u64 v;
+ u64 hardlimit;
+ u64 softlimit;
+ s64 timer;
+ int warns;
+ int warning_issued;
+};
+
+/* In-memory quota entry: one memquota_counter per counter (Q_COUNTERS). */
+struct bch_memquota {
+ struct memquota_counter c[Q_COUNTERS];
+};
+
+/*
+ * Generic radix tree whose elements are struct bch_memquota.
+ * NOTE(review): presumably indexed by the numeric quota id - confirm in quota.c.
+ */
+typedef GENRADIX(struct bch_memquota) bch_memquota_table;
+
+/*
+ * Time and warning limits for one counter.
+ * NOTE(review): units and exact meaning are not visible from this header;
+ * presumably the default grace time and warning count - confirm in quota.c.
+ */
+struct quota_limit {
+ u32 timelimit;
+ u32 warnlimit;
+};
+
+/*
+ * Per-type in-memory quota state: a quota_limit for each counter, the table
+ * of bch_memquota entries, and a mutex.
+ * NOTE(review): the mutex presumably serialises access to the table -
+ * confirm against the users in quota.c.
+ */
+struct bch_memquota_type {
+ struct quota_limit limits[Q_COUNTERS];
+ bch_memquota_table table;
+ struct mutex lock;
+};
+
+#endif /* _BCACHEFS_QUOTA_TYPES_H */
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index cc959d480037..16aee91bda16 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1577,3 +1577,16 @@ out:
return ret;
}
+
+/* Quotas: */
+
+/*
+ * Validate the quota superblock field: its size must be exactly that of
+ * struct bch_sb_field_quota. Returns NULL when valid, otherwise a static
+ * error string.
+ */
+static const char *bch2_sb_validate_quota(struct bch_sb *sb,
+					  struct bch_sb_field *f)
+{
+	struct bch_sb_field_quota *qf = field_to_type(f, quota);
+
+	return vstruct_bytes(&qf->field) == sizeof(*qf)
+		? NULL
+		: "invalid field quota: wrong size";
+}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index e3129dd6d3d8..3b1b7bebff07 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -29,6 +29,7 @@
#include "move.h"
#include "migrate.h"
#include "movinggc.h"
+#include "quota.h"
#include "super.h"
#include "super-io.h"
#include "sysfs.h"
@@ -367,6 +368,7 @@ err:
static void bch2_fs_free(struct bch_fs *c)
{
+ bch2_fs_quota_exit(c);
bch2_fs_fsio_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_btree_cache_exit(c);
@@ -493,6 +495,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_allocator_init(c);
bch2_fs_tiering_init(c);
+ bch2_fs_quota_init(c);
INIT_LIST_HEAD(&c->list);
@@ -775,6 +778,14 @@ static const char *__bch2_fs_start(struct bch_fs *c)
if (ret)
goto err;
bch_verbose(c, "fsck done");
+
+ if (c->opts.usrquota || c->opts.grpquota) {
+ bch_verbose(c, "reading quotas:");
+ ret = bch2_fs_quota_read(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "quotas done");
+ }
} else {
struct bch_inode_unpacked inode;
struct bkey_inode_buf packed_inode;
@@ -828,6 +839,12 @@ static const char *__bch2_fs_start(struct bch_fs *c)
NULL, NULL, NULL, 0))
goto err;
+ if (c->opts.usrquota || c->opts.grpquota) {
+ ret = bch2_fs_quota_read(c);
+ if (ret)
+ goto err;
+ }
+
err = "error writing first journal entry";
if (bch2_journal_meta(&c->journal))
goto err;