From d983a32e7902da73c52e80b5dda4bc14d16415ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Nov 2020 11:57:58 -0500 Subject: switch inodes to rhashtable --- drivers/dax/super.c | 55 +++++-- fs/9p/v9fs.h | 2 +- fs/9p/v9fs_vfs.h | 2 + fs/9p/vfs_inode.c | 99 ++++++------ fs/9p/vfs_inode_dotl.c | 91 ++++++----- fs/9p/vfs_super.c | 7 + fs/afs/callback.c | 2 +- fs/afs/dir.c | 6 +- fs/afs/dynroot.c | 29 +--- fs/afs/inode.c | 94 ++++++----- fs/afs/internal.h | 3 +- fs/afs/super.c | 3 + fs/bcachefs/fs.c | 10 +- fs/block_dev.c | 55 +++++-- fs/btrfs/ctree.h | 2 + fs/btrfs/inode.c | 84 +++++----- fs/btrfs/super.c | 6 + fs/ceph/inode.c | 38 ++++- fs/ceph/super.c | 4 + fs/ceph/super.h | 10 +- fs/cifs/cifsfs.c | 3 + fs/cifs/cifsfs.h | 1 + fs/cifs/inode.c | 84 ++++++---- fs/coda/cnode.c | 55 +++++-- fs/coda/coda_fs_i.h | 2 + fs/coda/inode.c | 4 + fs/dcache.c | 1 - fs/ecryptfs/ecryptfs_kernel.h | 1 + fs/ecryptfs/inode.c | 47 ++++-- fs/ecryptfs/main.c | 4 + fs/fuse/dev.c | 4 +- fs/fuse/dir.c | 2 +- fs/fuse/fuse_i.h | 5 - fs/fuse/inode.c | 55 +++++-- fs/inode.c | 362 ++++++++++++++++++------------------------ fs/nfs/inode.c | 104 +++++++----- fs/nfs/nfs.h | 2 + fs/nfs/super.c | 3 + fs/super.c | 5 + include/linux/fs.h | 38 +++-- mm/shmem.c | 61 +++++-- 41 files changed, 830 insertions(+), 615 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e84070b55463..d182d46aea0f 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "dax-private.h" static dev_t dax_devt; @@ -28,6 +29,38 @@ static struct super_block *dax_superblock __read_mostly; static struct hlist_head dax_host_list[DAX_HASH_SIZE]; static DEFINE_SPINLOCK(dax_host_lock); +static u32 dax_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const dev_t *devt = data; + + return jhash(devt, sizeof(*devt), seed); +} + +static u32 dax_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = 
obj; + + return jhash(&inode->i_rdev, sizeof(inode->i_rdev), seed); +} + +static int dax_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const dev_t *devt = arg->key; + + if (inode->i_rdev == *devt) + return 0; + return 1; +} + +static const struct rhashtable_params dax_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = dax_inode_key_hash_fn, + .obj_hashfn = dax_inode_obj_hash_fn, + .obj_cmpfn = dax_inode_hash_cmp_fn, +}; + int dax_read_lock(void) { return srcu_read_lock(&dax_srcu); @@ -513,18 +546,11 @@ static struct file_system_type dax_fs_type = { .kill_sb = kill_anon_super, }; -static int dax_test(struct inode *inode, void *data) +static int dax_set(struct inode *inode, const void *data) { - dev_t devt = *(dev_t *) data; - - return inode->i_rdev == devt; -} + const dev_t *devt = data; -static int dax_set(struct inode *inode, void *data) -{ - dev_t devt = *(dev_t *) data; - - inode->i_rdev = devt; + inode->i_rdev = *devt; return 0; } @@ -533,8 +559,7 @@ static struct dax_device *dax_dev_get(dev_t devt) struct dax_device *dax_dev; struct inode *inode; - inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31), - dax_test, dax_set, &devt); + inode = iget5_locked(dax_superblock, dax_set, &devt); if (!inode) return NULL; @@ -711,8 +736,14 @@ static int dax_fs_init(void) } dax_superblock = dax_mnt->mnt_sb; + rc = super_setup_inode_table(dax_superblock, &dax_inode_table_params); + if (rc) + goto err_itable; + return 0; + err_itable: + kern_unmount(dax_mnt); err_mount: kmem_cache_destroy(dax_cache); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 7b763776306e..4c304dd042ce 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -155,7 +155,7 @@ extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, #define V9FS_DEFUID KUIDT_INIT(-2) #define V9FS_DEFGID KGIDT_INIT(-2) -static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) 
+static inline struct v9fs_session_info *v9fs_inode2v9ses(const struct inode *inode) { return (inode->i_sb->s_fs_info); } diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index fd2a2b040250..5d3293ca4071 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -40,6 +40,8 @@ extern const struct file_operations v9fs_cached_file_operations; extern const struct file_operations v9fs_cached_file_operations_dotl; extern const struct file_operations v9fs_mmap_file_operations; extern const struct file_operations v9fs_mmap_file_operations_dotl; +extern const struct rhashtable_params v9fs_inode_table_params; +extern const struct rhashtable_params v9fs_inode_table_dotl_params; extern struct kmem_cache *v9fs_inode_cache; struct inode *v9fs_alloc_inode(struct super_block *sb); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index ae0c38ad1fcb..4e9094d77139 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -81,7 +82,7 @@ static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode) * */ static int p9mode2perm(struct v9fs_session_info *v9ses, - struct p9_wstat *stat) + const struct p9_wstat *stat) { int res; int mode = stat->mode; @@ -108,7 +109,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, * */ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, - struct p9_wstat *stat, dev_t *rdev) + const struct p9_wstat *stat, dev_t *rdev) { int res; u32 mode = stat->mode; @@ -216,6 +217,48 @@ v9fs_blank_wstat(struct p9_wstat *wstat) wstat->extension = NULL; } +static u32 v9fs_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct p9_qid *qid = data; + u64 hashv = qid->type ^ qid->version ^ qid->path; + + return jhash(&hashv, sizeof(hashv), seed); +} + +static u32 v9fs_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct p9_qid *qid = &V9FS_I(obj)->qid; + u64 hashv = qid->type ^ qid->version ^ qid->path; + + return 
jhash(&hashv, sizeof(hashv), seed); +} + +static int v9fs_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_inode *v9inode = V9FS_I(inode); + const struct p9_wstat *st = container_of(arg->key, struct p9_wstat, qid); /* key is &st->qid (what iget5_locked() is given and what the key hash fn expects), not the wstat itself */ + dev_t rdev; + int umode = p9mode2unixmode(v9ses, st, &rdev); + + /* compare qid details */ + if ((inode->i_mode & S_IFMT) == (umode & S_IFMT) && + v9inode->qid.type == st->qid.type && + v9inode->qid.version == st->qid.version && + v9inode->qid.path == st->qid.path) + return 0; + return 1; +} + +const struct rhashtable_params v9fs_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = v9fs_inode_key_hash_fn, + .obj_hashfn = v9fs_inode_obj_hash_fn, + .obj_cmpfn = v9fs_inode_hash_cmp_fn, +}; + /** * v9fs_alloc_inode - helper function to allocate an inode * @@ -388,66 +431,26 @@ void v9fs_evict_inode(struct inode *inode) } } -static int v9fs_test_inode(struct inode *inode, void *data) +static int v9fs_set_inode(struct inode *inode, const void *data) { - int umode; - dev_t rdev; struct v9fs_inode *v9inode = V9FS_I(inode); - struct p9_wstat *st = (struct p9_wstat *)data; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + const struct p9_qid *qid = data; - umode = p9mode2unixmode(v9ses, st, &rdev); - /* don't match inode of different type */ - if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) - return 0; - - /* compare qid details */ - if (memcmp(&v9inode->qid.version, - &st->qid.version, sizeof(v9inode->qid.version))) - return 0; - - if (v9inode->qid.type != st->qid.type) - return 0; - - if (v9inode->qid.path != st->qid.path) - return 0; - return 1; -} - -static int v9fs_test_new_inode(struct inode *inode, void *data) -{ - return 0; -} - -static int v9fs_set_inode(struct inode *inode, void *data) -{ - struct v9fs_inode *v9inode = V9FS_I(inode); - struct p9_wstat *st = (struct p9_wstat *)data; - - memcpy(&v9inode->qid, 
&st->qid, sizeof(st->qid)); + v9inode->qid = *qid; return 0; } static struct inode *v9fs_qid_iget(struct super_block *sb, - struct p9_qid *qid, struct p9_wstat *st, int new) { dev_t rdev; int retval; umode_t umode; - unsigned long i_ino; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - int (*test)(struct inode *, void *); - - if (new) - test = v9fs_test_new_inode; - else - test = v9fs_test_inode; - i_ino = v9fs_qid2ino(qid); - inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st); + inode = iget5_locked(sb, v9fs_set_inode, &st->qid); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) @@ -457,7 +460,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, * FIXME!! we may need support for stale inodes * later. */ - inode->i_ino = i_ino; + inode->i_ino = v9fs_qid2ino(&st->qid); umode = p9mode2unixmode(v9ses, st, &rdev); retval = v9fs_init_inode(v9ses, inode, umode, rdev); if (retval) @@ -484,7 +487,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, if (IS_ERR(st)) return ERR_CAST(st); - inode = v9fs_qid_iget(sb, &st->qid, st, new); + inode = v9fs_qid_iget(sb, st, new); p9stat_free(st); kfree(st); return inode; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0028eccb665a..e47d47f7620b 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,48 @@ #include "xattr.h" #include "acl.h" +static u32 v9fs_inode_dotl_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct p9_stat_dotl *st = data; + u64 hashv = st->st_gen ^ st->qid.type ^ st->qid.version ^ st->qid.path; + + return jhash(&hashv, sizeof(hashv), seed); +} + +static u32 v9fs_inode_dotl_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = obj; + const struct v9fs_inode *v9inode = V9FS_I(inode); + u64 hashv = inode->i_generation ^ + v9inode->qid.type ^ v9inode->qid.version ^ 
v9inode->qid.path; + + return jhash(&hashv, sizeof(hashv), seed); +} + +static int v9fs_inode_dotl_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const struct v9fs_inode *v9inode = V9FS_I(inode); + const struct p9_stat_dotl *st = arg->key; + + /* compare qid details */ + if ((inode->i_mode & S_IFMT) == (st->st_mode & S_IFMT) && + inode->i_generation == st->st_gen && + v9inode->qid.type == st->qid.type && + v9inode->qid.version == st->qid.version && + v9inode->qid.path == st->qid.path) + return 0; + return 1; +} + +const struct rhashtable_params v9fs_inode_table_dotl_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = v9fs_inode_dotl_key_hash_fn, + .obj_hashfn = v9fs_inode_dotl_obj_hash_fn, + .obj_cmpfn = v9fs_inode_dotl_hash_cmp_fn, +}; + static int v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev); @@ -53,41 +96,10 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } -static int v9fs_test_inode_dotl(struct inode *inode, void *data) -{ - struct v9fs_inode *v9inode = V9FS_I(inode); - struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; - - /* don't match inode of different type */ - if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) - return 0; - - if (inode->i_generation != st->st_gen) - return 0; - - /* compare qid details */ - if (memcmp(&v9inode->qid.version, - &st->qid.version, sizeof(v9inode->qid.version))) - return 0; - - if (v9inode->qid.type != st->qid.type) - return 0; - - if (v9inode->qid.path != st->qid.path) - return 0; - return 1; -} - -/* Always get a new inode */ -static int v9fs_test_new_inode_dotl(struct inode *inode, void *data) -{ - return 0; -} - -static int v9fs_set_inode_dotl(struct inode *inode, void *data) +static int v9fs_set_inode_dotl(struct inode *inode, const void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); - struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; 
+ const struct p9_stat_dotl *st = data; memcpy(&v9inode->qid, &st->qid, sizeof(st->qid)); inode->i_generation = st->st_gen; @@ -101,18 +113,10 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, int new) { int retval; - unsigned long i_ino; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - int (*test)(struct inode *, void *); - if (new) - test = v9fs_test_new_inode_dotl; - else - test = v9fs_test_inode_dotl; - - i_ino = v9fs_qid2ino(qid); - inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st); + inode = iget5_locked(sb, v9fs_set_inode_dotl, st); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) @@ -122,7 +126,8 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, * FIXME!! we may need support for stale inodes * later. */ - inode->i_ino = i_ino; + inode->i_ino = v9fs_qid2ino(&st->qid); + inode->i_generation = st->st_gen; retval = v9fs_init_inode(v9ses, inode, st->st_mode, new_decode_dev(st->st_rdev)); if (retval) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 74df32be4c6a..dd81576fe541 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -60,6 +60,7 @@ static int v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, int flags) { + const struct rhashtable_params *itable_params; int ret; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -69,13 +70,19 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9fs_proto_dotl(v9ses)) { sb->s_op = &v9fs_super_ops_dotl; sb->s_xattr = v9fs_xattr_handlers; + itable_params = &v9fs_inode_table_dotl_params; } else { sb->s_op = &v9fs_super_ops; sb->s_time_max = U32_MAX; + itable_params = &v9fs_inode_table_params; } sb->s_time_min = 0; + ret = super_setup_inode_table(sb, itable_params); + if (ret) + return ret; + ret = super_setup_bdi(sb); if (ret) return ret; diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 7d9b23d981bf..8c85fb42c4af 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -129,7 
+129,7 @@ static void afs_break_one_callback(struct afs_volume *volume, if (!sb) return; - inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid); + inode = find_inode_rcu(sb, fid); if (inode) { vnode = AFS_FS_I(inode); afs_break_callback(vnode, afs_cb_break_for_callback); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 1d2e61e0ab04..842d3a1c1e8f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -779,8 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, goto out; /* Check to see if we already have an inode for the primary fid. */ - inode = ilookup5(dir->i_sb, cookie->fids[1].vnode, - afs_ilookup5_test_by_fid, &cookie->fids[1]); + inode = ilookup5(dir->i_sb, &cookie->fids[1]); if (inode) goto out; /* We do */ @@ -816,8 +815,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, /* Find any inodes that already exist and get their * callback counters. */ - ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode, - afs_ilookup5_test_by_fid, &vp->fid); + ti = ilookup5_nowait(dir->i_sb, &vp->fid); if (!IS_ERR_OR_NULL(ti)) { vnode = AFS_FS_I(ti); vp->dv_before = vnode->status.data_version; diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 7b784af604fd..9e0119a2f5f4 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -12,32 +12,6 @@ static atomic_t afs_autocell_ino; -/* - * iget5() comparator for inode created by autocell operations - * - * These pseudo inodes don't match anything. 
- */ -static int afs_iget5_pseudo_test(struct inode *inode, void *opaque) -{ - return 0; -} - -/* - * iget5() inode initialiser - */ -static int afs_iget5_pseudo_set(struct inode *inode, void *opaque) -{ - struct afs_super_info *as = AFS_FS_S(inode->i_sb); - struct afs_vnode *vnode = AFS_FS_I(inode); - struct afs_fid *fid = opaque; - - vnode->volume = as->volume; - vnode->fid = *fid; - inode->i_ino = fid->vnode; - inode->i_generation = fid->unique; - return 0; -} - /* * Create an inode for a dynamic root directory or an autocell dynamic * automount dir. @@ -61,8 +35,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) fid.unique = 0; } - inode = iget5_locked(sb, fid.vnode, - afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); + inode = iget5_locked(sb, afs_iget5_set, &fid); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 0fe8844b4bee..aa5924443727 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -22,9 +22,46 @@ #include #include #include +#include #include "internal.h" #include "afs_fs.h" +static u32 afs_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct afs_fid *fid = data; + + return jhash(fid, sizeof(*fid), seed); +} + +static u32 afs_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct afs_vnode *vnode = + container_of(obj, struct afs_vnode, vfs_inode); + + return jhash(&vnode->fid, sizeof(vnode->fid), seed); +} + +static int afs_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct afs_vnode *vnode = + container_of(obj, struct afs_vnode, vfs_inode); + const struct afs_fid *fid = arg->key; + + if (fid->vnode == vnode->fid.vnode && + fid->vnode_hi == vnode->fid.vnode_hi && + fid->unique == vnode->fid.unique) + return 0; + return 1; +} + +const struct rhashtable_params afs_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = afs_inode_key_hash_fn, + .obj_hashfn = 
afs_inode_obj_hash_fn, + .obj_cmpfn = afs_inode_hash_cmp_fn, +}; + static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, .listxattr = afs_listxattr, @@ -363,41 +400,17 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new, return afs_put_operation(op); } -/* - * ilookup() comparator - */ -int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque) -{ - struct afs_vnode *vnode = AFS_FS_I(inode); - struct afs_fid *fid = opaque; - - return (fid->vnode == vnode->fid.vnode && - fid->vnode_hi == vnode->fid.vnode_hi && - fid->unique == vnode->fid.unique); -} - -/* - * iget5() comparator - */ -static int afs_iget5_test(struct inode *inode, void *opaque) -{ - struct afs_vnode_param *vp = opaque; - //struct afs_vnode *vnode = AFS_FS_I(inode); - - return afs_ilookup5_test_by_fid(inode, &vp->fid); -} - /* * iget5() inode initialiser */ -static int afs_iget5_set(struct inode *inode, void *opaque) +int afs_iget5_set(struct inode *inode, const void *opaque) { - struct afs_vnode_param *vp = opaque; + const struct afs_fid *fid = opaque; struct afs_super_info *as = AFS_FS_S(inode->i_sb); struct afs_vnode *vnode = AFS_FS_I(inode); vnode->volume = as->volume; - vnode->fid = vp->fid; + vnode->fid = *fid; /* YFS supports 96-bit vnode IDs, but Linux only supports * 64-bit inode numbers. 
@@ -452,7 +465,7 @@ struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp) _enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique); - inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp); + inode = iget5_locked(sb, afs_iget5_set, &vp->fid); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -488,20 +501,6 @@ bad_inode: return ERR_PTR(ret); } -static int afs_iget5_set_root(struct inode *inode, void *opaque) -{ - struct afs_super_info *as = AFS_FS_S(inode->i_sb); - struct afs_vnode *vnode = AFS_FS_I(inode); - - vnode->volume = as->volume; - vnode->fid.vid = as->volume->vid, - vnode->fid.vnode = 1; - vnode->fid.unique = 1; - inode->i_ino = 1; - inode->i_generation = 1; - return 0; -} - /* * Set up the root inode for a volume. This is always vnode 1, unique 1 within * the volume. @@ -512,11 +511,16 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) struct afs_operation *op; struct afs_vnode *vnode; struct inode *inode; + const struct afs_fid fid = { + .vid = as->volume->vid, + .vnode = 1, + .unique = 1, + }; int ret; _enter(",{%llx},,", as->volume->vid); - inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL); + inode = iget5_locked(sb, afs_iget5_set, &fid); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -527,7 +531,9 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); - vnode->cb_v_break = as->volume->cb_v_break, + vnode->cb_v_break = as->volume->cb_v_break; + inode->i_ino = 1; + inode->i_generation = 1; op = afs_alloc_operation(key, as->volume); if (IS_ERR(op)) { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e5f0446f27e5..248da30960ef 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1073,12 +1073,13 @@ extern void afs_fs_probe_cleanup(struct afs_net *); /* * inode.c */ +extern const struct rhashtable_params afs_inode_table_params; extern const struct 
afs_operation_ops afs_fetch_status_operation; extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *); extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); -extern int afs_ilookup5_test_by_fid(struct inode *, void *); extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); +int afs_iget5_set(struct inode *inode, const void *opaque); extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *); extern struct inode *afs_root_iget(struct super_block *, struct key *); extern bool afs_check_validity(struct afs_vnode *); diff --git a/fs/afs/super.c b/fs/afs/super.c index b552357b1d13..9821072e12d5 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -453,6 +453,9 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) sb->s_op = &afs_super_ops; if (!as->dyn_root) sb->s_xattr = afs_xattr_handlers; + ret = super_setup_inode_table(sb, &afs_inode_table_params); + if (ret) + return ret; ret = super_setup_bdi(sb); if (ret) return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b5b42fb46f6a..11c8bfedc48d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -235,13 +235,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) return &inode->v; } -static int inum_test(struct inode *inode, void *p) -{ - unsigned long *ino = p; - - return *ino == inode->i_ino; -} - static struct bch_inode_info * __bch2_create(struct bch_inode_info *dir, struct dentry *dentry, umode_t mode, dev_t rdev, bool tmpfile) @@ -321,8 +314,7 @@ err_before_quota: * bch2_trans_exit() and dropping locks, else we could race with another * thread pulling the inode in and modifying it: */ - old = to_bch_ei(inode_insert5(&inode->v, inode->v.i_ino, - inum_test, NULL, &inode->v.i_ino)); + old = to_bch_ei(inode_insert5(&inode->v)); BUG_ON(!old); if (unlikely(old != inode)) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 8ae833e00443..7059190732a1 100644 --- 
a/fs/block_dev.c +++ b/fs/block_dev.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -54,6 +55,38 @@ struct block_device *I_BDEV(struct inode *inode) } EXPORT_SYMBOL(I_BDEV); +static u32 blockdev_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const dev_t *k = data; + + return jhash(k, sizeof(*k), seed); +} + +static u32 blockdev_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct bdev_inode *ei = + container_of(obj, struct bdev_inode, vfs_inode); + + return jhash(&ei->bdev.bd_dev, sizeof(ei->bdev.bd_dev), seed); +} + +static int blockdev_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct bdev_inode *ei = + container_of(obj, struct bdev_inode, vfs_inode); + const dev_t *k = arg->key; + + return ei->bdev.bd_dev == *k ? 0 : 1; +} + +static const struct rhashtable_params blockdev_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = blockdev_key_hash_fn, + .obj_hashfn = blockdev_obj_hash_fn, + .obj_cmpfn = blockdev_hash_cmp_fn, +}; + static void bdev_write_inode(struct block_device *bdev) { struct inode *inode = bdev->bd_inode; @@ -839,24 +872,13 @@ void __init bdev_cache_init(void) if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ -} -/* - * Most likely _very_ bad one - but then it's hardly critical for small - * /dev and can be fixed when somebody will need really large one. - * Keep in mind that it will be fed through icache hash function too. 
- */ -static inline unsigned long hash(dev_t dev) -{ - return MAJOR(dev)+MINOR(dev); -} - -static int bdev_test(struct inode *inode, void *data) -{ - return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; + err = super_setup_inode_table(blockdev_superblock, &blockdev_inode_table_params); + if (err) + panic("Cannot initialize bdev inode table"); } -static int bdev_set(struct inode *inode, void *data) +static int bdev_set(struct inode *inode, const void *data) { BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; return 0; @@ -867,8 +889,7 @@ struct block_device *bdget(dev_t dev) struct block_device *bdev; struct inode *inode; - inode = iget5_locked(blockdev_superblock, hash(dev), - bdev_test, bdev_set, &dev); + inode = iget5_locked(blockdev_superblock, bdev_set, &dev); if (!inode) return NULL; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9a72896bed2e..23d507971837 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2930,6 +2930,8 @@ void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size); u64 btrfs_file_extent_end(const struct btrfs_path *path); /* inode.c */ +extern const struct rhashtable_params btrfs_inode_table_params; + struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, u64 start, u64 len); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9570458aa847..223a5430407d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,43 @@ struct btrfs_iget_args { struct btrfs_root *root; }; +static u32 btrfs_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct btrfs_iget_args *args = data; + + return jhash(args, sizeof(*args), seed); +} + +static u32 btrfs_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = obj; + const struct btrfs_iget_args args = { + BTRFS_I(inode)->location.objectid, + 
BTRFS_I(inode)->root, + }; + + return jhash(&args, sizeof(args), seed); +} + +static int btrfs_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const struct btrfs_iget_args *args = arg->key; + + if (args->ino == BTRFS_I(inode)->location.objectid && + args->root == BTRFS_I(inode)->root) + return 0; + return 1; +} + +const struct rhashtable_params btrfs_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = btrfs_inode_key_hash_fn, + .obj_hashfn = btrfs_inode_obj_hash_fn, + .obj_cmpfn = btrfs_inode_hash_cmp_fn, +}; + struct btrfs_dio_data { u64 reserve; u64 unsubmitted_oe_range_start; @@ -5327,10 +5365,9 @@ static void inode_tree_del(struct inode *inode) } } - -static int btrfs_init_locked_inode(struct inode *inode, void *p) +static int btrfs_init_locked_inode(struct inode *inode, const void *p) { - struct btrfs_iget_args *args = p; + const struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->location.objectid = args->ino; @@ -5341,30 +5378,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) return 0; } -static int btrfs_find_actor(struct inode *inode, void *opaque) -{ - struct btrfs_iget_args *args = opaque; - - return args->ino == BTRFS_I(inode)->location.objectid && - args->root == BTRFS_I(inode)->root; -} - -static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino, - struct btrfs_root *root) -{ - struct inode *inode; - struct btrfs_iget_args args; - unsigned long hashval = btrfs_inode_hash(ino, root); - - args.ino = ino; - args.root = root; - - inode = iget5_locked(s, hashval, btrfs_find_actor, - btrfs_init_locked_inode, - (void *)&args); - return inode; -} - /* * Get an inode object given its inode number and corresponding root. 
* Path can be preallocated to prevent recursing back to iget through @@ -5375,8 +5388,9 @@ struct inode *btrfs_iget_path(struct super_block *s, u64 ino, struct btrfs_root *root, struct btrfs_path *path) { struct inode *inode; + struct btrfs_iget_args args = { ino, root }; - inode = btrfs_iget_locked(s, ino, root); + inode = iget5_locked(s, btrfs_init_locked_inode, &args); if (!inode) return ERR_PTR(-ENOMEM); @@ -5876,18 +5890,6 @@ int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index) return ret; } -static int btrfs_insert_inode_locked(struct inode *inode) -{ - struct btrfs_iget_args args; - - args.ino = BTRFS_I(inode)->location.objectid; - args.root = BTRFS_I(inode)->root; - - return insert_inode_locked4(inode, - btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), - btrfs_find_actor, &args); -} - /* * Inherit flags from the parent inode. * @@ -6020,7 +6022,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, location->offset = 0; location->type = BTRFS_INODE_ITEM_KEY; - ret = btrfs_insert_inode_locked(inode); + ret = insert_inode_locked(inode); if (ret < 0) { iput(inode); goto fail; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 25967ecaaf0a..3898e5bee791 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1307,6 +1307,12 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; + err = super_setup_inode_table(sb, &btrfs_inode_table_params); + if (err) { + btrfs_err(fs_info, "super_setup_inode_table failed"); + return err; + } + err = super_setup_bdi(sb); if (err) { btrfs_err(fs_info, "super_setup_bdi failed"); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d163fa96cb40..823084fff98c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -36,14 +37,44 @@ static const struct inode_operations ceph_symlink_iops; static void ceph_inode_work(struct work_struct 
*work); +static u32 ceph_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct ceph_vino *pvino = data; + + return jhash(pvino, sizeof(*pvino), seed); +} + +static u32 ceph_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct ceph_inode_info *ci = ceph_inode(obj); + + return jhash(&ci->i_vino, sizeof(ci->i_vino), seed); +} + +static int ceph_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + if (ceph_ino_compare(obj, arg->key)) + return 0; + return 1; +} + +const struct rhashtable_params ceph_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = ceph_inode_key_hash_fn, + .obj_hashfn = ceph_inode_obj_hash_fn, + .obj_cmpfn = ceph_inode_hash_cmp_fn, +}; + /* * find or create an inode, given the ceph ino number */ -static int ceph_set_ino_cb(struct inode *inode, void *data) +static int ceph_set_ino_cb(struct inode *inode, const void *data) { struct ceph_inode_info *ci = ceph_inode(inode); + const struct ceph_vino *pvino = data; - ci->i_vino = *(struct ceph_vino *)data; + ci->i_vino = *pvino; inode->i_ino = ceph_vino_to_ino_t(ci->i_vino); inode_set_iversion_raw(inode, 0); return 0; @@ -53,8 +84,7 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) { struct inode *inode; - inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare, - ceph_set_ino_cb, &vino); + inode = iget5_locked(sb, ceph_set_ino_cb, &vino); if (!inode) return ERR_PTR(-ENOMEM); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7ec0e6d03d10..5cca520134fa 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1086,6 +1086,10 @@ static int ceph_get_tree(struct fs_context *fc) dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); + err = super_setup_inode_table(sb, &ceph_inode_table_params); + if (err < 0) + goto out_splat; + err = ceph_setup_bdi(sb, fsc); if (err < 0) goto out_splat; diff --git a/fs/ceph/super.h b/fs/ceph/super.h 
index a3995ebe0623..0acd007e734f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -482,6 +482,8 @@ static inline ino_t ceph_vino_to_ino_t(struct ceph_vino vino) /* for printf-style formatting */ #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap +extern const struct rhashtable_params ceph_inode_table_params; + static inline u64 ceph_ino(struct inode *inode) { return ceph_inode(inode)->i_vino.ino; @@ -514,15 +516,15 @@ static inline u64 ceph_present_inode(struct inode *inode) return ceph_present_ino(inode->i_sb, ceph_ino(inode)); } -static inline int ceph_ino_compare(struct inode *inode, void *data) +static inline int ceph_ino_compare(const struct inode *inode, + const struct ceph_vino *pvino) { - struct ceph_vino *pvino = (struct ceph_vino *)data; struct ceph_inode_info *ci = ceph_inode(inode); + return ci->i_vino.ino == pvino->ino && ci->i_vino.snap == pvino->snap; } - static inline struct inode *ceph_find_inode(struct super_block *sb, struct ceph_vino vino) { @@ -531,7 +533,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, * anyway, so there is no need to squash the inode number down to * 32-bits first. Just use low-order bits on arches with 32-bit long. 
*/ - return ilookup5(sb, (unsigned long)vino.ino, ceph_ino_compare, &vino); + return ilookup5(sb, &vino); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0fb99d25e8a8..0945cc7f324b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -202,6 +202,9 @@ cifs_read_super(struct super_block *sb) sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; sb->s_xattr = cifs_xattr_handlers; + rc = super_setup_inode_table(sb, &cifs_inode_table_params); + if (rc) + goto out_no_root; rc = super_setup_bdi(sb); if (rc) goto out_no_root; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 99b3180c613a..130bb17f59b1 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -60,6 +60,7 @@ extern void cifs_sb_active(struct super_block *sb); extern void cifs_sb_deactive(struct super_block *sb); /* Functions related to inodes */ +extern const struct rhashtable_params cifs_inode_table_params; extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1f75b25e559a..53ef1cde63fb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "cifsfs.h" @@ -38,6 +39,49 @@ #include "cifs_unicode.h" #include "fscache.h" +static u32 cifs_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct cifs_fattr *fattr = data; + u64 v = fattr->cf_uniqueid ^ fattr->cf_createtime; + + return jhash(&v, sizeof(v), seed); +} + +static u32 cifs_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + struct inode *inode = (struct inode *) obj; + u64 v = CIFS_I(inode)->uniqueid ^ CIFS_I(inode)->createtime; + + return jhash(&v, sizeof(v), seed); +} + +static int cifs_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct inode *inode = (struct inode *) obj; + const struct cifs_fattr *fattr = arg->key; + + /* don't 
match inode with different uniqueid */ + if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) + return 1; + + /* use createtime like an i_generation field */ + if (CIFS_I(inode)->createtime != fattr->cf_createtime) + return 1; + + /* don't match inode of different type */ + if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) + return 1; + + return 0; +} + +const struct rhashtable_params cifs_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = cifs_inode_key_hash_fn, + .obj_hashfn = cifs_inode_obj_hash_fn, + .obj_cmpfn = cifs_inode_hash_cmp_fn, +}; static void cifs_set_ops(struct inode *inode) { @@ -1202,33 +1246,9 @@ static const struct inode_operations cifs_ipc_inode_ops = { }; static int -cifs_find_inode(struct inode *inode, void *opaque) +cifs_init_inode(struct inode *inode, const void *opaque) { - struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; - - /* don't match inode with different uniqueid */ - if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) - return 0; - - /* use createtime like an i_generation field */ - if (CIFS_I(inode)->createtime != fattr->cf_createtime) - return 0; - - /* don't match inode of different type */ - if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) - return 0; - - /* if it's not a directory or has no dentries, then flag it */ - if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) - fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; - - return 1; -} - -static int -cifs_init_inode(struct inode *inode, void *opaque) -{ - struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; + const struct cifs_fattr *fattr = opaque; CIFS_I(inode)->uniqueid = fattr->cf_uniqueid; CIFS_I(inode)->createtime = fattr->cf_createtime; @@ -1260,17 +1280,17 @@ inode_has_hashed_dentries(struct inode *inode) struct inode * cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) { - unsigned long hash; struct inode *inode; retry_iget5_locked: cifs_dbg(FYI, "looking for uniqueid=%llu\n", 
fattr->cf_uniqueid); - /* hash down to 32-bits on 32-bit arch */ - hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); - - inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); + inode = iget5_locked(sb, cifs_init_inode, fattr); if (inode) { + /* if it's not a directory or has no dentries, then flag it */ + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) + fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; + /* was there a potentially problematic inode collision? */ if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; @@ -1287,7 +1307,7 @@ retry_iget5_locked: if (sb->s_flags & SB_NOATIME) inode->i_flags |= S_NOATIME | S_NOCMTIME; if (inode->i_state & I_NEW) { - inode->i_ino = hash; + inode->i_ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); #ifdef CONFIG_CIFS_FSCACHE /* initialize per-inode cache cookie pointer */ CIFS_I(inode)->fscache = NULL; diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 06855f6c7902..6a387cdcdea6 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -3,6 +3,7 @@ (C) 1996 Peter Braam */ +#include #include #include #include @@ -12,11 +13,44 @@ #include "coda_psdev.h" #include "coda_linux.h" -static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2) +static inline int coda_fideq(const struct CodaFid *fid1, + const struct CodaFid *fid2) { return memcmp(fid1, fid2, sizeof(*fid1)) == 0; } +static u32 coda_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct CodaFid *fid = data; + + return jhash(fid, sizeof(*fid), seed); +} + +static u32 coda_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct coda_inode_info *cii = + container_of(obj, struct coda_inode_info, vfs_inode); + + return jhash(&cii->c_fid, sizeof(cii->c_fid), seed); +} + +static int coda_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct coda_inode_info *cii = + container_of(obj, struct coda_inode_info, vfs_inode); + const 
struct CodaFid *fid = arg->key; + + return !coda_fideq(&cii->c_fid, fid); +} + +const struct rhashtable_params coda_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = coda_inode_key_hash_fn, + .obj_hashfn = coda_inode_obj_hash_fn, + .obj_cmpfn = coda_inode_hash_cmp_fn, +}; + static const struct inode_operations coda_symlink_inode_operations = { .get_link = page_get_link, .setattr = coda_setattr, @@ -42,16 +76,9 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr) init_special_inode(inode, inode->i_mode, huge_decode_dev(attr->va_rdev)); } -static int coda_test_inode(struct inode *inode, void *data) -{ - struct CodaFid *fid = (struct CodaFid *)data; - struct coda_inode_info *cii = ITOC(inode); - return coda_fideq(&cii->c_fid, fid); -} - -static int coda_set_inode(struct inode *inode, void *data) +static int coda_set_inode(struct inode *inode, const void *data) { - struct CodaFid *fid = (struct CodaFid *)data; + const struct CodaFid *fid = data; struct coda_inode_info *cii = ITOC(inode); cii->c_fid = *fid; return 0; @@ -62,9 +89,8 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, { struct inode *inode; struct coda_inode_info *cii; - unsigned long hash = coda_f2i(fid); - inode = iget5_locked(sb, hash, coda_test_inode, coda_set_inode, fid); + inode = iget5_locked(sb, coda_set_inode, fid); if (!inode) return ERR_PTR(-ENOMEM); @@ -72,7 +98,7 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, if (inode->i_state & I_NEW) { cii = ITOC(inode); /* we still need to set i_ino for things like stat(2) */ - inode->i_ino = hash; + inode->i_ino = coda_f2i(fid); /* inode is locked and unique, no need to grab cii->c_lock */ cii->c_mapcount = 0; unlock_new_inode(inode); @@ -135,9 +161,8 @@ void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid, struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) { struct inode *inode; - unsigned long hash = 
coda_f2i(fid); - inode = ilookup5(sb, hash, coda_test_inode, fid); + inode = ilookup5(sb, fid); if ( !inode ) return NULL; diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index 1763ff95d865..e44a49d64bfb 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -49,6 +49,8 @@ struct coda_file_info { #define C_DYING 0x4 /* from venus (which died) */ #define C_PURGE 0x8 +extern const struct rhashtable_params coda_inode_table_params; + struct inode *coda_cnode_make(struct CodaFid *, struct super_block *); struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); struct inode *coda_cnode_makectl(struct super_block *sb); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index b1c70e2b9b1e..863777d488ab 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -192,6 +192,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_min = S64_MIN; sb->s_time_max = S64_MAX; + error = super_setup_inode_table(sb, &coda_inode_table_params); + if (error) + goto error; + error = super_setup_bdi(sb); if (error) goto error; diff --git a/fs/dcache.c b/fs/dcache.c index 32d840329e27..8726c74d739e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3225,7 +3225,6 @@ void __init vfs_caches_init_early(void) INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); dcache_init_early(); - inode_init_early(); } void __init vfs_caches_init(void) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index e6ac78c62ca4..8666d4e5a18e 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -544,6 +544,7 @@ extern int ecryptfs_verbosity; extern unsigned int ecryptfs_message_buf_len; extern signed long ecryptfs_message_wait_timeout; extern unsigned int ecryptfs_number_of_users; +extern const struct rhashtable_params ecryptfs_inode_table_params; extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache; extern struct kmem_cache *ecryptfs_file_info_cache; diff --git a/fs/ecryptfs/inode.c
b/fs/ecryptfs/inode.c index e23752d9a79f..a0d2027e90b4 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -21,6 +22,39 @@ #include #include "ecryptfs_kernel.h" +static u32 ecryptfs_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct inode *lower_inode = data; + + return jhash(&lower_inode, sizeof(lower_inode), seed); +} + +static u32 ecryptfs_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = obj; + const struct inode *lower_inode = ecryptfs_inode_to_lower((struct inode *) inode); + + return jhash(&lower_inode, sizeof(lower_inode), seed); +} + +static int ecryptfs_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const struct inode *lower_inode = arg->key; + + if (ecryptfs_inode_to_lower((struct inode *) inode) == lower_inode) + return 0; + return 1; +} + +const struct rhashtable_params ecryptfs_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = ecryptfs_inode_key_hash_fn, + .obj_hashfn = ecryptfs_inode_obj_hash_fn, + .obj_cmpfn = ecryptfs_inode_hash_cmp_fn, +}; + static struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir; @@ -36,14 +70,9 @@ static void unlock_dir(struct dentry *dir) dput(dir); } -static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) -{ - return ecryptfs_inode_to_lower(inode) == lower_inode; -} - -static int ecryptfs_inode_set(struct inode *inode, void *opaque) +static int ecryptfs_inode_set(struct inode *inode, const void *opaque) { - struct inode *lower_inode = opaque; + struct inode *lower_inode = (void *) opaque; ecryptfs_set_inode_lower(inode, lower_inode); fsstack_copy_attr_all(inode, lower_inode); @@ -78,9 +107,7 @@ static struct inode *__ecryptfs_get_inode(struct inode *lower_inode, return ERR_PTR(-EXDEV); if (!igrab(lower_inode)) return ERR_PTR(-ESTALE); - inode = 
iget5_locked(sb, (unsigned long)lower_inode, - ecryptfs_inode_test, ecryptfs_inode_set, - lower_inode); + inode = iget5_locked(sb, ecryptfs_inode_set, lower_inode); if (!inode) { iput(lower_inode); return ERR_PTR(-EACCES); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e63259fdef28..6e9b528b85dd 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -505,6 +505,10 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } + rc = super_setup_inode_table(s, &ecryptfs_inode_table_params); + if (rc) + goto out1; + rc = super_setup_bdi(s); if (rc) goto out1; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 02b3c36b3676..8b70e673e31b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1564,7 +1564,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, if (!fc->sb) goto out_up_killsb; - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); + inode = ilookup5(fc->sb, &nodeid); if (!inode) goto out_up_killsb; @@ -1732,7 +1732,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, if (fc->sb) { u64 nodeid = outarg.nodeid; - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); + inode = ilookup5(fc->sb, &nodeid); if (inode) { err = fuse_retrieve(fc, inode, &outarg); iput(inode); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 26f028bc760b..8338f3cf6ef1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1010,7 +1010,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, struct dentry *dir; struct dentry *entry; - parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + parent = ilookup5(sb, &parent_nodeid); if (!parent) return -ENOENT; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 740a8a7d7ae6..db95e2dbba38 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -793,11 +793,6 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; extern const struct 
dentry_operations fuse_root_dentry_operations; -/** - * Inode to nodeid comparison. - */ -int fuse_inode_eq(struct inode *inode, void *_nodeidp); - /** * Get a filled in inode */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bba747520e9b..cc18b77fbd9d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -23,6 +23,7 @@ #include #include #include +#include MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -64,6 +65,40 @@ MODULE_PARM_DESC(max_user_congthresh, static struct file_system_type fuseblk_fs_type; #endif +static u32 fuse_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const u64 *nodeid = data; + + return jhash(nodeid, sizeof(*nodeid), seed); +} + +static u32 fuse_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct fuse_inode *fuse_inode = + container_of(obj, struct fuse_inode, inode); + + return jhash(&fuse_inode->nodeid, sizeof(fuse_inode->nodeid), seed); +} + +static int fuse_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct fuse_inode *fuse_inode = + container_of(obj, struct fuse_inode, inode); + const u64 *nodeid = arg->key; + + if (fuse_inode->nodeid == *nodeid) + return 0; + return 1; +} + +static const struct rhashtable_params fuse_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = fuse_inode_key_hash_fn, + .obj_hashfn = fuse_inode_obj_hash_fn, + .obj_cmpfn = fuse_inode_hash_cmp_fn, +}; + struct fuse_forget_link *fuse_alloc_forget(void) { return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); @@ -268,16 +303,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -int fuse_inode_eq(struct inode *inode, void *_nodeidp) -{ - u64 nodeid = *(u64 *) _nodeidp; - if (get_node_id(inode) == nodeid) - return 1; - else - return 0; -} - -static int fuse_inode_set(struct inode *inode, void *_nodeidp) +static int fuse_inode_set(struct inode *inode, const void *_nodeidp) { u64 
nodeid = *(u64 *) _nodeidp; get_fuse_inode(inode)->nodeid = nodeid; @@ -293,7 +319,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, struct fuse_conn *fc = get_fuse_conn_super(sb); retry: - inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); + inode = iget5_locked(sb, fuse_inode_set, &nodeid); if (!inode) return NULL; @@ -329,7 +355,7 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, pgoff_t pg_start; pgoff_t pg_end; - inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + inode = ilookup5(sb, &nodeid); if (!inode) return -ENOENT; @@ -693,7 +719,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, if (handle->nodeid == 0) goto out_err; - inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); + inode = ilookup5(sb, &handle->nodeid); if (!inode) { struct fuse_entry_out outarg; const struct qstr name = QSTR_INIT(".", 1); @@ -1183,6 +1209,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->dev = sb->s_dev; fc->sb = sb; + err = super_setup_inode_table(sb, &fuse_inode_table_params); + if (err) + goto err_dev_free; err = fuse_bdi_init(fc, sb); if (err) goto err_dev_free; diff --git a/fs/inode.c b/fs/inode.c index 8f779180c030..83b9e7462115 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -19,6 +19,7 @@ #include #include /* for inode_has_buffers */ #include +#include #include #include #include @@ -55,10 +56,54 @@ * inode_hash_lock */ -static unsigned int i_hash_mask __read_mostly; -static unsigned int i_hash_shift __read_mostly; -static struct hlist_head *inode_hashtable __read_mostly; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); +static u32 inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const unsigned long *ino = data; + + return jhash(ino, sizeof(*ino), seed); +} + +static u32 inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = obj; + + return jhash(&inode->i_ino, sizeof(inode->i_ino), 
seed); +} + +static int inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const unsigned long *ino = arg->key; + + return inode->i_ino == *ino ? 0 : 1; +} + +const struct rhashtable_params default_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = inode_key_hash_fn, + .obj_hashfn = inode_obj_hash_fn, + .obj_cmpfn = inode_hash_cmp_fn, +}; + +int super_setup_inode_table(struct super_block *sb, + const struct rhashtable_params *params) +{ + int ret; + + if (sb->s_inode_table_init_done) + rhashtable_destroy(&sb->s_inode_table); + + sb->s_inode_table_init_done = false; + + ret = rhashtable_init(&sb->s_inode_table, params); + if (ret) + return ret; + + sb->s_inode_table_init_done = true; + return 0; +} +EXPORT_SYMBOL(super_setup_inode_table); /* * Empty aops. Can be used for the cases where the user does not @@ -364,7 +409,6 @@ EXPORT_SYMBOL(address_space_init_once); void inode_init_once(struct inode *inode) { memset(inode, 0, sizeof(*inode)); - INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_devices); INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_wb_list); @@ -448,16 +492,6 @@ static inline void inode_sb_list_del(struct inode *inode) } } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); - return tmp & i_hash_mask; -} - /** * __insert_inode_hash - hash an inode * @inode: unhashed inode @@ -468,13 +502,12 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { - struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); + struct super_block *sb = inode->i_sb; + int ret; - spin_lock(&inode_hash_lock); - spin_lock(&inode->i_lock); - 
hlist_add_head_rcu(&inode->i_hash, b); - spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + ret = rhashtable_lookup_insert_fast(&sb->s_inode_table, &inode->i_hash, + sb->s_inode_table.p); + BUG_ON(ret); } EXPORT_SYMBOL(__insert_inode_hash); @@ -486,11 +519,10 @@ EXPORT_SYMBOL(__insert_inode_hash); */ void __remove_inode_hash(struct inode *inode) { - spin_lock(&inode_hash_lock); - spin_lock(&inode->i_lock); - hlist_del_init_rcu(&inode->i_hash); - spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + struct super_block *sb = inode->i_sb; + + rhashtable_remove_fast(&sb->s_inode_table, &inode->i_hash, + sb->s_inode_table.p); } EXPORT_SYMBOL(__remove_inode_hash); @@ -782,45 +814,9 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } -static void __wait_on_freeing_inode(struct inode *inode); -/* - * Called with the inode lock held. - */ -static struct inode *find_inode(struct super_block *sb, - struct hlist_head *head, - int (*test)(struct inode *, void *), - void *data) -{ - struct inode *inode = NULL; - -repeat: - hlist_for_each_entry(inode, head, i_hash) { - if (inode->i_sb != sb) - continue; - if (!test(inode, data)) - continue; - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); - goto repeat; - } - __iget(inode); - spin_unlock(&inode->i_lock); - return inode; - } - return NULL; -} - -static int inum_test(struct inode *inode, void *p) +static int inum_set(struct inode *inode, const void *p) { - unsigned long *ino = p; - - return *ino == inode->i_ino; -} - -static int inum_set(struct inode *inode, void *p) -{ - unsigned long *ino = p; + const unsigned long *ino = p; inode->i_ino = *ino; return 0; @@ -1018,6 +1014,29 @@ void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) } EXPORT_SYMBOL(unlock_two_nondirectories); +/* + * If we try to find an inode in the inode hash while it is being + * deleted, we have to wait until the 
filesystem completes its + * deletion before reporting that it isn't found. This function waits + * until the deletion _might_ have completed. Callers are responsible + * to recheck inode state. + * + * It doesn't matter if I_NEW is not set initially, a call to + * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list + * will DTRT. + */ +static void __wait_on_freeing_inode(struct inode *inode) +{ + wait_queue_head_t *wq; + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + wq = bit_waitqueue(&inode->i_state, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + schedule(); + finish_wait(wq, &wait.wq_entry); +} + /** * inode_insert5 - obtain an inode from a mounted file system * @inode: pre-allocated inode to use for insert to cache @@ -1038,48 +1057,40 @@ EXPORT_SYMBOL(unlock_two_nondirectories); * Note both @test and @set are called with the inode_hash_lock held, so can't * sleep. */ -struct inode *inode_insert5(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), void *data) +struct inode *inode_insert5(struct inode *inode) { - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + struct super_block *sb = inode->i_sb; struct inode *old; -again: - spin_lock(&inode_hash_lock); - old = find_inode(inode->i_sb, head, test, data); - if (unlikely(old)) { - /* - * Uhhuh, somebody else created the same inode under us. - * Use the old inode instead of the preallocated one. 
- */ - spin_unlock(&inode_hash_lock); - if (IS_ERR(old)) - return NULL; - wait_on_inode(old); - if (unlikely(inode_unhashed(old))) { - iput(old); - goto again; - } - return old; - } - - if (set && unlikely(set(inode, data))) { - inode = NULL; - goto unlock; - } /* * Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ - spin_lock(&inode->i_lock); inode->i_state |= I_NEW; - hlist_add_head_rcu(&inode->i_hash, head); - spin_unlock(&inode->i_lock); +again: + rcu_read_lock(); + old = rhashtable_lookup_get_insert_fast(&sb->s_inode_table, &inode->i_hash, + sb->s_inode_table.p); + if (old) { + if (!IS_ERR(old)) { + spin_lock(&old->i_lock); + if (old->i_state & (I_FREEING|I_WILL_FREE)) { + __wait_on_freeing_inode(old); + goto again; + } + __iget(old); + spin_unlock(&old->i_lock); + } + rcu_read_unlock(); + + inode->i_state &= ~I_NEW; + return IS_ERR(old) ? NULL : old; + + } + rcu_read_unlock(); + if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); -unlock: - spin_unlock(&inode_hash_lock); return inode; } @@ -1105,21 +1116,25 @@ EXPORT_SYMBOL(inode_insert5); * Note both @test and @set are called with the inode_hash_lock held, so can't * sleep.
*/ -struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), void *data) +struct inode *iget5_locked(struct super_block *sb, + int (*set)(struct inode *, const void *), const void *key) { - struct inode *inode = ilookup5(sb, hashval, test, data); + struct inode *inode = ilookup5(sb, key); if (!inode) { struct inode *new = new_inode_pseudo(sb); - if (new) { - new->i_state = 0; - inode = inode_insert5(new, hashval, test, set, data); - if (unlikely(inode != new)) - destroy_inode(new); + if (!new) + return NULL; + + if (set && set(new, key)) { + destroy_inode(new); + return NULL; } + + inode = inode_insert5(new); + if (unlikely(inode != new)) + destroy_inode(new); } return inode; } @@ -1140,7 +1155,7 @@ EXPORT_SYMBOL(iget5_locked); */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { - return iget5_locked(sb, hash(sb, ino), inum_test, inum_set, &ino); + return iget5_locked(sb, inum_set, &ino); } EXPORT_SYMBOL(iget_locked); @@ -1153,14 +1168,8 @@ EXPORT_SYMBOL(iget_locked); */ static int test_inode_iunique(struct super_block *sb, unsigned long ino) { - struct hlist_head *b = inode_hashtable + hash(sb, ino); - struct inode *inode; - - hlist_for_each_entry_rcu(inode, b, i_hash) { - if (inode->i_ino == ino && inode->i_sb == sb) - return 0; - } - return 1; + return rhashtable_lookup(&sb->s_inode_table, &ino, + sb->s_inode_table.p) == NULL; } /** @@ -1237,17 +1246,24 @@ EXPORT_SYMBOL(igrab); * * Note2: @test is called with the inode_hash_lock held, so can't sleep. 
*/ -struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) +struct inode *ilookup5_nowait(struct super_block *sb, const void *key) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; - - spin_lock(&inode_hash_lock); - inode = find_inode(sb, head, test, data); - spin_unlock(&inode_hash_lock); - - return IS_ERR(inode) ? NULL : inode; +repeat: + rcu_read_lock(); + inode = rhashtable_lookup(&sb->s_inode_table, key, + sb->s_inode_table.p); + if (inode) { + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE)) { + __wait_on_freeing_inode(inode); + goto repeat; + } + __iget(inode); + spin_unlock(&inode->i_lock); + } + rcu_read_unlock(); + return inode; } EXPORT_SYMBOL(ilookup5_nowait); @@ -1268,12 +1284,11 @@ EXPORT_SYMBOL(ilookup5_nowait); * * Note: @test is called with the inode_hash_lock held, so can't sleep. */ -struct inode *ilookup5(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) +struct inode *ilookup5(struct super_block *sb, const void *key) { struct inode *inode; again: - inode = ilookup5_nowait(sb, hashval, test, data); + inode = ilookup5_nowait(sb, key); if (inode) { wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1295,7 +1310,7 @@ EXPORT_SYMBOL(ilookup5); */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { - return ilookup5(sb, hash(sb, ino), inum_test, &ino); + return ilookup5(sb, &ino); } EXPORT_SYMBOL(ilookup); @@ -1320,22 +1335,19 @@ EXPORT_SYMBOL(ilookup); * * The caller must hold the RCU read lock. 
*/ -struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) +struct inode *find_inode_rcu(struct super_block *sb, const void *key) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_rcu() usage"); - hlist_for_each_entry_rcu(inode, head, i_hash) { - if (inode->i_sb == sb && - !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && - test(inode, data)) - return inode; - } - return NULL; + inode = rhashtable_lookup(&sb->s_inode_table, key, + sb->s_inode_table.p); + + if (inode && (READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) + inode = NULL; + return inode; } EXPORT_SYMBOL(find_inode_rcu); @@ -1361,23 +1373,15 @@ EXPORT_SYMBOL(find_inode_rcu); struct inode *find_inode_by_ino_rcu(struct super_block *sb, unsigned long ino) { - return find_inode_rcu(sb, ino, inum_test, &ino); + return find_inode_rcu(sb, &ino); } EXPORT_SYMBOL(find_inode_by_ino_rcu); int insert_inode_locked(struct inode *inode) -{ - return insert_inode_locked4(inode, hash(inode->i_sb, inode->i_ino), - inum_test, &inode->i_ino); -} -EXPORT_SYMBOL(insert_inode_locked); - -int insert_inode_locked4(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) { struct inode *old; - old = inode_insert5(inode, hashval, test, NULL, data); + old = inode_insert5(inode); if (old != inode) { iput(old); @@ -1385,7 +1389,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } return 0; } -EXPORT_SYMBOL(insert_inode_locked4); +EXPORT_SYMBOL(insert_inode_locked); int generic_delete_inode(struct inode *inode) { @@ -1816,30 +1820,6 @@ int inode_needs_sync(struct inode *inode) } EXPORT_SYMBOL(inode_needs_sync); -/* - * If we try to find an inode in the inode hash while it is being - * deleted, we have to wait until the filesystem completes its - * deletion before reporting that it isn't 
found. This function waits - * until the deletion _might_ have completed. Callers are responsible - * to recheck inode state. - * - * It doesn't matter if I_NEW is not set initially, a call to - * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list - * will DTRT. - */ -static void __wait_on_freeing_inode(struct inode *inode) -{ - wait_queue_head_t *wq; - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); - wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); - spin_lock(&inode_hash_lock); -} - static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { @@ -1850,29 +1830,6 @@ static int __init set_ihash_entries(char *str) } __setup("ihash_entries=", set_ihash_entries); -/* - * Initialize the waitqueues and inode hash table. - */ -void __init inode_init_early(void) -{ - /* If hashes are distributed across NUMA nodes, defer - * hash allocation until vmalloc space is available. 
- */ - if (hashdist) - return; - - inode_hashtable = - alloc_large_system_hash("Inode-cache", - sizeof(struct hlist_head), - ihash_entries, - 14, - HASH_EARLY | HASH_ZERO, - &i_hash_shift, - &i_hash_mask, - 0, - 0); -} - void __init inode_init(void) { /* inode slab cache */ @@ -1882,21 +1839,6 @@ void __init inode_init(void) (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_MEM_SPREAD|SLAB_ACCOUNT), init_once); - - /* Hash may have been set up in inode_init_early */ - if (!hashdist) - return; - - inode_hashtable = - alloc_large_system_hash("Inode-cache", - sizeof(struct hlist_head), - ihash_entries, - 14, - HASH_ZERO, - &i_hash_shift, - &i_hash_mask, - 0, - 0); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa6493905bbe..7848cb7a0112 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -66,6 +66,55 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *); static struct kmem_cache * nfs_inode_cachep; +struct nfs_find_desc { + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +static u32 nfs_inode_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct nfs_find_desc *desc = data; + struct nfs_fh *fh = desc->fh; + u64 fileid = desc->fattr->fileid; + + return jhash(&fileid, sizeof(fileid), seed) ^ + jhash(fh, sizeof(fh->size) + fh->size, seed); +} + +static u32 nfs_inode_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = obj; + const struct nfs_fh *fh = NFS_FH(inode); + u64 fileid = NFS_FILEID(inode); + + return jhash(&fileid, sizeof(fileid), seed) ^ + jhash(fh, sizeof(fh->size) + fh->size, seed); +} + +static int nfs_inode_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const struct nfs_find_desc *desc = arg->key; + struct nfs_fh *fh = desc->fh; + struct nfs_fattr *fattr = desc->fattr; + + if (NFS_FILEID(inode) != fattr->fileid) + return 1; + if ((S_IFMT & inode->i_mode) != (S_IFMT & 
fattr->mode)) + return 1; + if (nfs_compare_fh(NFS_FH(inode), fh)) + return 1; + return 0; +} + +const struct rhashtable_params nfs_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = nfs_inode_key_hash_fn, + .obj_hashfn = nfs_inode_obj_hash_fn, + .obj_cmpfn = nfs_inode_hash_cmp_fn, +}; + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -301,40 +350,11 @@ void nfs_set_inode_stale(struct inode *inode) spin_unlock(&inode->i_lock); } -struct nfs_find_desc { - struct nfs_fh *fh; - struct nfs_fattr *fattr; -}; - -/* - * In NFSv3 we can have 64bit inode numbers. In order to support - * this, and re-exported directories (also seen in NFSv2) - * we are forced to allow 2 different inodes to have the same - * i_ino. - */ -static int -nfs_find_actor(struct inode *inode, void *opaque) -{ - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; - struct nfs_fh *fh = desc->fh; - struct nfs_fattr *fattr = desc->fattr; - - if (NFS_FILEID(inode) != fattr->fileid) - return 0; - if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode)) - return 0; - if (nfs_compare_fh(NFS_FH(inode), fh)) - return 0; - if (is_bad_inode(inode) || NFS_STALE(inode)) - return 0; - return 1; -} - static int -nfs_init_locked(struct inode *inode, void *opaque) +nfs_init_locked(struct inode *inode, const void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; - struct nfs_fattr *fattr = desc->fattr; + const struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + const struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); inode->i_mode = fattr->mode; @@ -413,14 +433,17 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) .fattr = fattr, }; struct inode *inode; - unsigned long hash; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID) || !(fattr->valid & NFS_ATTR_FATTR_TYPE)) return NULL; - hash = nfs_fattr_to_ino_t(fattr); - inode = ilookup5(sb, hash, nfs_find_actor, &desc); 
+ inode = ilookup5(sb, &desc); + + if (inode && (is_bad_inode(inode) || NFS_STALE(inode))) { + iput(inode); + inode = NULL; + } dprintk("%s: returning %p\n", __func__, inode); return inode; @@ -438,7 +461,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st .fattr = fattr }; struct inode *inode = ERR_PTR(-ENOENT); - unsigned long hash; nfs_attr_check_mountpoint(sb, fattr); @@ -449,9 +471,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; - hash = nfs_fattr_to_ino_t(fattr); + inode = iget5_locked(sb, nfs_init_locked, &desc); + + if (inode && (is_bad_inode(inode) || NFS_STALE(inode))) { + iput(inode); + inode = NULL; + } - inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); if (inode == NULL) { inode = ERR_PTR(-ENOMEM); goto out_no_inode; @@ -463,7 +489,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st /* We set i_ino for the few things that still rely on it, * such as stat(2) */ - inode->i_ino = hash; + inode->i_ino = nfs_fattr_to_ino_t(fattr); /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 5ba00610aede..bf6a962b18f1 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -22,6 +22,8 @@ struct nfs_subversion { struct list_head list; /* List of NFS versions */ }; +extern const struct rhashtable_params nfs_inode_table_params; + struct nfs_subversion *get_nfs_version(unsigned int); void put_nfs_version(struct nfs_subversion *); void register_nfs_version(struct nfs_subversion *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7a70287f21a2..73d5cd4199ec 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1247,6 +1247,9 @@ int nfs_get_tree_common(struct fs_context *fc) nfs_free_server(server); server = NULL; } else { + error = super_setup_inode_table(s, &nfs_inode_table_params); 
+ if (error) + goto error_splat_super; error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); if (error) diff --git a/fs/super.c b/fs/super.c index 904459b35119..b9df9976ff7d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -162,6 +163,8 @@ static void destroy_super_work(struct work_struct *work) for (i = 0; i < SB_FREEZE_LEVELS; i++) percpu_free_rwsem(&s->s_writers.rw_sem[i]); + if (s->s_inode_table_init_done) + rhashtable_destroy(&s->s_inode_table); kfree(s); } @@ -273,6 +276,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, goto fail; if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink)) goto fail; + if (super_setup_inode_table(s, &default_inode_table_params)) + goto fail; return s; fail: diff --git a/include/linux/fs.h b/include/linux/fs.h index c72048c5520e..b45e951c14b7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -70,7 +71,6 @@ struct fs_context; struct fs_parameter_spec; extern void __init inode_init(void); -extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); @@ -667,7 +667,7 @@ struct inode { unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; - struct hlist_node i_hash; + struct rhash_head i_hash; struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ @@ -734,7 +734,7 @@ static inline unsigned int i_blocksize(const struct inode *node) static inline int inode_unhashed(struct inode *inode) { - return hlist_unhashed(&inode->i_hash); + return inode->i_hash.next == NULL; } /* @@ -745,7 +745,7 @@ static inline int inode_unhashed(struct inode *inode) */ static inline void inode_fake_hash(struct inode *inode) { - 
hlist_add_fake(&inode->i_hash); + inode->i_hash.next = (void *) 2; } /* @@ -1536,6 +1536,9 @@ struct super_block { */ int s_stack_depth; + struct rhashtable s_inode_table; + bool s_inode_table_init_done; + /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ @@ -2927,24 +2930,24 @@ static inline int generic_drop_inode(struct inode *inode) } extern void d_mark_dontcache(struct inode *inode); +extern const struct rhashtable_params default_inode_table_params; +extern int super_setup_inode_table(struct super_block *sb, + const struct rhashtable_params *params); + extern struct inode *ilookup5_nowait(struct super_block *sb, - unsigned long hashval, int (*test)(struct inode *, void *), - void *data); -extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data); + const void *key); +extern struct inode *ilookup5(struct super_block *sb, const void *key); extern struct inode *ilookup(struct super_block *sb, unsigned long ino); -extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), - void *data); -extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); +extern struct inode *inode_insert5(struct inode *inode); +extern struct inode * iget5_locked(struct super_block *, + int (*set)(struct inode *, const void *), + const void *); extern struct inode * iget_locked(struct super_block *, unsigned long); -extern struct inode *find_inode_rcu(struct super_block *, unsigned long, - int (*)(struct inode *, void *), void *); +extern struct inode *find_inode_rcu(struct super_block *, const void *); extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); -extern int insert_inode_locked4(struct inode *, unsigned 
long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void lockdep_annotate_inode_mutex_key(struct inode *inode); #else @@ -2989,7 +2992,8 @@ static inline void insert_inode_hash(struct inode *inode) extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { - if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) + if (!inode_unhashed(inode) && + inode->i_hash.next != (void *) 2) __remove_inode_hash(inode); } diff --git a/mm/shmem.c b/mm/shmem.c index 8e2b35ba93ad..252f7dc99e25 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -80,6 +80,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include @@ -94,6 +95,43 @@ static struct vfsmount *shm_mnt; /* Symlink up to this size is kmalloc'ed instead of using a swappable page */ #define SHORT_SYMLINK_LEN 128 +struct shmem_key { + u64 ino; + u32 gen; + +}; + +static u32 shmem_key_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct shmem_key *k = data; + + return jhash(k, sizeof(*k), seed); +} + +static u32 shmem_obj_hash_fn(const void *obj, u32 len, u32 seed) +{ + const struct inode *inode = obj; + const struct shmem_key k = { inode->i_ino, inode->i_generation }; + + return jhash(&k, sizeof(k), seed); +} + +static int shmem_hash_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct inode *inode = obj; + const struct shmem_key *k = arg->key; + + return inode->i_ino == k->ino && inode->i_generation == k->gen ? 
0 : 1; +} + +static const struct rhashtable_params shmem_inode_table_params = { + .head_offset = offsetof(struct inode, i_hash), + .hashfn = shmem_key_hash_fn, + .obj_hashfn = shmem_obj_hash_fn, + .obj_cmpfn = shmem_hash_cmp_fn, +}; + /* * shmem_fallocate communicates with shmem_fault or shmem_writepage via * inode->i_private (with i_mutex making sure that it has only one user at @@ -3326,14 +3364,6 @@ static struct dentry *shmem_get_parent(struct dentry *child) return ERR_PTR(-ESTALE); } -static int shmem_match(struct inode *ino, void *vfh) -{ - __u32 *fh = vfh; - __u64 inum = fh[2]; - inum = (inum << 32) | fh[1]; - return ino->i_ino == inum && fh[0] == ino->i_generation; -} - /* Find any alias of inode, but prefer a hashed alias */ static struct dentry *shmem_find_alias(struct inode *inode) { @@ -3346,18 +3376,18 @@ static struct dentry *shmem_find_alias(struct inode *inode) static struct dentry *shmem_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct shmem_key k; struct inode *inode; struct dentry *dentry = NULL; - u64 inum; if (fh_len < 3) return NULL; - inum = fid->raw[2]; - inum = (inum << 32) | fid->raw[1]; + k.ino = fid->raw[2]; + k.ino = (k.ino << 32) | fid->raw[1]; + k.gen = fid->raw[0]; - inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), - shmem_match, fid->raw); + inode = ilookup5(sb, &k); if (inode) { dentry = shmem_find_alias(inode); iput(inode); @@ -3781,6 +3811,11 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_root = d_make_root(inode); if (!sb->s_root) goto failed; + + err = super_setup_inode_table(sb, &shmem_inode_table_params); + if (err) + goto failed; + return 0; failed: -- cgit v1.2.3