From e8cff84faa4ddb6716caed085f515fbb1d856099 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jul 2018 11:24:21 -0400 Subject: fold security_file_free() into file_free() .. and the call of file_free() in case of security_file_alloc() failure in get_empty_filp() should be simply file_free_rcu() - no point in rcu-delays there, anyway. Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/file_table.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 7ec0b3e5f05d..eee7cf629e52 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -51,6 +51,7 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { + security_file_free(f); percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -123,11 +124,10 @@ struct file *get_empty_filp(void) if (unlikely(!f)) return ERR_PTR(-ENOMEM); - percpu_counter_inc(&nr_files); f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { - file_free(f); + file_free_rcu(&f->f_u.fu_rcuhead); return ERR_PTR(error); } @@ -137,6 +137,7 @@ struct file *get_empty_filp(void) mutex_init(&f->f_pos_lock); eventpoll_init_file(f); /* f->f_version: 0 */ + percpu_counter_inc(&nr_files); return f; over: @@ -207,7 +208,6 @@ static void __fput(struct file *file) } if (file->f_op->release) file->f_op->release(inode, file); - security_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { cdev_put(inode->i_cdev); @@ -302,10 +302,8 @@ EXPORT_SYMBOL(fput); void put_filp(struct file *file) { - if (atomic_long_dec_and_test(&file->f_count)) { - security_file_free(file); + if (atomic_long_dec_and_test(&file->f_count)) file_free(file); - } } void __init files_init(void) -- cgit v1.2.3 From c9c554f21490bbc96cc554f80024d27d09670480 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Jul 2018 14:19:04 -0400 Subject: alloc_file(): switch to passing O_... flags instead of FMODE_... mode ... so that it could set both ->f_flags and ->f_mode, without callers having to set ->f_flags manually. Signed-off-by: Al Viro --- drivers/misc/cxl/api.c | 3 +-- drivers/scsi/cxlflash/ocxl_hw.c | 3 +-- fs/aio.c | 8 ++------ fs/anon_inodes.c | 3 +-- fs/file_table.c | 17 +++++++++-------- fs/hugetlbfs/inode.c | 3 +-- fs/pipe.c | 8 ++++---- include/linux/file.h | 2 +- ipc/shm.c | 8 ++++---- mm/memfd.c | 2 +- mm/shmem.c | 3 +-- net/socket.c | 3 +-- 12 files changed, 27 insertions(+), 36 deletions(-) (limited to 'fs/file_table.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 6b16946f9b05..0b5cb6cf91a0 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -102,12 +102,11 @@ static struct file *cxl_getfile(const char *name, path.mnt = mntget(cxl_vfs_mount); d_instantiate(path.dentry, inode); - file = alloc_file(&path, OPEN_FMODE(flags), fops); + file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops); if (IS_ERR(file)) { path_put(&path); goto err_fs; } - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; return file; diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 497a68389461..99bb393a8a34 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -129,7 +129,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name, path.mnt = mntget(ocxlflash_vfs_mount); d_instantiate(path.dentry, inode); - file = alloc_file(&path, OPEN_FMODE(flags), fops); + file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops); if (IS_ERR(file)) { rc = PTR_ERR(file); dev_err(dev, "%s: alloc_file failed rc=%d\n", @@ -138,7 +138,6 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name, goto err3; } - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; out: return file; diff --git a/fs/aio.c b/fs/aio.c index e1d20124ec0e..9eea53887d6c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -234,13 +234,9 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) path.mnt = mntget(aio_mnt); d_instantiate(path.dentry, inode); - file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); - if (IS_ERR(file)) { + file = alloc_file(&path, O_RDWR, &aio_ring_fops); + if (IS_ERR(file)) path_put(&path); - return file; - } - - file->f_flags = O_RDWR; return file; } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 3168ee4e77f4..6b235ab1df6c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -102,12 +102,11 @@ struct file *anon_inode_getfile(const char *name, d_instantiate(path.dentry, anon_inode_inode); - file = alloc_file(&path, OPEN_FMODE(flags), fops); + file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops); if (IS_ERR(file)) goto err_dput; file->f_mapping = anon_inode_inode->i_mapping; - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; return file; diff --git a/fs/file_table.c b/fs/file_table.c index eee7cf629e52..086c3f5ec31a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -153,10 +153,10 @@ over: * alloc_file - allocate and initialize a 'struct file' * * @path: the (dentry, vfsmount) pair for the new file - * @mode: the mode with which the new file will be opened + * @flags: O_... flags with which the new file will be opened * @fop: the 'struct file_operations' for the new file */ -struct file *alloc_file(const struct path *path, fmode_t mode, +struct file *alloc_file(const struct path *path, int flags, const struct file_operations *fop) { struct file *file; @@ -165,19 +165,20 @@ struct file *alloc_file(const struct path *path, fmode_t mode, if (IS_ERR(file)) return file; + file->f_mode = OPEN_FMODE(flags); + file->f_flags = flags; file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); - if ((mode & FMODE_READ) && + if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) - mode |= FMODE_CAN_READ; - if ((mode & FMODE_WRITE) && + file->f_mode |= FMODE_CAN_READ; + if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) - mode |= FMODE_CAN_WRITE; - file->f_mode = mode; + file->f_mode |= FMODE_CAN_WRITE; file->f_op = fop; - if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d508c7844681..71aed47422e2 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1375,8 +1375,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, inode->i_size = size; clear_nlink(inode); - file = alloc_file(&path, FMODE_WRITE | FMODE_READ, - &hugetlbfs_file_operations); + file = alloc_file(&path, O_RDWR, &hugetlbfs_file_operations); if (IS_ERR(file)) goto out_dentry; /* inode is already attached */ diff --git a/fs/pipe.c b/fs/pipe.c index 9405e455f5b1..1909422e5a78 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -760,16 +760,17 @@ int create_pipe_files(struct file **res, int flags) d_instantiate(path.dentry, inode); - f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); + f = alloc_file(&path, O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), + &pipefifo_fops); if (IS_ERR(f)) { err = PTR_ERR(f); goto err_dentry; } - f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; - res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); + res[0] = alloc_file(&path, O_RDONLY | (flags & O_NONBLOCK), + &pipefifo_fops); if (IS_ERR(res[0])) { put_pipe_info(inode, inode->i_pipe); fput(f); @@ -778,7 +779,6 @@ int create_pipe_files(struct file **res, int flags) path_get(&path); res[0]->private_data = inode->i_pipe; - res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); res[1] = f; return 0; diff --git a/include/linux/file.h b/include/linux/file.h index 279720db984a..6d34a1262b31 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -18,7 +18,7 @@ struct file_operations; struct vfsmount; struct dentry; struct path; -extern struct file *alloc_file(const struct path *, fmode_t mode, +extern struct file *alloc_file(const struct path *, int flags, const struct file_operations *fop); static inline void fput_light(struct file *file, int fput_needed) diff --git a/ipc/shm.c b/ipc/shm.c index 051a3e1fb8df..c702abd578a7 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1362,7 +1362,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, struct ipc_namespace *ns; struct shm_file_data *sfd; struct path path; - fmode_t f_mode; + int f_flags; unsigned long populate = 0; err = -EINVAL; @@ -1395,11 +1395,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, if (shmflg & SHM_RDONLY) { prot = PROT_READ; acc_mode = S_IRUGO; - f_mode = FMODE_READ; + f_flags = O_RDONLY; } else { prot = PROT_READ | PROT_WRITE; acc_mode = S_IRUGO | S_IWUGO; - f_mode = FMODE_READ | FMODE_WRITE; + f_flags = O_RDWR; } if (shmflg & SHM_EXEC) { prot |= PROT_EXEC; @@ -1449,7 +1449,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto out_nattch; } - file = alloc_file(&path, f_mode, + file = alloc_file(&path, f_flags, is_file_hugepages(shp->shm_file) ? &shm_file_operations_huge : &shm_file_operations); diff --git a/mm/memfd.c b/mm/memfd.c index 27069518e3c5..2bb5e257080e 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -326,7 +326,7 @@ SYSCALL_DEFINE2(memfd_create, goto err_fd; } file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; - file->f_flags |= O_RDWR | O_LARGEFILE; + file->f_flags |= O_LARGEFILE; if (flags & MFD_ALLOW_SEALING) { file_seals = memfd_file_seals_ptr(file); diff --git a/mm/shmem.c b/mm/shmem.c index 2cab84403055..84844e52bf24 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3942,8 +3942,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l if (IS_ERR(res)) goto put_path; - res = alloc_file(&path, FMODE_WRITE | FMODE_READ, - &shmem_file_operations); + res = alloc_file(&path, O_RDWR, &shmem_file_operations); if (IS_ERR(res)) goto put_path; diff --git a/net/socket.c b/net/socket.c index 8a109012608a..2cdbe8f71b7f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -411,7 +411,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) d_instantiate(path.dentry, SOCK_INODE(sock)); - file = alloc_file(&path, FMODE_READ | FMODE_WRITE, + file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK), &socket_file_ops); if (IS_ERR(file)) { /* drop dentry, keep inode for a bit */ @@ -423,7 +423,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) } sock->file = file; - file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->private_data = sock; return file; } -- cgit v1.2.3 From 6de37b6dc085e7c5e092b69289af66876526da44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jul 2018 13:12:05 -0400 Subject: pass creds to get_empty_filp(), make sure dentry_open() passes the right creds ... and rename get_empty_filp() to alloc_empty_file(). dentry_open() gets creds as argument, but the only thing that sees those is security_file_open() - file->f_cred still ends up with current_cred(). For almost all callers it's the same thing, but there are several broken cases. Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/file_table.c | 5 ++--- fs/internal.h | 2 +- fs/namei.c | 2 +- fs/open.c | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 086c3f5ec31a..76cfa4c43e13 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -101,9 +101,8 @@ int proc_nr_files(struct ctl_table *table, int write, * done, you will imbalance int the mount's writer count * and a warning at __fput() time. */ -struct file *get_empty_filp(void) +struct file *alloc_empty_file(const struct cred *cred) { - const struct cred *cred = current_cred(); static long old_max; struct file *f; int error; @@ -161,7 +160,7 @@ struct file *alloc_file(const struct path *path, int flags, { struct file *file; - file = get_empty_filp(); + file = alloc_empty_file(current_cred()); if (IS_ERR(file)) return file; diff --git a/fs/internal.h b/fs/internal.h index 5645b4ebf494..66473bf388e4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -93,7 +93,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern struct file *get_empty_filp(void); +extern struct file *alloc_empty_file(const struct cred *); /* * super.c diff --git a/fs/namei.c b/fs/namei.c index 734cef54fdf8..af2ec1803f57 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3513,7 +3513,7 @@ static struct file *path_openat(struct nameidata *nd, int opened = 0; int error; - file = get_empty_filp(); + file = alloc_empty_file(current_cred()); if (IS_ERR(file)) return file; diff --git a/fs/open.c b/fs/open.c index 530da965e369..0061f9ea044d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -921,7 +921,7 @@ struct file *dentry_open(const struct path *path, int flags, /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); - f = get_empty_filp(); + f = alloc_empty_file(cred); if (!IS_ERR(f)) { f->f_flags = flags; error = vfs_open(path, f, cred); -- cgit v1.2.3 From ea73ea7279884ba80896d4ea0f0443bf48b9e311 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Jul 2018 15:00:04 -0400 Subject: pass ->f_flags value to alloc_empty_file() ... and have it set the f_flags-derived part of ->f_mode. Signed-off-by: Al Viro --- fs/file_table.c | 8 ++++---- fs/internal.h | 2 +- fs/namei.c | 4 +--- fs/open.c | 8 +++----- 4 files changed, 9 insertions(+), 13 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 76cfa4c43e13..705f486f7007 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -101,7 +101,7 @@ int proc_nr_files(struct ctl_table *table, int write, * done, you will imbalance int the mount's writer count * and a warning at __fput() time. */ -struct file *alloc_empty_file(const struct cred *cred) +struct file *alloc_empty_file(int flags, const struct cred *cred) { static long old_max; struct file *f; @@ -135,6 +135,8 @@ struct file *alloc_empty_file(const struct cred *cred) spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); eventpoll_init_file(f); + f->f_flags = flags; + f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ percpu_counter_inc(&nr_files); return f; @@ -160,12 +162,10 @@ struct file *alloc_file(const struct path *path, int flags, { struct file *file; - file = alloc_empty_file(current_cred()); + file = alloc_empty_file(flags, current_cred()); if (IS_ERR(file)) return file; - file->f_mode = OPEN_FMODE(flags); - file->f_flags = flags; file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; diff --git a/fs/internal.h b/fs/internal.h index 66473bf388e4..661c314aba30 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -93,7 +93,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern struct file *alloc_empty_file(const struct cred *); +extern struct file *alloc_empty_file(int, const struct cred *); /* * super.c diff --git a/fs/namei.c b/fs/namei.c index af2ec1803f57..223925e30adb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3513,12 +3513,10 @@ static struct file *path_openat(struct nameidata *nd, int opened = 0; int error; - file = alloc_empty_file(current_cred()); + file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; - file->f_flags = op->open_flag; - if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(nd, flags, op, file, &opened); goto out2; diff --git a/fs/open.c b/fs/open.c index 0061f9ea044d..15d2c3ab91ff 100644 --- a/fs/open.c +++ b/fs/open.c @@ -742,9 +742,6 @@ static int do_dentry_open(struct file *f, static const struct file_operations empty_fops = {}; int error; - f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | - FMODE_PREAD | FMODE_PWRITE; - path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; @@ -788,6 +785,8 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* normally all 3 are set; ->open() can clear them if needed */ + f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; if (!open) open = f->f_op->open; if (open) { @@ -921,9 +920,8 @@ struct file *dentry_open(const struct path *path, int flags, /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); - f = alloc_empty_file(cred); + f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) { - f->f_flags = flags; error = vfs_open(path, f, cred); if (!error) { /* from now on we need fput() to dispose of f */ -- cgit v1.2.3 From f5d11409e61dadf1f9af91b22bbedc28a60a2e2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jul 2018 02:35:08 -0400 Subject: introduce FMODE_OPENED basically, "is that instance set up enough for regular fput(), or do we want put_filp() for that one". NOTE: the only alloc_file() caller that could be followed by put_filp() is in arch/ia64/kernel/perfmon.c, which is (Kconfig-level) broken. Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/file_table.c | 1 + fs/open.c | 3 ++- include/linux/fs.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 705f486f7007..d664d10acfeb 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -176,6 +176,7 @@ struct file *alloc_file(const struct path *path, int flags, if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) file->f_mode |= FMODE_CAN_WRITE; + file->f_mode |= FMODE_OPENED; file->f_op = fop; if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); diff --git a/fs/open.c b/fs/open.c index 4c65edefa487..f3c6cb6a57b9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -749,7 +749,7 @@ static int do_dentry_open(struct file *f, f->f_wb_err = filemap_sample_wb_err(f->f_mapping); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH; + f->f_mode = FMODE_PATH | FMODE_OPENED; f->f_op = &empty_fops; return 0; } @@ -793,6 +793,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; } + f->f_mode |= FMODE_OPENED; if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); if ((f->f_mode & FMODE_READ) && diff --git a/include/linux/fs.h b/include/linux/fs.h index c4ca4c9c1130..05f34726e29c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -148,6 +148,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Has write method(s) */ #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) +#define FMODE_OPENED ((__force fmode_t)0x80000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) -- cgit v1.2.3 From 4d27f3266f14e4d1d13125ce32cb49a40f3122c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jul 2018 11:14:39 -0400 Subject: fold put_filp() into fput() Just check FMODE_OPENED in __fput() and be done with that... Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/file_table.c | 15 +++++---------- fs/namei.c | 4 ++-- fs/open.c | 11 +++-------- include/linux/file.h | 1 - 4 files changed, 10 insertions(+), 21 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index d664d10acfeb..9b70ed2bbc4e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -192,6 +192,9 @@ static void __fput(struct file *file) struct vfsmount *mnt = file->f_path.mnt; struct inode *inode = file->f_inode; + if (unlikely(!(file->f_mode & FMODE_OPENED))) + goto out; + might_sleep(); fsnotify_close(file); @@ -221,12 +224,10 @@ static void __fput(struct file *file) put_write_access(inode); __mnt_drop_write(mnt); } - file->f_path.dentry = NULL; - file->f_path.mnt = NULL; - file->f_inode = NULL; - file_free(file); dput(dentry); mntput(mnt); +out: + file_free(file); } static LLIST_HEAD(delayed_fput_list); @@ -301,12 +302,6 @@ void __fput_sync(struct file *file) EXPORT_SYMBOL(fput); -void put_filp(struct file *file) -{ - if (atomic_long_dec_and_test(&file->f_count)) - file_free(file); -} - void __init files_init(void) { filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, diff --git a/fs/namei.c b/fs/namei.c index 3cf02804d5ff..503c4b968415 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3531,7 +3531,7 @@ static struct file *path_openat(struct nameidata *nd, s = path_init(nd, flags); if (IS_ERR(s)) { - put_filp(file); + fput(file); return ERR_CAST(s); } while (!(error = link_path_walk(s, nd)) && @@ -3547,7 +3547,7 @@ static struct file *path_openat(struct nameidata *nd, out2: if (!(opened & FILE_OPENED)) { BUG_ON(!error); - put_filp(file); + fput(file); } if (unlikely(error)) { if (error == -EOPENSTALE) { diff --git a/fs/open.c b/fs/open.c index f3c6cb6a57b9..3d09b823f12b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -921,15 +921,10 @@ struct file *dentry_open(const struct path *path, int flags, f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) { error = vfs_open(path, f); - if (!error) { - /* from now on we need fput() to dispose of f */ + if (!error) error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); + if (error) { + fput(f); f = ERR_PTR(error); } } diff --git a/include/linux/file.h b/include/linux/file.h index 6d34a1262b31..aed45d69811e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -78,7 +78,6 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); -extern void put_filp(struct file *); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); -- cgit v1.2.3 From d93aa9d82aea80b80f225dbf9c7986df444d8106 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2018 09:40:05 -0400 Subject: new wrapper: alloc_file_pseudo() takes inode, vfsmount, name, O_... flags and file_operations and either returns a new struct file (in which case inode reference we held is consumed) or returns ERR_PTR(), in which case no refcounts are altered. converted aio_private_file() and sock_alloc_file() to it Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/aio.c | 20 ++++---------------- fs/file_table.c | 27 +++++++++++++++++++++++++++ include/linux/file.h | 3 +++ net/socket.c | 28 +++++----------------------- 4 files changed, 39 insertions(+), 39 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/aio.c b/fs/aio.c index 9eea53887d6c..c3a8bac16374 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -215,9 +215,7 @@ static const struct address_space_operations aio_ctx_aops; static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { - struct qstr this = QSTR_INIT("[aio]", 5); struct file *file; - struct path path; struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -226,27 +224,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->private_data = ctx; inode->i_size = PAGE_SIZE * nr_pages; - path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); - if (!path.dentry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - path.mnt = mntget(aio_mnt); - - d_instantiate(path.dentry, inode); - file = alloc_file(&path, O_RDWR, &aio_ring_fops); + file = alloc_file_pseudo(inode, aio_mnt, "[aio]", + O_RDWR, &aio_ring_fops); if (IS_ERR(file)) - path_put(&path); + iput(inode); return file; } static struct dentry *aio_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - static const struct dentry_operations ops = { - .d_dname = simple_dname, - }; - struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops, + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL, AIO_RING_MAGIC); if (!IS_ERR(root)) diff --git a/fs/file_table.c b/fs/file_table.c index 9b70ed2bbc4e..6b3723909342 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -184,6 +184,33 @@ struct file *alloc_file(const struct path *path, int flags, } EXPORT_SYMBOL(alloc_file); +struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, + const char *name, int flags, + const struct file_operations *fops) +{ + static const struct dentry_operations anon_ops = { + .d_dname = simple_dname + }; + struct qstr this = QSTR_INIT(name, strlen(name)); + struct path path; + struct file *file; + + path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this); + if (!path.dentry) + return ERR_PTR(-ENOMEM); + if (!mnt->mnt_sb->s_d_op) + d_set_d_op(path.dentry, &anon_ops); + path.mnt = mntget(mnt); + d_instantiate(path.dentry, inode); + file = alloc_file(&path, flags, fops); + if (IS_ERR(file)) { + ihold(inode); + path_put(&path); + } + return file; +} +EXPORT_SYMBOL(alloc_file_pseudo); + /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) diff --git a/include/linux/file.h b/include/linux/file.h index aed45d69811e..5b25388f2f79 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -17,9 +17,12 @@ extern void fput(struct file *); struct file_operations; struct vfsmount; struct dentry; +struct inode; struct path; extern struct file *alloc_file(const struct path *, int flags, const struct file_operations *fop); +extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, + const char *, int flags, const struct file_operations *); static inline void fput_light(struct file *file, int fput_needed) { diff --git a/net/socket.c b/net/socket.c index 2cdbe8f71b7f..4cf3568caf9f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -391,33 +391,15 @@ static struct file_system_type sock_fs_type = { struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { - struct qstr name = { .name = "" }; - struct path path; struct file *file; - if (dname) { - name.name = dname; - name.len = strlen(name.name); - } else if (sock->sk) { - name.name = sock->sk->sk_prot_creator->name; - name.len = strlen(name.name); - } - path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); - if (unlikely(!path.dentry)) { - sock_release(sock); - return ERR_PTR(-ENOMEM); - } - path.mnt = mntget(sock_mnt); - - d_instantiate(path.dentry, SOCK_INODE(sock)); + if (!dname) + dname = sock->sk ? sock->sk->sk_prot_creator->name : ""; - file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK), - &socket_file_ops); + file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname, + O_RDWR | (flags & O_NONBLOCK), + &socket_file_ops); if (IS_ERR(file)) { - /* drop dentry, keep inode for a bit */ - ihold(d_inode(path.dentry)); - path_put(&path); - /* ... and now kill it properly */ sock_release(sock); return file; } -- cgit v1.2.3 From 183266f26f45a47958afb5c9aa1b3d4651e2eb8c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jun 2018 14:15:10 -0400 Subject: new helper: alloc_file_clone() alloc_file_clone(old_file, mode, ops): create a new struct file with ->f_path equal to that of old_file. pipe converted. Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/file_table.c | 11 +++++++++++ fs/pipe.c | 6 ++---- include/linux/file.h | 2 ++ 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 6b3723909342..78b067ddb386 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -211,6 +211,17 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, } EXPORT_SYMBOL(alloc_file_pseudo); +struct file *alloc_file_clone(struct file *base, int flags, + const struct file_operations *fops) +{ + struct file *f = alloc_file(&base->f_path, flags, fops); + if (!IS_ERR(f)) { + path_get(&f->f_path); + f->f_mapping = base->f_mapping; + } + return f; +} + /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) diff --git a/fs/pipe.c b/fs/pipe.c index 9701bd3458d1..5f50070774bc 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -761,15 +761,13 @@ int create_pipe_files(struct file **res, int flags) f->private_data = inode->i_pipe; - res[0] = alloc_file(&f->f_path, O_RDONLY | (flags & O_NONBLOCK), - &pipefifo_fops); + res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), + &pipefifo_fops); if (IS_ERR(res[0])) { put_pipe_info(inode, inode->i_pipe); fput(f); return PTR_ERR(res[0]); } - - path_get(&f->f_path); res[0]->private_data = inode->i_pipe; res[1] = f; return 0; diff --git a/include/linux/file.h b/include/linux/file.h index 5b25388f2f79..60914843c737 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -23,6 +23,8 @@ extern struct file *alloc_file(const struct path *, int flags, const struct file_operations *fop); extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); +extern struct file *alloc_file_clone(struct file *, int flags, + const struct file_operations *); static inline void fput_light(struct file *file, int fput_needed) { -- cgit v1.2.3 From ee1904ba44bd4a242b453e8fe179b374906da173 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jun 2018 14:21:27 -0400 Subject: make alloc_file() static Acked-by: Linus Torvalds Signed-off-by: Al Viro --- fs/file_table.c | 3 +-- include/linux/file.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 78b067ddb386..d6eccd04d703 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -157,7 +157,7 @@ over: * @flags: O_... flags with which the new file will be opened * @fop: the 'struct file_operations' for the new file */ -struct file *alloc_file(const struct path *path, int flags, +static struct file *alloc_file(const struct path *path, int flags, const struct file_operations *fop) { struct file *file; @@ -182,7 +182,6 @@ struct file *alloc_file(const struct path *path, int flags, i_readcount_inc(path->dentry->d_inode); return file; } -EXPORT_SYMBOL(alloc_file); struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, const char *name, int flags, diff --git a/include/linux/file.h b/include/linux/file.h index 60914843c737..6b2fb032416c 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -19,8 +19,6 @@ struct vfsmount; struct dentry; struct inode; struct path; -extern struct file *alloc_file(const struct path *, int flags, - const struct file_operations *fop); extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, -- cgit v1.2.3 From d3b1084dfd629ef89bc1c4bab95e5cb87e7d08c2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:40 +0200 Subject: vfs: make open_with_fake_path() not contribute to nr_files Stacking file operations in overlay will store an extra open file for each overlay file opened. The overhead is just that of "struct file" which is about 256bytes, because overlay already pins an extra dentry and inode when the file is open, which add up to a much larger overhead. For fear of breaking working setups, don't start accounting the extra file. Signed-off-by: Miklos Szeredi --- fs/file_table.c | 69 +++++++++++++++++++++++++++++++++++++----------------- fs/internal.h | 1 + fs/open.c | 2 +- include/linux/fs.h | 3 +++ 4 files changed, 53 insertions(+), 22 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 9b70ed2bbc4e..0cc7bea6b51a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { security_file_free(f); - percpu_counter_dec(&nr_files); + if (!(f->f_mode & FMODE_NOACCOUNT)) + percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write, } #endif +static struct file *__alloc_file(int flags, const struct cred *cred) +{ + struct file *f; + int error; + + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + if (unlikely(!f)) + return ERR_PTR(-ENOMEM); + + f->f_cred = get_cred(cred); + error = security_file_alloc(f); + if (unlikely(error)) { + file_free_rcu(&f->f_u.fu_rcuhead); + return ERR_PTR(error); + } + + atomic_long_set(&f->f_count, 1); + rwlock_init(&f->f_owner.lock); + spin_lock_init(&f->f_lock); + mutex_init(&f->f_pos_lock); + eventpoll_init_file(f); + f->f_flags = flags; + f->f_mode = OPEN_FMODE(flags); + /* f->f_version: 0 */ + + return f; +} + /* Find an unused file structure and return a pointer to it. * Returns an error pointer if some error happend e.g. we over file * structures limit, run out of memory or operation is not permitted. @@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) { static long old_max; struct file *f; - int error; /* * Privileged users can go above max_files @@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) goto over; } - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); - if (unlikely(!f)) - return ERR_PTR(-ENOMEM); - - f->f_cred = get_cred(cred); - error = security_file_alloc(f); - if (unlikely(error)) { - file_free_rcu(&f->f_u.fu_rcuhead); - return ERR_PTR(error); - } + f = __alloc_file(flags, cred); + if (!IS_ERR(f)) + percpu_counter_inc(&nr_files); - atomic_long_set(&f->f_count, 1); - rwlock_init(&f->f_owner.lock); - spin_lock_init(&f->f_lock); - mutex_init(&f->f_pos_lock); - eventpoll_init_file(f); - f->f_flags = flags; - f->f_mode = OPEN_FMODE(flags); - /* f->f_version: 0 */ - percpu_counter_inc(&nr_files); return f; over: @@ -150,6 +162,21 @@ over: return ERR_PTR(-ENFILE); } +/* + * Variant of alloc_empty_file() that doesn't check and modify nr_files. + * + * Should not be used unless there's a very good reason to do so. + */ +struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) +{ + struct file *f = __alloc_file(flags, cred); + + if (!IS_ERR(f)) + f->f_mode |= FMODE_NOACCOUNT; + + return f; +} + /** * alloc_file - allocate and initialize a 'struct file' * diff --git a/fs/internal.h b/fs/internal.h index 52a346903748..442098fa0a84 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -94,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); * file_table.c */ extern struct file *alloc_empty_file(int, const struct cred *); +extern struct file *alloc_empty_file_noaccount(int, const struct cred *); /* * super.c diff --git a/fs/open.c b/fs/open.c index dd15711eb658..9c6617dbb2c0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -928,7 +928,7 @@ EXPORT_SYMBOL(dentry_open); struct file *open_with_fake_path(const struct path *path, int flags, struct inode *inode, const struct cred *cred) { - struct file *f = alloc_empty_file(flags, cred); + struct file *f = alloc_empty_file_noaccount(flags, cred); if (!IS_ERR(f)) { int error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ce2b413abc6..e1884840d556 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -156,6 +156,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)0x8000000) +/* File does not contribute to nr_files count */ +#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are -- cgit v1.2.3