diff options
-rw-r--r-- | drivers/gpu/drm/i915/gt/shmem_utils.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/vmwgfx/ttm_object.c | 2 | ||||
-rw-r--r-- | fs/eventpoll.c | 2 | ||||
-rw-r--r-- | fs/file.c | 77 | ||||
-rw-r--r-- | fs/file_table.c | 49 | ||||
-rw-r--r-- | include/linux/file_ref.h | 177 | ||||
-rw-r--r-- | include/linux/fs.h | 10 |
7 files changed, 292 insertions, 27 deletions
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 1fb6ff77fd89..bb696b29ee2c 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -40,7 +40,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) if (i915_gem_object_is_shmem(obj)) { file = obj->base.filp; - atomic_long_inc(&file->f_count); + get_file(file); return file; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 3353e97687d1..a17e62867f3b 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -471,7 +471,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) */ static bool __must_check get_dma_buf_unless_doomed(struct dma_buf *dmabuf) { - return atomic_long_inc_not_zero(&dmabuf->file->f_count) != 0L; + return file_ref_get(&dmabuf->file->f_ref); } /** diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1ae4542f0bd8..212383cefe6c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1002,7 +1002,7 @@ static struct file *epi_fget(const struct epitem *epi) struct file *file; file = epi->ffd.file; - if (!atomic_long_inc_not_zero(&file->f_count)) + if (!file_ref_get(&file->f_ref)) file = NULL; return file; } diff --git a/fs/file.c b/fs/file.c index 7251d215048d..efd9959b03c2 100644 --- a/fs/file.c +++ b/fs/file.c @@ -20,10 +20,73 @@ #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/close_range.h> +#include <linux/file_ref.h> #include <net/sock.h> #include "internal.h" +/** + * __file_ref_put - Slowpath of file_ref_put() + * @ref: Pointer to the reference count + * @cnt: Current reference count + * + * Invoked when the reference count is outside of the valid zone. + * + * Return: + * True if this was the last reference with no future references + * possible. This signals the caller that it can safely schedule the + * object, which is protected by the reference counter, for + * deconstruction. + * + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * deconstruct the protected object. + */ +bool __file_ref_put(file_ref_t *ref, unsigned long cnt) +{ + /* Did this drop the last reference? */ + if (likely(cnt == FILE_REF_NOREF)) { + /* + * Carefully try to set the reference count to FILE_REF_DEAD. + * + * This can fail if a concurrent get() operation has + * elevated it again or the corresponding put() even marked + * it dead already. Both are valid situations and do not + * require a retry. If this fails the caller is not + * allowed to deconstruct the object. + */ + if (!atomic_long_try_cmpxchg_release(&ref->refcnt, &cnt, FILE_REF_DEAD)) + return false; + + /* + * The caller can safely schedule the object for + * deconstruction. Provide acquire ordering. + */ + smp_acquire__after_ctrl_dep(); + return true; + } + + /* + * If the reference count was already in the dead zone, then this + * put() operation is imbalanced. Warn, put the reference count back to + * DEAD and tell the caller to not deconstruct the object. + */ + if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) { + atomic_long_set(&ref->refcnt, FILE_REF_DEAD); + return false; + } + + /* + * This is a put() operation on a saturated refcount. Restore the + * mean saturation value and tell the caller to not deconstruct the + * object. + */ + if (cnt > FILE_REF_MAXREF) + atomic_long_set(&ref->refcnt, FILE_REF_SATURATED); + return false; +} +EXPORT_SYMBOL_GPL(__file_ref_put); + unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; /* our min() is unusable in constant expressions ;-/ */ @@ -820,7 +883,7 @@ static struct file *__get_file_rcu(struct file __rcu **f) if (!file) return NULL; - if (unlikely(!atomic_long_inc_not_zero(&file->f_count))) + if (unlikely(!file_ref_get(&file->f_ref))) return ERR_PTR(-EAGAIN); file_reloaded = rcu_dereference_raw(*f); @@ -834,8 +897,8 @@ static struct file *__get_file_rcu(struct file __rcu **f) OPTIMIZER_HIDE_VAR(file_reloaded_cmp); /* - * atomic_long_inc_not_zero() above provided a full memory - * barrier when we acquired a reference. + * file_ref_get() above provided a full memory barrier when we + * acquired a reference. * * This is paired with the write barrier from assigning to the * __rcu protected file pointer so that if that pointer still @@ -933,11 +996,11 @@ static inline struct file *__fget_files_rcu(struct files_struct *files, * We need to confirm it by incrementing the refcount * and then check the lookup again. * - * atomic_long_inc_not_zero() gives us a full memory - * barrier. We only really need an 'acquire' one to - * protect the loads below, but we don't have that. + * file_ref_get() gives us a full memory barrier. We + * only really need an 'acquire' one to protect the + * loads below, but we don't have that. */ - if (unlikely(!atomic_long_inc_not_zero(&file->f_count))) + if (unlikely(!file_ref_get(&file->f_ref))) continue; /* diff --git a/fs/file_table.c b/fs/file_table.c index 9e46fd4336b0..976736be47cb 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -39,13 +39,17 @@ static struct files_stat_struct files_stat = { /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __ro_after_init; +static struct kmem_cache *bfilp_cachep __ro_after_init; static struct percpu_counter nr_files __cacheline_aligned_in_smp; /* Container for backing file with optional user path */ struct backing_file { struct file file; - struct path user_path; + union { + struct path user_path; + freeptr_t bf_freeptr; + }; }; static inline struct backing_file *backing_file(struct file *f) @@ -67,7 +71,7 @@ static inline void file_free(struct file *f) put_cred(f->f_cred); if (unlikely(f->f_mode & FMODE_BACKING)) { path_put(backing_file_user_path(f)); - kfree(backing_file(f)); + kmem_cache_free(bfilp_cachep, backing_file(f)); } else { kmem_cache_free(filp_cachep, f); } @@ -164,16 +168,32 @@ static int init_file(struct file *f, int flags, const struct cred *cred) * the respective member when opening the file. */ mutex_init(&f->f_pos_lock); - f->f_flags = flags; - f->f_mode = OPEN_FMODE(flags); - /* f->f_version: 0 */ + memset(&f->f_path, 0, sizeof(f->f_path)); + memset(&f->f_ra, 0, sizeof(f->f_ra)); + + f->f_flags = flags; + f->f_mode = OPEN_FMODE(flags); + + f->f_op = NULL; + f->f_mapping = NULL; + f->private_data = NULL; + f->f_inode = NULL; + f->f_owner = NULL; +#ifdef CONFIG_EPOLL + f->f_ep = NULL; +#endif + + f->f_iocb_flags = 0; + f->f_pos = 0; + f->f_wb_err = 0; + f->f_sb_err = 0; /* * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While * fget-rcu pattern users need to be able to handle spurious * refcount bumps we should reinitialize the reused file first. */ - atomic_long_set(&f->f_count, 1); + file_ref_init(&f->f_ref, 1); return 0; } @@ -205,7 +225,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) goto over; } - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); if (unlikely(!f)) return ERR_PTR(-ENOMEM); @@ -239,7 +259,7 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) struct file *f; int error; - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); if (unlikely(!f)) return ERR_PTR(-ENOMEM); @@ -266,13 +286,13 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred) struct backing_file *ff; int error; - ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL); + ff = kmem_cache_alloc(bfilp_cachep, GFP_KERNEL); if (unlikely(!ff)) return ERR_PTR(-ENOMEM); error = init_file(&ff->file, flags, cred); if (unlikely(error)) { - kfree(ff); + kmem_cache_free(bfilp_cachep, ff); return ERR_PTR(error); } @@ -478,7 +498,7 @@ static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); void fput(struct file *file) { - if (atomic_long_dec_and_test(&file->f_count)) { + if (file_ref_put(&file->f_ref)) { struct task_struct *task = current; if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) { @@ -511,7 +531,7 @@ void fput(struct file *file) */ void __fput_sync(struct file *file) { - if (atomic_long_dec_and_test(&file->f_count)) + if (file_ref_put(&file->f_ref)) __fput(file); } @@ -528,6 +548,11 @@ void __init files_init(void) filp_cachep = kmem_cache_create("filp", sizeof(struct file), &args, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); + + args.freeptr_offset = offsetof(struct backing_file, bf_freeptr); + bfilp_cachep = kmem_cache_create("bfilp", sizeof(struct backing_file), + &args, SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h new file mode 100644 index 000000000000..9b3a8d9b17ab --- /dev/null +++ b/include/linux/file_ref.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LINUX_FILE_REF_H +#define _LINUX_FILE_REF_H + +#include <linux/atomic.h> +#include <linux/preempt.h> +#include <linux/types.h> + +/* + * file_ref is a reference count implementation specifically for use by + * files. It takes inspiration from rcuref but differs in key aspects + * such as support for SLAB_TYPESAFE_BY_RCU type caches. + * + * FILE_REF_ONEREF FILE_REF_MAXREF + * 0x0000000000000000UL 0x7FFFFFFFFFFFFFFFUL + * <-------------------valid -------------------> + * + * FILE_REF_SATURATED + * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL + * <-----------------------saturation zone----------------------> + * + * FILE_REF_RELEASED FILE_REF_DEAD + * 0xC000000000000000UL 0xE000000000000000UL + * <-------------------dead zone-------------------> + * + * FILE_REF_NOREF + * 0xFFFFFFFFFFFFFFFFUL + */ + +#ifdef CONFIG_64BIT +#define FILE_REF_ONEREF 0x0000000000000000UL +#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFUL +#define FILE_REF_SATURATED 0xA000000000000000UL +#define FILE_REF_RELEASED 0xC000000000000000UL +#define FILE_REF_DEAD 0xE000000000000000UL +#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFUL +#else +#define FILE_REF_ONEREF 0x00000000U +#define FILE_REF_MAXREF 0x7FFFFFFFU +#define FILE_REF_SATURATED 0xA0000000U +#define FILE_REF_RELEASED 0xC0000000U +#define FILE_REF_DEAD 0xE0000000U +#define FILE_REF_NOREF 0xFFFFFFFFU +#endif + +typedef struct { +#ifdef CONFIG_64BIT + atomic64_t refcnt; +#else + atomic_t refcnt; +#endif +} file_ref_t; + +/** + * file_ref_init - Initialize a file reference count + * @ref: Pointer to the reference count + * @cnt: The initial reference count typically '1' + */ +static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) +{ + atomic_long_set(&ref->refcnt, cnt - 1); +} + +bool __file_ref_put(file_ref_t *ref, unsigned long cnt); + +/** + * file_ref_get - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF. + * + * Provides full memory ordering. + * + * Return: False if the attempt to acquire a reference failed. This happens + * when the last reference has been put already. True if a reference + * was successfully acquired + */ +static __always_inline __must_check bool file_ref_get(file_ref_t *ref) +{ + /* + * Unconditionally increase the reference count with full + * ordering. The saturation and dead zones provide enough + * tolerance for this. + * + * If this indicates negative the file in question the fail can + * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU. + * Hence, unconditionally altering the file reference count to + * e.g., reset the file reference count back to the middle of + * the deadzone risk end up marking someone else's file as dead + * behind their back. + * + * It would be possible to do a careful: + * + * cnt = atomic_long_inc_return(); + * if (likely(cnt >= 0)) + * return true; + * + * and then something like: + * + * if (cnt >= FILE_REF_RELEASE) + * atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD), + * + * to set the value back to the middle of the deadzone. But it's + * practically impossible to go from FILE_REF_DEAD to + * FILE_REF_ONEREF. It would need 2305843009213693952/2^61 + * file_ref_get()s to resurrect such a dead file. + */ + return !atomic_long_add_negative(1, &ref->refcnt); +} + +/** + * file_ref_inc - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Acquire an additional reference on a file. Warns if the caller didn't + * already hold a reference. + */ +static __always_inline void file_ref_inc(file_ref_t *ref) +{ + long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt); + WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference"); +} + +/** + * file_ref_put -- Release a file reference + * @ref: Pointer to the reference count + * + * Provides release memory ordering, such that prior loads and stores + * are done before, and provides an acquire ordering on success such + * that free() must come after. + * + * Return: True if this was the last reference with no future references + * possible. This signals the caller that it can safely release + * the object which is protected by the reference counter. + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * release the protected object. + */ +static __always_inline __must_check bool file_ref_put(file_ref_t *ref) +{ + long cnt; + + /* + * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put() + * calls don't risk UAFs when a file is recyclyed, it is still + * vulnerable to UAFs caused by freeing the whole slab page once + * it becomes unused. Prevent file_ref_put() from being + * preempted protects against this. + */ + guard(preempt)(); + /* + * Unconditionally decrease the reference count. The saturation + * and dead zones provide enough tolerance for this. If this + * fails then we need to handle the last reference drop and + * cases inside the saturation and dead zones. + */ + cnt = atomic_long_dec_return(&ref->refcnt); + if (cnt >= 0) + return false; + return __file_ref_put(ref, cnt); +} + +/** + * file_ref_read - Read the number of file references + * @ref: Pointer to the reference count + * + * Return: The number of held references (0 ... N) + */ +static inline unsigned long file_ref_read(file_ref_t *ref) +{ + unsigned long c = atomic_long_read(&ref->refcnt); + + /* Return 0 if within the DEAD zone. */ + return c >= FILE_REF_RELEASED ? 0 : c + 1; +} + +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..c13f648a1c13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -45,6 +45,7 @@ #include <linux/slab.h> #include <linux/maple_tree.h> #include <linux/rw_hint.h> +#include <linux/file_ref.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -1005,7 +1006,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) /** * struct file - Represents a file - * @f_count: reference count + * @f_ref: reference count * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. * @f_mode: FMODE_* flags often used in hotpaths * @f_op: file operations @@ -1030,7 +1031,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { - atomic_long_t f_count; + file_ref_t f_ref; spinlock_t f_lock; fmode_t f_mode; const struct file_operations *f_op; @@ -1078,15 +1079,14 @@ struct file_handle { static inline struct file *get_file(struct file *f) { - long prior = atomic_long_fetch_inc_relaxed(&f->f_count); - WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n"); + file_ref_inc(&f->f_ref); return f; } struct file *get_file_rcu(struct file __rcu **f); struct file *get_file_active(struct file **f); -#define file_count(x) atomic_long_read(&(x)->f_count) +#define file_count(f) file_ref_read(&(f)->f_ref) #define MAX_NON_LFS ((1UL<<31) - 1) |