diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-06-16 13:16:24 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-06-16 13:16:24 +1000 |
commit | 70006740ef82246bc6ecc4d8da0fa9d5553a5004 (patch) | |
tree | 30dc62518df5cd4537ce0a0384bd52f550c89da4 | |
parent | 499cb7f8839d80fdf14ae68563213a1bdf8e8fce (diff) | |
parent | 2c50e42e7dab0821f55dbf809080668ef1cf03d7 (diff) |
Merge branch 'akpm/master'
46 files changed, 288 insertions, 138 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 598779a35e49..b249824c8267 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 550 common watch_mount sys_watch_mount 551 common watch_sb sys_watch_sb 552 common fsinfo sys_fsinfo +553 common process_madvise sys_process_madvise diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 7dd5907463ef..7b3832d920ee 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -455,3 +455,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 949788f5ba40..d1f7d35f986e 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 443 +#define __NR_compat_syscalls 444 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index d49b63db5b08..f8dafe900cb9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -891,6 +891,8 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount) __SYSCALL(__NR_watch_sb, sys_watch_sb) #define __NR_fsinfo 442 __SYSCALL(__NR_fsinfo, sys_fsinfo) +#define __NR_process_madvise 443 +__SYSCALL(__NR_process_madvise, compat_sys_process_madvise) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 457270289902..6636a1aef36b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -362,3 +362,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 86872b908471..8cd84a7fbf57 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -441,3 +441,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 11e1587078b3..f581a02257ec 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -447,3 +447,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 3cde581d0cfb..c85bdc3abe55 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -380,3 +380,4 @@ 440 n32 watch_mount sys_watch_mount 441 n32 watch_sb sys_watch_sb 442 n32 fsinfo sys_fsinfo +443 n32 process_madvise compat_sys_process_madvise diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index e0270d5700b5..9e08c40d3cb9 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -356,3 +356,4 @@ 440 n64 watch_mount sys_watch_mount 441 n64 watch_sb sys_watch_sb 442 n64 fsinfo sys_fsinfo +443 n64 process_madvise sys_process_madvise diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 002ca3e4aebe..a2b591d7c874 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -429,3 +429,4 @@ 440 o32 watch_mount sys_watch_mount 441 o32 watch_sb sys_watch_sb 442 o32 fsinfo sys_fsinfo +443 o32 process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 6e6b2114065f..98e7442e4d7a 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -439,3 +439,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 4dda0f1ce513..72fb9dd88e16 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -531,3 +531,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index a750ad6821f1..b731fcb03f1b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 440 common watch_mount sys_watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h index 3558b1d7123e..be3f9a08cbc9 100644 --- a/arch/sh/include/asm/string_32.h +++ b/arch/sh/include/asm/string_32.h @@ -28,32 +28,6 @@ static inline char *strcpy(char *__dest, const char *__src) return __xdest; } -#define __HAVE_ARCH_STRNCPY -static inline char *strncpy(char *__dest, const char *__src, size_t __n) -{ - register char *__xdest = __dest; - unsigned long __dummy; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - "1:\n" - "mov.b @%1+, %2\n\t" - "mov.b %2, @%0\n\t" - "cmp/eq #0, %2\n\t" - "bt/s 2f\n\t" - " cmp/eq %5,%1\n\t" - "bf/s 1b\n\t" - " add #1, %0\n" - "2:" - : "=r" (__dest), "=r" (__src), "=&z" (__dummy) - : "0" (__dest), "1" (__src), "r" (__src+__n) - : "memory", "t"); - - return __xdest; -} - #define __HAVE_ARCH_STRCMP static inline int strcmp(const char *__cs, const char *__ct) { diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 4d7b900bb350..e7a4804e23cf 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index 540e670dbafc..cccab21f3e56 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -42,6 +42,7 @@ inline void __const_udelay(unsigned long xloops) : "macl", "mach"); __delay(++xloops); } +EXPORT_SYMBOL(__delay); void __udelay(unsigned long usecs) { diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 3169a343a5ab..0de206c1acfe 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -57,8 +57,6 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, return -ENOMEM; } - memset(buf, 0, memsize); - r->flags = IORESOURCE_MEM; r->start = dma_handle; r->end = r->start + memsize - 1; diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 6b25b0a33834..d6126ee2f51d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -487,3 +487,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 65d3497d1eb8..686d59df1e12 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -446,3 +446,4 @@ 440 i386 watch_mount sys_watch_mount 441 i386 watch_sb sys_watch_sb 442 i386 fsinfo sys_fsinfo +443 i386 process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 3100ed45b108..b345b35b3308 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -363,6 +363,7 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 64 process_madvise sys_process_madvise # # x32-specific system call numbers start at 512 to avoid cache impact @@ -406,3 +407,4 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +548 x32 process_madvise compat_sys_process_madvise diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index fc382476421f..96cb07095e4c 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -412,3 +412,4 @@ 440 common watch_mount sys_watch_mount 441 common watch_sb sys_watch_sb 442 common fsinfo sys_fsinfo +443 common process_madvise sys_process_madvise diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c index eeb028b9cdb3..a5cacfe24a42 100644 --- a/drivers/sh/clk/cpg.c +++ b/drivers/sh/clk/cpg.c @@ -36,36 +36,21 @@ static void sh_clk_write(int value, struct clk *clk) iowrite32(value, clk->mapped_reg); } -static unsigned int r8(const void __iomem *addr) -{ - return ioread8(addr); -} - -static unsigned int r16(const void __iomem *addr) -{ - return ioread16(addr); -} - -static unsigned int r32(const void __iomem *addr) -{ - return ioread32(addr); -} - static int sh_clk_mstp_enable(struct clk *clk) { sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk); if (clk->status_reg) { - unsigned int (*read)(const void __iomem *addr); + unsigned int (*read)(void __iomem *addr); int i; void __iomem *mapped_status = (phys_addr_t)clk->status_reg - (phys_addr_t)clk->enable_reg + clk->mapped_reg; if (clk->flags & CLK_ENABLE_REG_8BIT) - read = r8; + read = ioread8; else if (clk->flags & CLK_ENABLE_REG_16BIT) - read = r16; + read = ioread16; else - read = r32; + read = ioread32; for (i = 1000; (read(mapped_status) & (1 << clk->enable_bit)) && i; diff --git a/fs/io_uring.c b/fs/io_uring.c index e716630719eb..a07c44efc48b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3346,7 +3346,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock) if (force_nonblock) return -EAGAIN; - ret = do_madvise(ma->addr, ma->len, ma->advice); + ret = do_madvise(NULL, current->mm, ma->addr, ma->len, ma->advice); if (ret < 0) req_set_fail_links(req); io_cqring_add_event(req, ret); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db600ef218d7..d7c7c7f36c4a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -374,6 +374,7 @@ */ #ifndef RO_AFTER_INIT_DATA #define RO_AFTER_INIT_DATA \ + . = ALIGN(8); \ __start_ro_after_init = .; \ *(.data..ro_after_init) \ JUMP_TABLE_DATA \ diff --git a/include/linux/compat.h b/include/linux/compat.h index e90100c0de72..86b61e873947 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -827,6 +827,10 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd, unsigned long vlen, loff_t pos, rwf_t flags); #endif +asmlinkage ssize_t compat_sys_process_madvise(compat_int_t pidfd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, compat_int_t behavior, + compat_int_t flags); /* * Deprecated system calls which are still defined in diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e77197a62809..bbf624a7f5a6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -520,7 +520,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_zone_size[zone_idx][lru]; + return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(void); diff --git a/include/linux/mm.h b/include/linux/mm.h index dc7b87310c10..573947c595f6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1057,6 +1057,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); static inline enum zone_type page_zonenum(const struct page *page) { + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -2585,7 +2586,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); -extern int do_madvise(unsigned long start, size_t len_in, int behavior); +extern int do_madvise(struct task_struct *target_task, struct mm_struct *mm, + unsigned long start, size_t len_in, int behavior); static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, diff --git a/include/linux/pid.h b/include/linux/pid.h index 176d6cf80e7c..86e0e7cb7872 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops; struct file; extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_get_pid(unsigned int fd); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0c3e296ec277..63ffa6dc9da3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -879,6 +879,8 @@ asmlinkage long sys_munlockall(void); asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char __user * vec); asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); +asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, + unsigned long vlen, int behavior, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 9018bb54bfa3..6a5856c9d9b4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -863,9 +863,12 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount) __SYSCALL(__NR_watch_sb, sys_watch_sb) #define __NR_fsinfo 442 __SYSCALL(__NR_fsinfo, sys_fsinfo) +#define __NR_process_madvise 443 +__SC_COMP(__NR_process_madvise, sys_process_madvise, \ + compat_sys_process_madvise) #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 444 /* * 32 bit systems traditionally used different diff --git a/kernel/exit.c b/kernel/exit.c index 727150f28103..ed2c4924a7cc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1474,23 +1474,6 @@ end: return retval; } -static struct pid *pidfd_get_pid(unsigned int fd) -{ - struct fd f; - struct pid *pid; - - f = fdget(fd); - if (!f.file) - return ERR_PTR(-EBADF); - - pid = pidfd_pid(f.file); - if (!IS_ERR(pid)) - get_pid(pid); - - fdput(f); - return pid; -} - static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..3122043fe364 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -518,6 +518,23 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) return idr_get_next(&ns->idr, &nr); } +struct pid *pidfd_get_pid(unsigned int fd) +{ + struct fd f; + struct pid *pid; + + f = fdget(fd); + if (!f.file) + return ERR_PTR(-EBADF); + + pid = pidfd_pid(f.file); + if (!IS_ERR(pid)) + get_pid(pid); + + fdput(f); + return pid; +} + /** * pidfd_create() - Create a new pid file descriptor. * diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 519317f3904c..fad48ace1123 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -287,6 +287,8 @@ COND_SYSCALL(mlockall); COND_SYSCALL(munlockall); COND_SYSCALL(mincore); COND_SYSCALL(madvise); +COND_SYSCALL(process_madvise); +COND_SYSCALL_COMPAT(process_madvise); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL_COMPAT(mbind); diff --git a/mm/filemap.c b/mm/filemap.c index f0ae9a6308cb..9a46679b4642 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2370,6 +2370,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; pgoff_t offset = vmf->pgoff; + unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ) @@ -2385,14 +2386,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) } /* Avoid banging the cache line if not needed */ - if (ra->mmap_miss < MMAP_LOTSAMISS * 10) - ra->mmap_miss++; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss); /* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ - if (ra->mmap_miss > MMAP_LOTSAMISS) + if (mmap_miss > MMAP_LOTSAMISS) return fpin; /* @@ -2418,13 +2420,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; + unsigned int mmap_miss; pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; - if (ra->mmap_miss > 0) - ra->mmap_miss--; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (PageReadahead(page)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_async_readahead(mapping, ra, file, @@ -2590,6 +2594,7 @@ void filemap_map_pages(struct vm_fault *vmf, unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *page; + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2626,8 +2631,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->index >= max_idx) goto unlock; - if (file->f_ra.mmap_miss > 0) - file->f_ra.mmap_miss--; + if (mmap_miss > 0) + mmap_miss--; vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; if (vmf->pte) @@ -2647,6 +2652,7 @@ next: break; } rcu_read_unlock(); + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); } EXPORT_SYMBOL(filemap_map_pages); diff --git a/mm/frontswap.c b/mm/frontswap.c index bfa3a339253e..5c8c66bbedde 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -61,16 +61,16 @@ static u64 frontswap_failed_stores; static u64 frontswap_invalidates; static inline void inc_frontswap_loads(void) { - frontswap_loads++; + data_race(frontswap_loads++); } static inline void inc_frontswap_succ_stores(void) { - frontswap_succ_stores++; + data_race(frontswap_succ_stores++); } static inline void inc_frontswap_failed_stores(void) { - frontswap_failed_stores++; + data_race(frontswap_failed_stores++); } static inline void inc_frontswap_invalidates(void) { - frontswap_invalidates++; + data_race(frontswap_invalidates++); } #else static inline void inc_frontswap_loads(void) { } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e362dc3d2028..5e252d91eb14 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object) u32 old_csum = object->checksum; kasan_disable_current(); + kcsan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); + kcsan_enable_current(); return object->checksum != old_csum; } diff --git a/mm/list_lru.c b/mm/list_lru.c index 9222910ab1cb..dbff67afe197 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru, rcu_read_lock(); l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); - count = l->nr_items; + count = READ_ONCE(l->nr_items); rcu_read_unlock(); return count; diff --git a/mm/madvise.c b/mm/madvise.c index dd1d43cf026d..2164df1c0823 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -17,17 +17,19 @@ #include <linux/falloc.h> #include <linux/fadvise.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/ksm.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/compat.h> #include <linux/pagewalk.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> -#include <linux/sched/mm.h> +#include <linux/uio.h> #include <asm/tlb.h> @@ -36,6 +38,7 @@ struct madvise_walk_private { struct mmu_gather *tlb; bool pageout; + struct task_struct *target_task; }; /* @@ -255,6 +258,7 @@ static long madvise_willneed(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { + struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; loff_t offset; @@ -289,12 +293,12 @@ static long madvise_willneed(struct vm_area_struct *vma, */ *prev = NULL; /* tell sys_madvise we drop mmap_lock */ get_file(file); - mmap_read_unlock(current->mm); + mmap_read_unlock(mm); offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); fput(file); - mmap_read_lock(current->mm); + mmap_read_lock(mm); return 0; } @@ -315,6 +319,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (fatal_signal_pending(current)) return -EINTR; + if (private->target_task && + fatal_signal_pending(private->target_task)) + return -EINTR; + #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge(*pmd)) { pmd_t orig_pmd; @@ -476,12 +484,14 @@ static const struct mm_walk_ops cold_walk_ops = { }; static void madvise_cold_page_range(struct mmu_gather *tlb, + struct task_struct *task, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct madvise_walk_private walk_private = { .pageout = false, .tlb = tlb, + .target_task = task, }; tlb_start_vma(tlb, vma); @@ -489,7 +499,8 @@ static void madvise_cold_page_range(struct mmu_gather *tlb, tlb_end_vma(tlb, vma); } -static long madvise_cold(struct vm_area_struct *vma, +static long madvise_cold(struct task_struct *task, + struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) { @@ -502,19 +513,21 @@ static long madvise_cold(struct vm_area_struct *vma, lru_add_drain(); tlb_gather_mmu(&tlb, mm, start_addr, end_addr); - madvise_cold_page_range(&tlb, vma, start_addr, end_addr); + madvise_cold_page_range(&tlb, task, vma, start_addr, end_addr); tlb_finish_mmu(&tlb, start_addr, end_addr); return 0; } static void madvise_pageout_page_range(struct mmu_gather *tlb, + struct task_struct *task, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct madvise_walk_private walk_private = { .pageout = true, .tlb = tlb, + .target_task = task, }; tlb_start_vma(tlb, vma); @@ -538,7 +551,8 @@ static inline bool can_do_pageout(struct vm_area_struct *vma) inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; } -static long madvise_pageout(struct vm_area_struct *vma, +static long madvise_pageout(struct task_struct *task, + struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) { @@ -554,7 +568,7 @@ static long madvise_pageout(struct vm_area_struct *vma, lru_add_drain(); tlb_gather_mmu(&tlb, mm, start_addr, end_addr); - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); + madvise_pageout_page_range(&tlb, task, vma, start_addr, end_addr); tlb_finish_mmu(&tlb, start_addr, end_addr); return 0; @@ -683,7 +697,6 @@ out: if (nr_swap) { if (current->mm == mm) sync_mm_rss(mm); - add_mm_counter(mm, MM_SWAPENTS, nr_swap); } arch_leave_lazy_mmu_mode(); @@ -763,6 +776,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, unsigned long start, unsigned long end, int behavior) { + struct mm_struct *mm = vma->vm_mm; + *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; @@ -770,8 +785,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, if (!userfaultfd_remove(vma, start, end)) { *prev = NULL; /* mmap_lock has been dropped, prev is stale */ - mmap_read_lock(current->mm); - vma = find_vma(current->mm, start); + mmap_read_lock(mm); + vma = find_vma(mm, start); if (!vma) return -ENOMEM; if (start < vma->vm_start) { @@ -825,6 +840,7 @@ static long madvise_remove(struct vm_area_struct *vma, loff_t offset; int error; struct file *f; + struct mm_struct *mm = vma->vm_mm; *prev = NULL; /* tell sys_madvise we drop mmap_lock */ @@ -852,13 +868,13 @@ static long madvise_remove(struct vm_area_struct *vma, get_file(f); if (userfaultfd_remove(vma, start, end)) { /* mmap_lock was not released by userfaultfd_remove() */ - mmap_read_unlock(current->mm); + mmap_read_unlock(mm); } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); fput(f); - mmap_read_lock(current->mm); + mmap_read_lock(mm); return error; } @@ -932,7 +948,8 @@ static int madvise_inject_error(int behavior, #endif static long -madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, +madvise_vma(struct task_struct *task, struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { switch (behavior) { @@ -941,9 +958,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, case MADV_WILLNEED: return madvise_willneed(vma, prev, start, end); case MADV_COLD: - return madvise_cold(vma, prev, start, end); + return madvise_cold(task, vma, prev, start, end); case MADV_PAGEOUT: - return madvise_pageout(vma, prev, start, end); + return madvise_pageout(task, vma, prev, start, end); case MADV_FREE: case MADV_DONTNEED: return madvise_dontneed_free(vma, prev, start, end, behavior); @@ -990,6 +1007,22 @@ madvise_behavior_valid(int behavior) } } +static bool +process_madvise_behavior_valid(int behavior) +{ + switch (behavior) { + case MADV_COLD: + case MADV_PAGEOUT: +#ifdef CONFIG_KSM + case MADV_MERGEABLE: + case MADV_UNMERGEABLE: +#endif + return true; + default: + return false; + } +} + /* * The madvise(2) system call. * @@ -1037,6 +1070,11 @@ madvise_behavior_valid(int behavior) * MADV_DONTDUMP - the application wants to prevent pages in the given range * from being included in its core dump. * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump. + * MADV_COLD - the application is not expected to use this memory soon, + * deactivate pages in this range so that they can be reclaimed + * easily if memory pressure hanppens. + * MADV_PAGEOUT - the application is not expected to use this memory soon, + * page out the pages in this range immediately. * * return values: * zero - success @@ -1051,7 +1089,8 @@ madvise_behavior_valid(int behavior) * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ -int do_madvise(unsigned long start, size_t len_in, int behavior) +int do_madvise(struct task_struct *target_task, struct mm_struct *mm, + unsigned long start, size_t len_in, int behavior) { unsigned long end, tmp; struct vm_area_struct *vma, *prev; @@ -1089,7 +1128,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) write = madvise_need_mmap_write(behavior); if (write) { - if (mmap_write_lock_killable(current->mm)) + if (mmap_write_lock_killable(mm)) return -EINTR; /* @@ -1104,12 +1143,12 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) * but for now we have the mmget_still_valid() * model. */ - if (!mmget_still_valid(current->mm)) { - mmap_write_unlock(current->mm); + if (!mmget_still_valid(mm)) { + mmap_write_unlock(mm); return -EINTR; } } else { - mmap_read_lock(current->mm); + mmap_read_lock(mm); } /* @@ -1117,7 +1156,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) * ranges, just ignore them, but return -ENOMEM at the end. * - different from the way of handling in mlock etc. */ - vma = find_vma_prev(current->mm, start, &prev); + vma = find_vma_prev(mm, start, &prev); if (vma && start > vma->vm_start) prev = vma; @@ -1142,7 +1181,8 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) tmp = end; /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = madvise_vma(vma, &prev, start, tmp, behavior); + error = madvise_vma(target_task, vma, &prev, + start, tmp, behavior); if (error) goto out; start = tmp; @@ -1154,19 +1194,122 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) if (prev) vma = prev->vm_next; else /* madvise_remove dropped mmap_lock */ - vma = find_vma(current->mm, start); + vma = find_vma(mm, start); } out: blk_finish_plug(&plug); if (write) - mmap_write_unlock(current->mm); + mmap_write_unlock(mm); else - mmap_read_unlock(current->mm); + mmap_read_unlock(mm); return error; } SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { - return do_madvise(start, len_in, behavior); + return do_madvise(current, current->mm, start, len_in, behavior); } + +static int process_madvise_vec(struct task_struct *target_task, + struct mm_struct *mm, struct iov_iter *iter, int behavior) +{ + struct iovec iovec; + int ret = 0; + + while (iov_iter_count(iter)) { + iovec = iov_iter_iovec(iter); + ret = do_madvise(target_task, mm, (unsigned long)iovec.iov_base, + iovec.iov_len, behavior); + if (ret < 0) + break; + iov_iter_advance(iter, iovec.iov_len); + } + + return ret; +} + +static ssize_t do_process_madvise(int pidfd, struct iov_iter *iter, + int behavior, unsigned int flags) +{ + ssize_t ret; + struct pid *pid; + struct task_struct *task; + struct mm_struct *mm; + size_t total_len = iov_iter_count(iter); + + if (flags != 0) + return -EINVAL; + + pid = pidfd_get_pid(pidfd); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + if (task->mm != current->mm && + !process_madvise_behavior_valid(behavior)) { + ret = -EINVAL; + goto release_task; + } + + mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + goto release_task; + } + + ret = process_madvise_vec(task, mm, iter, behavior); + if (ret >= 0) + ret = total_len - iov_iter_count(iter); + + mmput(mm); +release_task: + put_task_struct(task); +put_pid: + put_pid(pid); + return ret; +} + +SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, + unsigned long, vlen, int, behavior, unsigned int, flags) +{ + ssize_t ret; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + + ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); + if (ret >= 0) { + ret = do_process_madvise(pidfd, &iter, behavior, flags); + kfree(iov); + } + return ret; +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE5(process_madvise, compat_int_t, pidfd, + const struct compat_iovec __user *, vec, + compat_ulong_t, vlen, + compat_int_t, behavior, + compat_int_t, flags) + +{ + ssize_t ret; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + + ret = compat_import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), + &iov, &iter); + if (ret >= 0) { + ret = do_process_madvise(pidfd, &iter, behavior, flags); + kfree(iov); + } + return ret; +} +#endif diff --git a/mm/memory.c b/mm/memory.c index dc7f3543b1fd..bc6a471932b5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3123,8 +3123,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!page) { struct swap_info_struct *si = swp_swap_info(entry); - if (si->flags & SWP_SYNCHRONOUS_IO && - __swap_count(entry) == 1) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && + __swap_count(entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); diff --git a/mm/mempool.c b/mm/mempool.c index 85efab3da720..79bff63ecf27 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool) * ensures that there will be frees which return elements to the * pool waking up the waiters. */ - if (unlikely(pool->curr_nr < pool->min_nr)) { + if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); diff --git a/mm/page_counter.c b/mm/page_counter.c index c56db2d5e159..e3a205275a0b 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } } @@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter, propagate_protected_usage(counter, new); /* * This is racy, but we can live with some - * inaccuracy in the failcnt. + * inaccuracy in the failcnt which is only used + * to report stats. */ - c->failcnt++; + data_race(c->failcnt++); *fail = c; goto failed; } @@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter, * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } return true; diff --git a/mm/page_io.c b/mm/page_io.c index e8726f3e3820..44f8fdd8283a 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -85,7 +85,7 @@ static void swap_slot_free_notify(struct page *page) return; sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) + if (data_race(!(sis->flags & SWP_BLKDEV))) return; /* @@ -285,7 +285,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct swap_info_struct *sis = page_swap_info(page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -376,7 +376,7 @@ int swap_readpage(struct page *page, bool synchronous) goto out; } - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -438,7 +438,7 @@ int swap_set_page_dirty(struct page *page) { struct swap_info_struct *sis = page_swap_info(page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct address_space *mapping = sis->swap_file->f_mapping; VM_BUG_ON_PAGE(!PageSwapCache(page), page); diff --git a/mm/rmap.c b/mm/rmap.c index 5fe2dedce1fc..c81dce3e244c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ void flush_tlb_batched_pending(struct mm_struct *mm) { - if (mm->tlb_flush_batched) { + if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); /* diff --git a/mm/swap.c b/mm/swap.c index dbcab84c6fce..667133d25e56 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -632,7 +632,8 @@ void lru_add_drain_cpu(int cpu) __pagevec_lru_add(pvec); pvec = &per_cpu(lru_rotate.pvec, cpu); - if (pagevec_count(pvec)) { + /* Disabling interrupts below acts as a compiler barrier. */ + if (data_race(pagevec_count(pvec))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -793,7 +794,7 @@ void lru_add_drain_all(void) struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) || - pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) || + data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || diff --git a/mm/swap_state.c b/mm/swap_state.c index e98ff460e9e9..1050fded788b 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -57,8 +57,8 @@ static bool enable_vma_readahead __read_mostly = true; #define GET_SWAP_RA_VAL(vma) \ (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) -#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) -#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0) +#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++) +#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr)) static struct { unsigned long add_total; diff --git a/mm/swapfile.c b/mm/swapfile.c index 987276c557d1..c0477896576f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -672,7 +672,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) - si->highest_bit -= nr_entries; + WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); si->inuse_pages += nr_entries; if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; @@ -704,7 +704,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, if (end > si->highest_bit) { bool was_full = !si->highest_bit; - si->highest_bit = end; + WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } @@ -868,7 +868,7 @@ checks: else goto done; } - si->swap_map[offset] = usage; + WRITE_ONCE(si->swap_map[offset], usage); inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); @@ -927,12 +927,13 @@ done: scan: spin_unlock(&si->lock); - while (++offset <= si->highest_bit) { - if (!si->swap_map[offset]) { + while (++offset <= READ_ONCE(si->highest_bit)) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -944,11 +945,12 @@ scan: } offset = si->lowest_bit; while (offset < scan_base) { - if (!si->swap_map[offset]) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -1147,7 +1149,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry) p = swp_swap_info(entry); if (!p) goto bad_nofile; - if (!(p->flags & SWP_USED)) + if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) @@ -1173,7 +1175,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry) p = __swap_info_get(entry); if (!p) goto out; - if (!p->swap_map[swp_offset(entry)]) + if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; @@ -1242,7 +1244,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + if (usage) + WRITE_ONCE(p->swap_map[offset], usage); + else + WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } @@ -1294,7 +1299,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry) goto bad_nofile; rcu_read_lock(); - if (!(si->flags & SWP_VALID)) + if (data_race(!(si->flags & SWP_VALID))) goto unlock_out; offset = swp_offset(entry); if (offset >= si->max) @@ -3482,7 +3487,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) } else err = -ENOENT; /* unused swap entry */ - p->swap_map[offset] = count | has_cache; + WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); |