summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2020-06-16 13:16:24 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2020-06-16 13:16:24 +1000
commit70006740ef82246bc6ecc4d8da0fa9d5553a5004 (patch)
tree30dc62518df5cd4537ce0a0384bd52f550c89da4
parent499cb7f8839d80fdf14ae68563213a1bdf8e8fce (diff)
parent2c50e42e7dab0821f55dbf809080668ef1cf03d7 (diff)
Merge branch 'akpm/master'
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/include/asm/string_32.h26
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/lib/delay.c1
-rw-r--r--arch/sh/mm/consistent.c2
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--drivers/sh/clk/cpg.c23
-rw-r--r--fs/io_uring.c2
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/linux/compat.h4
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/mm.h4
-rw-r--r--include/linux/pid.h1
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/uapi/asm-generic/unistd.h5
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/pid.c17
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--mm/filemap.c20
-rw-r--r--mm/frontswap.c8
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/madvise.c195
-rw-r--r--mm/memory.c4
-rw-r--r--mm/mempool.c2
-rw-r--r--mm/page_counter.c13
-rw-r--r--mm/page_io.c8
-rw-r--r--mm/rmap.c2
-rw-r--r--mm/swap.c5
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/swapfile.c31
46 files changed, 288 insertions, 138 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 598779a35e49..b249824c8267 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -481,3 +481,4 @@
550 common watch_mount sys_watch_mount
551 common watch_sb sys_watch_sb
552 common fsinfo sys_fsinfo
+553 common process_madvise sys_process_madvise
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 7dd5907463ef..7b3832d920ee 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -455,3 +455,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 949788f5ba40..d1f7d35f986e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 443
+#define __NR_compat_syscalls 444
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index d49b63db5b08..f8dafe900cb9 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -891,6 +891,8 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount)
__SYSCALL(__NR_watch_sb, sys_watch_sb)
#define __NR_fsinfo 442
__SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_process_madvise 443
+__SYSCALL(__NR_process_madvise, compat_sys_process_madvise)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 457270289902..6636a1aef36b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -362,3 +362,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 86872b908471..8cd84a7fbf57 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 11e1587078b3..f581a02257ec 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -447,3 +447,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 3cde581d0cfb..c85bdc3abe55 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -380,3 +380,4 @@
440 n32 watch_mount sys_watch_mount
441 n32 watch_sb sys_watch_sb
442 n32 fsinfo sys_fsinfo
+443 n32 process_madvise compat_sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index e0270d5700b5..9e08c40d3cb9 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -356,3 +356,4 @@
440 n64 watch_mount sys_watch_mount
441 n64 watch_sb sys_watch_sb
442 n64 fsinfo sys_fsinfo
+443 n64 process_madvise sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 002ca3e4aebe..a2b591d7c874 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -429,3 +429,4 @@
440 o32 watch_mount sys_watch_mount
441 o32 watch_sb sys_watch_sb
442 o32 fsinfo sys_fsinfo
+443 o32 process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 6e6b2114065f..98e7442e4d7a 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -439,3 +439,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 4dda0f1ce513..72fb9dd88e16 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -531,3 +531,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index a750ad6821f1..b731fcb03f1b 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
440 common watch_mount sys_watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h
index 3558b1d7123e..be3f9a08cbc9 100644
--- a/arch/sh/include/asm/string_32.h
+++ b/arch/sh/include/asm/string_32.h
@@ -28,32 +28,6 @@ static inline char *strcpy(char *__dest, const char *__src)
return __xdest;
}
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *__dest, const char *__src, size_t __n)
-{
- register char *__xdest = __dest;
- unsigned long __dummy;
-
- if (__n == 0)
- return __xdest;
-
- __asm__ __volatile__(
- "1:\n"
- "mov.b @%1+, %2\n\t"
- "mov.b %2, @%0\n\t"
- "cmp/eq #0, %2\n\t"
- "bt/s 2f\n\t"
- " cmp/eq %5,%1\n\t"
- "bf/s 1b\n\t"
- " add #1, %0\n"
- "2:"
- : "=r" (__dest), "=r" (__src), "=&z" (__dummy)
- : "0" (__dest), "1" (__src), "r" (__src+__n)
- : "memory", "t");
-
- return __xdest;
-}
-
#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char *__cs, const char *__ct)
{
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 4d7b900bb350..e7a4804e23cf 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index 540e670dbafc..cccab21f3e56 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -42,6 +42,7 @@ inline void __const_udelay(unsigned long xloops)
: "macl", "mach");
__delay(++xloops);
}
+EXPORT_SYMBOL(__delay);
void __udelay(unsigned long usecs)
{
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 3169a343a5ab..0de206c1acfe 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -57,8 +57,6 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
return -ENOMEM;
}
- memset(buf, 0, memsize);
-
r->flags = IORESOURCE_MEM;
r->start = dma_handle;
r->end = r->start + memsize - 1;
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 6b25b0a33834..d6126ee2f51d 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -487,3 +487,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 65d3497d1eb8..686d59df1e12 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -446,3 +446,4 @@
440 i386 watch_mount sys_watch_mount
441 i386 watch_sb sys_watch_sb
442 i386 fsinfo sys_fsinfo
+443 i386 process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 3100ed45b108..b345b35b3308 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 64 process_madvise sys_process_madvise
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -406,3 +407,4 @@
545 x32 execveat compat_sys_execveat
546 x32 preadv2 compat_sys_preadv64v2
547 x32 pwritev2 compat_sys_pwritev64v2
+548 x32 process_madvise compat_sys_process_madvise
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index fc382476421f..96cb07095e4c 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -412,3 +412,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c
index eeb028b9cdb3..a5cacfe24a42 100644
--- a/drivers/sh/clk/cpg.c
+++ b/drivers/sh/clk/cpg.c
@@ -36,36 +36,21 @@ static void sh_clk_write(int value, struct clk *clk)
iowrite32(value, clk->mapped_reg);
}
-static unsigned int r8(const void __iomem *addr)
-{
- return ioread8(addr);
-}
-
-static unsigned int r16(const void __iomem *addr)
-{
- return ioread16(addr);
-}
-
-static unsigned int r32(const void __iomem *addr)
-{
- return ioread32(addr);
-}
-
static int sh_clk_mstp_enable(struct clk *clk)
{
sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk);
if (clk->status_reg) {
- unsigned int (*read)(const void __iomem *addr);
+ unsigned int (*read)(void __iomem *addr);
int i;
void __iomem *mapped_status = (phys_addr_t)clk->status_reg -
(phys_addr_t)clk->enable_reg + clk->mapped_reg;
if (clk->flags & CLK_ENABLE_REG_8BIT)
- read = r8;
+ read = ioread8;
else if (clk->flags & CLK_ENABLE_REG_16BIT)
- read = r16;
+ read = ioread16;
else
- read = r32;
+ read = ioread32;
for (i = 1000;
(read(mapped_status) & (1 << clk->enable_bit)) && i;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e716630719eb..a07c44efc48b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3346,7 +3346,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
if (force_nonblock)
return -EAGAIN;
- ret = do_madvise(ma->addr, ma->len, ma->advice);
+ ret = do_madvise(NULL, current->mm, ma->addr, ma->len, ma->advice);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db600ef218d7..d7c7c7f36c4a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -374,6 +374,7 @@
*/
#ifndef RO_AFTER_INIT_DATA
#define RO_AFTER_INIT_DATA \
+ . = ALIGN(8); \
__start_ro_after_init = .; \
*(.data..ro_after_init) \
JUMP_TABLE_DATA \
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e90100c0de72..86b61e873947 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -827,6 +827,10 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd,
unsigned long vlen, loff_t pos, rwf_t flags);
#endif
+asmlinkage ssize_t compat_sys_process_madvise(compat_int_t pidfd,
+ const struct compat_iovec __user *vec,
+ compat_ulong_t vlen, compat_int_t behavior,
+ compat_int_t flags);
/*
* Deprecated system calls which are still defined in
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e77197a62809..bbf624a7f5a6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -520,7 +520,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
struct mem_cgroup_per_node *mz;
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- return mz->lru_zone_size[zone_idx][lru];
+ return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
}
void mem_cgroup_handle_over_high(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..573947c595f6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1057,6 +1057,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
static inline enum zone_type page_zonenum(const struct page *page)
{
+ ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
@@ -2585,7 +2586,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
-extern int do_madvise(unsigned long start, size_t len_in, int behavior);
+extern int do_madvise(struct task_struct *target_task, struct mm_struct *mm,
+ unsigned long start, size_t len_in, int behavior);
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 176d6cf80e7c..86e0e7cb7872 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops;
struct file;
extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_get_pid(unsigned int fd);
static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0c3e296ec277..63ffa6dc9da3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -879,6 +879,8 @@ asmlinkage long sys_munlockall(void);
asmlinkage long sys_mincore(unsigned long start, size_t len,
unsigned char __user * vec);
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
+asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
+ unsigned long vlen, int behavior, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 9018bb54bfa3..6a5856c9d9b4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -863,9 +863,12 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount)
__SYSCALL(__NR_watch_sb, sys_watch_sb)
#define __NR_fsinfo 442
__SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_process_madvise 443
+__SC_COMP(__NR_process_madvise, sys_process_madvise, \
+ compat_sys_process_madvise)
#undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 444
/*
* 32 bit systems traditionally used different
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f28103..ed2c4924a7cc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1474,23 +1474,6 @@ end:
return retval;
}
-static struct pid *pidfd_get_pid(unsigned int fd)
-{
- struct fd f;
- struct pid *pid;
-
- f = fdget(fd);
- if (!f.file)
- return ERR_PTR(-EBADF);
-
- pid = pidfd_pid(f.file);
- if (!IS_ERR(pid))
- get_pid(pid);
-
- fdput(f);
- return pid;
-}
-
static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
int options, struct rusage *ru)
{
diff --git a/kernel/pid.c b/kernel/pid.c
index f1496b757162..3122043fe364 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -518,6 +518,23 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return idr_get_next(&ns->idr, &nr);
}
+struct pid *pidfd_get_pid(unsigned int fd)
+{
+ struct fd f;
+ struct pid *pid;
+
+ f = fdget(fd);
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ pid = pidfd_pid(f.file);
+ if (!IS_ERR(pid))
+ get_pid(pid);
+
+ fdput(f);
+ return pid;
+}
+
/**
* pidfd_create() - Create a new pid file descriptor.
*
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 519317f3904c..fad48ace1123 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -287,6 +287,8 @@ COND_SYSCALL(mlockall);
COND_SYSCALL(munlockall);
COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
+COND_SYSCALL(process_madvise);
+COND_SYSCALL_COMPAT(process_madvise);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL_COMPAT(mbind);
diff --git a/mm/filemap.c b/mm/filemap.c
index f0ae9a6308cb..9a46679b4642 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2370,6 +2370,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct address_space *mapping = file->f_mapping;
struct file *fpin = NULL;
pgoff_t offset = vmf->pgoff;
+ unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ)
@@ -2385,14 +2386,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
}
/* Avoid banging the cache line if not needed */
- if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
- ra->mmap_miss++;
+ mmap_miss = READ_ONCE(ra->mmap_miss);
+ if (mmap_miss < MMAP_LOTSAMISS * 10)
+ WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
/*
* Do we miss much more than hit in this file? If so,
* stop bothering with read-ahead. It will only hurt.
*/
- if (ra->mmap_miss > MMAP_LOTSAMISS)
+ if (mmap_miss > MMAP_LOTSAMISS)
return fpin;
/*
@@ -2418,13 +2420,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
struct file_ra_state *ra = &file->f_ra;
struct address_space *mapping = file->f_mapping;
struct file *fpin = NULL;
+ unsigned int mmap_miss;
pgoff_t offset = vmf->pgoff;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
return fpin;
- if (ra->mmap_miss > 0)
- ra->mmap_miss--;
+ mmap_miss = READ_ONCE(ra->mmap_miss);
+ if (mmap_miss)
+ WRITE_ONCE(ra->mmap_miss, --mmap_miss);
if (PageReadahead(page)) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
page_cache_async_readahead(mapping, ra, file,
@@ -2590,6 +2594,7 @@ void filemap_map_pages(struct vm_fault *vmf,
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *page;
+ unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
@@ -2626,8 +2631,8 @@ void filemap_map_pages(struct vm_fault *vmf,
if (page->index >= max_idx)
goto unlock;
- if (file->f_ra.mmap_miss > 0)
- file->f_ra.mmap_miss--;
+ if (mmap_miss > 0)
+ mmap_miss--;
vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
if (vmf->pte)
@@ -2647,6 +2652,7 @@ next:
break;
}
rcu_read_unlock();
+ WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
}
EXPORT_SYMBOL(filemap_map_pages);
diff --git a/mm/frontswap.c b/mm/frontswap.c
index bfa3a339253e..5c8c66bbedde 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -61,16 +61,16 @@ static u64 frontswap_failed_stores;
static u64 frontswap_invalidates;
static inline void inc_frontswap_loads(void) {
- frontswap_loads++;
+ data_race(frontswap_loads++);
}
static inline void inc_frontswap_succ_stores(void) {
- frontswap_succ_stores++;
+ data_race(frontswap_succ_stores++);
}
static inline void inc_frontswap_failed_stores(void) {
- frontswap_failed_stores++;
+ data_race(frontswap_failed_stores++);
}
static inline void inc_frontswap_invalidates(void) {
- frontswap_invalidates++;
+ data_race(frontswap_invalidates++);
}
#else
static inline void inc_frontswap_loads(void) { }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e362dc3d2028..5e252d91eb14 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object)
u32 old_csum = object->checksum;
kasan_disable_current();
+ kcsan_disable_current();
object->checksum = crc32(0, (void *)object->pointer, object->size);
kasan_enable_current();
+ kcsan_enable_current();
return object->checksum != old_csum;
}
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 9222910ab1cb..dbff67afe197 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru,
rcu_read_lock();
l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
- count = l->nr_items;
+ count = READ_ONCE(l->nr_items);
rcu_read_unlock();
return count;
diff --git a/mm/madvise.c b/mm/madvise.c
index dd1d43cf026d..2164df1c0823 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -17,17 +17,19 @@
#include <linux/falloc.h>
#include <linux/fadvise.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/compat.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/mmu_notifier.h>
-#include <linux/sched/mm.h>
+#include <linux/uio.h>
#include <asm/tlb.h>
@@ -36,6 +38,7 @@
struct madvise_walk_private {
struct mmu_gather *tlb;
bool pageout;
+ struct task_struct *target_task;
};
/*
@@ -255,6 +258,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end)
{
+ struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
loff_t offset;
@@ -289,12 +293,12 @@ static long madvise_willneed(struct vm_area_struct *vma,
*/
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
get_file(file);
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
offset = (loff_t)(start - vma->vm_start)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
fput(file);
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
return 0;
}
@@ -315,6 +319,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (fatal_signal_pending(current))
return -EINTR;
+ if (private->target_task &&
+ fatal_signal_pending(private->target_task))
+ return -EINTR;
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge(*pmd)) {
pmd_t orig_pmd;
@@ -476,12 +484,14 @@ static const struct mm_walk_ops cold_walk_ops = {
};
static void madvise_cold_page_range(struct mmu_gather *tlb,
+ struct task_struct *task,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
struct madvise_walk_private walk_private = {
.pageout = false,
.tlb = tlb,
+ .target_task = task,
};
tlb_start_vma(tlb, vma);
@@ -489,7 +499,8 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
tlb_end_vma(tlb, vma);
}
-static long madvise_cold(struct vm_area_struct *vma,
+static long madvise_cold(struct task_struct *task,
+ struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start_addr, unsigned long end_addr)
{
@@ -502,19 +513,21 @@ static long madvise_cold(struct vm_area_struct *vma,
lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
- madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
+ madvise_cold_page_range(&tlb, task, vma, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr);
return 0;
}
static void madvise_pageout_page_range(struct mmu_gather *tlb,
+ struct task_struct *task,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
struct madvise_walk_private walk_private = {
.pageout = true,
.tlb = tlb,
+ .target_task = task,
};
tlb_start_vma(tlb, vma);
@@ -538,7 +551,8 @@ static inline bool can_do_pageout(struct vm_area_struct *vma)
inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
}
-static long madvise_pageout(struct vm_area_struct *vma,
+static long madvise_pageout(struct task_struct *task,
+ struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start_addr, unsigned long end_addr)
{
@@ -554,7 +568,7 @@ static long madvise_pageout(struct vm_area_struct *vma,
lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
- madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+ madvise_pageout_page_range(&tlb, task, vma, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr);
return 0;
@@ -683,7 +697,6 @@ out:
if (nr_swap) {
if (current->mm == mm)
sync_mm_rss(mm);
-
add_mm_counter(mm, MM_SWAPENTS, nr_swap);
}
arch_leave_lazy_mmu_mode();
@@ -763,6 +776,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
int behavior)
{
+ struct mm_struct *mm = vma->vm_mm;
+
*prev = vma;
if (!can_madv_lru_vma(vma))
return -EINVAL;
@@ -770,8 +785,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_lock has been dropped, prev is stale */
- mmap_read_lock(current->mm);
- vma = find_vma(current->mm, start);
+ mmap_read_lock(mm);
+ vma = find_vma(mm, start);
if (!vma)
return -ENOMEM;
if (start < vma->vm_start) {
@@ -825,6 +840,7 @@ static long madvise_remove(struct vm_area_struct *vma,
loff_t offset;
int error;
struct file *f;
+ struct mm_struct *mm = vma->vm_mm;
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
@@ -852,13 +868,13 @@ static long madvise_remove(struct vm_area_struct *vma,
get_file(f);
if (userfaultfd_remove(vma, start, end)) {
/* mmap_lock was not released by userfaultfd_remove() */
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
}
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, end - start);
fput(f);
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
return error;
}
@@ -932,7 +948,8 @@ static int madvise_inject_error(int behavior,
#endif
static long
-madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
+madvise_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
{
switch (behavior) {
@@ -941,9 +958,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
case MADV_WILLNEED:
return madvise_willneed(vma, prev, start, end);
case MADV_COLD:
- return madvise_cold(vma, prev, start, end);
+ return madvise_cold(task, vma, prev, start, end);
case MADV_PAGEOUT:
- return madvise_pageout(vma, prev, start, end);
+ return madvise_pageout(task, vma, prev, start, end);
case MADV_FREE:
case MADV_DONTNEED:
return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -990,6 +1007,22 @@ madvise_behavior_valid(int behavior)
}
}
+static bool
+process_madvise_behavior_valid(int behavior)
+{
+ switch (behavior) {
+ case MADV_COLD:
+ case MADV_PAGEOUT:
+#ifdef CONFIG_KSM
+ case MADV_MERGEABLE:
+ case MADV_UNMERGEABLE:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* The madvise(2) system call.
*
@@ -1037,6 +1070,11 @@ madvise_behavior_valid(int behavior)
* MADV_DONTDUMP - the application wants to prevent pages in the given range
* from being included in its core dump.
* MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
+ * MADV_COLD - the application is not expected to use this memory soon,
+ * deactivate pages in this range so that they can be reclaimed
+ * easily if memory pressure hanppens.
+ * MADV_PAGEOUT - the application is not expected to use this memory soon,
+ * page out the pages in this range immediately.
*
* return values:
* zero - success
@@ -1051,7 +1089,8 @@ madvise_behavior_valid(int behavior)
* -EBADF - map exists, but area maps something that isn't a file.
* -EAGAIN - a kernel resource was temporarily unavailable.
*/
-int do_madvise(unsigned long start, size_t len_in, int behavior)
+int do_madvise(struct task_struct *target_task, struct mm_struct *mm,
+ unsigned long start, size_t len_in, int behavior)
{
unsigned long end, tmp;
struct vm_area_struct *vma, *prev;
@@ -1089,7 +1128,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
write = madvise_need_mmap_write(behavior);
if (write) {
- if (mmap_write_lock_killable(current->mm))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
/*
@@ -1104,12 +1143,12 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
* but for now we have the mmget_still_valid()
* model.
*/
- if (!mmget_still_valid(current->mm)) {
- mmap_write_unlock(current->mm);
+ if (!mmget_still_valid(mm)) {
+ mmap_write_unlock(mm);
return -EINTR;
}
} else {
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
}
/*
@@ -1117,7 +1156,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
* ranges, just ignore them, but return -ENOMEM at the end.
* - different from the way of handling in mlock etc.
*/
- vma = find_vma_prev(current->mm, start, &prev);
+ vma = find_vma_prev(mm, start, &prev);
if (vma && start > vma->vm_start)
prev = vma;
@@ -1142,7 +1181,8 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
tmp = end;
/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
- error = madvise_vma(vma, &prev, start, tmp, behavior);
+ error = madvise_vma(target_task, vma, &prev,
+ start, tmp, behavior);
if (error)
goto out;
start = tmp;
@@ -1154,19 +1194,122 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
if (prev)
vma = prev->vm_next;
else /* madvise_remove dropped mmap_lock */
- vma = find_vma(current->mm, start);
+ vma = find_vma(mm, start);
}
out:
blk_finish_plug(&plug);
if (write)
- mmap_write_unlock(current->mm);
+ mmap_write_unlock(mm);
else
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
return error;
}
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
- return do_madvise(start, len_in, behavior);
+ return do_madvise(current, current->mm, start, len_in, behavior);
}
+
+static int process_madvise_vec(struct task_struct *target_task,
+ struct mm_struct *mm, struct iov_iter *iter, int behavior)
+{
+ struct iovec iovec;
+ int ret = 0;
+
+ while (iov_iter_count(iter)) {
+ iovec = iov_iter_iovec(iter);
+ ret = do_madvise(target_task, mm, (unsigned long)iovec.iov_base,
+ iovec.iov_len, behavior);
+ if (ret < 0)
+ break;
+ iov_iter_advance(iter, iovec.iov_len);
+ }
+
+ return ret;
+}
+
+static ssize_t do_process_madvise(int pidfd, struct iov_iter *iter,
+ int behavior, unsigned int flags)
+{
+ ssize_t ret;
+ struct pid *pid;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ size_t total_len = iov_iter_count(iter);
+
+ if (flags != 0)
+ return -EINVAL;
+
+ pid = pidfd_get_pid(pidfd);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ ret = -ESRCH;
+ goto put_pid;
+ }
+
+ if (task->mm != current->mm &&
+ !process_madvise_behavior_valid(behavior)) {
+ ret = -EINVAL;
+ goto release_task;
+ }
+
+ mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ ret = process_madvise_vec(task, mm, iter, behavior);
+ if (ret >= 0)
+ ret = total_len - iov_iter_count(iter);
+
+ mmput(mm);
+release_task:
+ put_task_struct(task);
+put_pid:
+ put_pid(pid);
+ return ret;
+}
+
+SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
+ unsigned long, vlen, int, behavior, unsigned int, flags)
+{
+ ssize_t ret;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+
+ ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret >= 0) {
+ ret = do_process_madvise(pidfd, &iter, behavior, flags);
+ kfree(iov);
+ }
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE5(process_madvise, compat_int_t, pidfd,
+ const struct compat_iovec __user *, vec,
+ compat_ulong_t, vlen,
+ compat_int_t, behavior,
+ compat_int_t, flags)
+
+{
+ ssize_t ret;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+
+ ret = compat_import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack),
+ &iov, &iter);
+ if (ret >= 0) {
+ ret = do_process_madvise(pidfd, &iter, behavior, flags);
+ kfree(iov);
+ }
+ return ret;
+}
+#endif
diff --git a/mm/memory.c b/mm/memory.c
index dc7f3543b1fd..bc6a471932b5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3123,8 +3123,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (!page) {
struct swap_info_struct *si = swp_swap_info(entry);
- if (si->flags & SWP_SYNCHRONOUS_IO &&
- __swap_count(entry) == 1) {
+ if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
+ __swap_count(entry) == 1) {
/* skip swapcache */
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
vmf->address);
diff --git a/mm/mempool.c b/mm/mempool.c
index 85efab3da720..79bff63ecf27 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool)
* ensures that there will be frees which return elements to the
* pool waking up the waiters.
*/
- if (unlikely(pool->curr_nr < pool->min_nr)) {
+ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) {
spin_lock_irqsave(&pool->lock, flags);
if (likely(pool->curr_nr < pool->min_nr)) {
add_element(pool, element);
diff --git a/mm/page_counter.c b/mm/page_counter.c
index c56db2d5e159..e3a205275a0b 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
*/
- if (new > c->watermark)
- c->watermark = new;
+ if (new > READ_ONCE(c->watermark))
+ WRITE_ONCE(c->watermark, new);
}
}
@@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter,
propagate_protected_usage(counter, new);
/*
* This is racy, but we can live with some
- * inaccuracy in the failcnt.
+ * inaccuracy in the failcnt which is only used
+ * to report stats.
*/
- c->failcnt++;
+ data_race(c->failcnt++);
*fail = c;
goto failed;
}
@@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter,
* Just like with failcnt, we can live with some
* inaccuracy in the watermark.
*/
- if (new > c->watermark)
- c->watermark = new;
+ if (new > READ_ONCE(c->watermark))
+ WRITE_ONCE(c->watermark, new);
}
return true;
diff --git a/mm/page_io.c b/mm/page_io.c
index e8726f3e3820..44f8fdd8283a 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -85,7 +85,7 @@ static void swap_slot_free_notify(struct page *page)
return;
sis = page_swap_info(page);
- if (!(sis->flags & SWP_BLKDEV))
+ if (data_race(!(sis->flags & SWP_BLKDEV)))
return;
/*
@@ -285,7 +285,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
struct swap_info_struct *sis = page_swap_info(page);
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
- if (sis->flags & SWP_FS) {
+ if (data_race(sis->flags & SWP_FS)) {
struct kiocb kiocb;
struct file *swap_file = sis->swap_file;
struct address_space *mapping = swap_file->f_mapping;
@@ -376,7 +376,7 @@ int swap_readpage(struct page *page, bool synchronous)
goto out;
}
- if (sis->flags & SWP_FS) {
+ if (data_race(sis->flags & SWP_FS)) {
struct file *swap_file = sis->swap_file;
struct address_space *mapping = swap_file->f_mapping;
@@ -438,7 +438,7 @@ int swap_set_page_dirty(struct page *page)
{
struct swap_info_struct *sis = page_swap_info(page);
- if (sis->flags & SWP_FS) {
+ if (data_race(sis->flags & SWP_FS)) {
struct address_space *mapping = sis->swap_file->f_mapping;
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
diff --git a/mm/rmap.c b/mm/rmap.c
index 5fe2dedce1fc..c81dce3e244c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
void flush_tlb_batched_pending(struct mm_struct *mm)
{
- if (mm->tlb_flush_batched) {
+ if (data_race(mm->tlb_flush_batched)) {
flush_tlb_mm(mm);
/*
diff --git a/mm/swap.c b/mm/swap.c
index dbcab84c6fce..667133d25e56 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -632,7 +632,8 @@ void lru_add_drain_cpu(int cpu)
__pagevec_lru_add(pvec);
pvec = &per_cpu(lru_rotate.pvec, cpu);
- if (pagevec_count(pvec)) {
+ /* Disabling interrupts below acts as a compiler barrier. */
+ if (data_race(pagevec_count(pvec))) {
unsigned long flags;
/* No harm done if a racing interrupt already did this */
@@ -793,7 +794,7 @@ void lru_add_drain_all(void)
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
- pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
+ data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..1050fded788b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -57,8 +57,8 @@ static bool enable_vma_readahead __read_mostly = true;
#define GET_SWAP_RA_VAL(vma) \
(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)
-#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
-#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0)
+#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++)
+#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr))
static struct {
unsigned long add_total;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 987276c557d1..c0477896576f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -672,7 +672,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
if (offset == si->lowest_bit)
si->lowest_bit += nr_entries;
if (end == si->highest_bit)
- si->highest_bit -= nr_entries;
+ WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries);
si->inuse_pages += nr_entries;
if (si->inuse_pages == si->pages) {
si->lowest_bit = si->max;
@@ -704,7 +704,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
if (end > si->highest_bit) {
bool was_full = !si->highest_bit;
- si->highest_bit = end;
+ WRITE_ONCE(si->highest_bit, end);
if (was_full && (si->flags & SWP_WRITEOK))
add_to_avail_list(si);
}
@@ -868,7 +868,7 @@ checks:
else
goto done;
}
- si->swap_map[offset] = usage;
+ WRITE_ONCE(si->swap_map[offset], usage);
inc_cluster_info_page(si, si->cluster_info, offset);
unlock_cluster(ci);
@@ -927,12 +927,13 @@ done:
scan:
spin_unlock(&si->lock);
- while (++offset <= si->highest_bit) {
- if (!si->swap_map[offset]) {
+ while (++offset <= READ_ONCE(si->highest_bit)) {
+ if (data_race(!si->swap_map[offset])) {
spin_lock(&si->lock);
goto checks;
}
- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() &&
+ READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
spin_lock(&si->lock);
goto checks;
}
@@ -944,11 +945,12 @@ scan:
}
offset = si->lowest_bit;
while (offset < scan_base) {
- if (!si->swap_map[offset]) {
+ if (data_race(!si->swap_map[offset])) {
spin_lock(&si->lock);
goto checks;
}
- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() &&
+ READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
spin_lock(&si->lock);
goto checks;
}
@@ -1147,7 +1149,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
p = swp_swap_info(entry);
if (!p)
goto bad_nofile;
- if (!(p->flags & SWP_USED))
+ if (data_race(!(p->flags & SWP_USED)))
goto bad_device;
offset = swp_offset(entry);
if (offset >= p->max)
@@ -1173,7 +1175,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
p = __swap_info_get(entry);
if (!p)
goto out;
- if (!p->swap_map[swp_offset(entry)])
+ if (data_race(!p->swap_map[swp_offset(entry)]))
goto bad_free;
return p;
@@ -1242,7 +1244,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
}
usage = count | has_cache;
- p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
+ if (usage)
+ WRITE_ONCE(p->swap_map[offset], usage);
+ else
+ WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE);
return usage;
}
@@ -1294,7 +1299,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
goto bad_nofile;
rcu_read_lock();
- if (!(si->flags & SWP_VALID))
+ if (data_race(!(si->flags & SWP_VALID)))
goto unlock_out;
offset = swp_offset(entry);
if (offset >= si->max)
@@ -3482,7 +3487,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
} else
err = -ENOENT; /* unused swap entry */
- p->swap_map[offset] = count | has_cache;
+ WRITE_ONCE(p->swap_map[offset], count | has_cache);
unlock_out:
unlock_cluster_or_swap_info(p, ci);