summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2025-05-13 10:39:22 +0200
committerIngo Molnar <mingo@kernel.org>2025-05-13 10:39:22 +0200
commit34be751998c1407a460efe3a20f9c4ddb8c82b9f (patch)
treee6b55b0ee269828b0cb63c67039c094ef0f407dc
parent69cb33e2f81a3265383f0c8bbd27c32b4a5a6bf3 (diff)
parent1b3f2bd04d90f61e1f291b5e365b9bc4ce0ea7c7 (diff)
Merge branch 'x86/mm' into x86/core, to resolve conflicts
Conflicts: arch/x86/mm/numa.c arch/x86/mm/pgtable.c Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/Kconfig.assembler4
-rw-r--r--arch/x86/include/asm/mwait.h66
-rw-r--r--arch/x86/include/asm/page_32_types.h1
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h4
-rw-r--r--arch/x86/include/asm/pgtable.h10
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c3
-rw-r--r--arch/x86/mm/init_32.c7
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/mm/mm_internal.h4
-rw-r--r--arch/x86/mm/numa.c3
-rw-r--r--arch/x86/mm/numa_32.c61
-rw-r--r--arch/x86/mm/numa_64.c13
-rw-r--r--arch/x86/mm/numa_internal.h10
-rw-r--r--arch/x86/mm/pat/memtype.c31
-rw-r--r--arch/x86/mm/pat/set_memory.c4
-rw-r--r--arch/x86/mm/pgtable.c103
-rw-r--r--arch/x86/mm/pti.c4
-rw-r--r--arch/x86/mm/tlb.c63
-rw-r--r--drivers/char/mem.c18
-rw-r--r--include/linux/io.h21
-rw-r--r--tools/testing/selftests/x86/lam.c9
25 files changed, 130 insertions, 331 deletions
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 6d20a6ce0507..fa8858546d5e 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -15,10 +15,6 @@ config AS_SHA256_NI
def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
help
Supported by binutils >= 2.24 and LLVM integrated assembler
-config AS_TPAUSE
- def_bool $(as-instr,tpause %ecx)
- help
- Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
config AS_GFNI
def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index ce857ef54cf1..dd2b129b0418 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -25,29 +25,31 @@
#define TPAUSE_C01_STATE 1
#define TPAUSE_C02_STATE 0
-static __always_inline void __monitor(const void *eax, unsigned long ecx,
- unsigned long edx)
+static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx)
{
- /* "monitor %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc8;"
- :: "a" (eax), "c" (ecx), "d"(edx));
+ /*
+ * Use the instruction mnemonic with implicit operands, as the LLVM
+ * assembler fails to assemble the mnemonic with explicit operands:
+ */
+ asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx));
}
-static __always_inline void __monitorx(const void *eax, unsigned long ecx,
- unsigned long edx)
+static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
{
- /* "monitorx %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xfa;"
+ /* "monitorx %eax, %ecx, %edx" */
+ asm volatile(".byte 0x0f, 0x01, 0xfa"
:: "a" (eax), "c" (ecx), "d"(edx));
}
-static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __mwait(u32 eax, u32 ecx)
{
mds_idle_clear_cpu_buffers();
- /* "mwait %eax, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
+ /*
+ * Use the instruction mnemonic with implicit operands, as the LLVM
+ * assembler fails to assemble the mnemonic with explicit operands:
+ */
+ asm volatile("mwait" :: "a" (eax), "c" (ecx));
}
/*
@@ -76,13 +78,12 @@ static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
* EAX (logical) address to monitor
* ECX #GP if not zero
*/
-static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
- unsigned long ecx)
+static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
/* No MDS buffer clear as this is AMD/HYGON only */
- /* "mwaitx %eax, %ebx, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xfb;"
+ /* "mwaitx %eax, %ebx, %ecx" */
+ asm volatile(".byte 0x0f, 0x01, 0xfb"
:: "a" (eax), "b" (ebx), "c" (ecx));
}
@@ -95,12 +96,11 @@ static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
* executing mwait, it would otherwise go unnoticed and the next tick
* would not be reprogrammed accordingly before mwait ever wakes up.
*/
-static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __sti_mwait(u32 eax, u32 ecx)
{
mds_idle_clear_cpu_buffers();
- /* "mwait %eax, %ecx;" */
- asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
+
+ asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
}
/*
@@ -113,16 +113,13 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
{
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
- if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
- mb();
- clflush((void *)&current_thread_info()->flags);
- mb();
- }
+ const void *addr = &current_thread_info()->flags;
- __monitor((void *)&current_thread_info()->flags, 0, 0);
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
if (!need_resched()) {
if (ecx & 1) {
@@ -144,16 +141,9 @@ static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned lo
*/
static inline void __tpause(u32 ecx, u32 edx, u32 eax)
{
- /* "tpause %ecx, %edx, %eax;" */
- #ifdef CONFIG_AS_TPAUSE
- asm volatile("tpause %%ecx\n"
- :
- : "c"(ecx), "d"(edx), "a"(eax));
- #else
- asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
- :
- : "c"(ecx), "d"(edx), "a"(eax));
- #endif
+ /* "tpause %ecx" */
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1"
+ :: "c" (ecx), "d" (edx), "a" (eax));
}
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index a9b62e0e6f79..623f1e9f493e 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -73,7 +73,6 @@ extern unsigned int __VMALLOC_RESERVE;
extern int sysctl_legacy_va_layout;
extern void find_low_pfn_range(void);
-extern void setup_bootmem_allocator(void);
#endif /* !__ASSEMBLER__ */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 66425424ce91..54690bd4ddbe 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -18,8 +18,6 @@ typedef union {
} pte_t;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
-
#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 9d5b257d44e3..580b09bf6a45 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -27,9 +27,7 @@ typedef union {
} pmd_t;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
-
-#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7bd6bd6df4a1..5ddba366d3b4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -292,13 +292,6 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
-#define p4d_leaf p4d_leaf
-static inline bool p4d_leaf(p4d_t p4d)
-{
- /* No 512 GiB pages yet */
- return 0;
-}
-
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
#define pmd_leaf pmd_leaf
@@ -1472,9 +1465,6 @@ static inline bool pgdp_maps_userspace(void *__ptr)
return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}
-#define pgd_leaf pgd_leaf
-static inline bool pgd_leaf(pgd_t pgd) { return false; }
-
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 5bb782d856f2..e83721db18c9 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -46,8 +46,6 @@ extern unsigned int ptrs_per_p4d;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
-
#ifdef CONFIG_X86_5LEVEL
/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9e6180777565..1871e833ecc7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -901,13 +901,10 @@ static __init bool prefer_mwait_c1_over_halt(void)
static __cpuidle void mwait_idle(void)
{
if (!current_set_polling_and_test()) {
- if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
- mb(); /* quirk */
- clflush((void *)&current_thread_info()->flags);
- mb(); /* quirk */
- }
+ const void *addr = &current_thread_info()->flags;
- __monitor((void *)&current_thread_info()->flags, 0, 0);
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
if (!need_resched()) {
__sti_mwait(0, 0);
raw_local_irq_disable();
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3faa60f13a61..cebe5812d78d 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
-obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 296d294142c8..697432f63c59 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,7 +13,6 @@
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
#include <linux/hugetlb.h> /* hstate_index_to_shift */
-#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/
@@ -1496,8 +1495,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2();
- prefetchw(&current->mm->mmap_lock);
-
/*
* KVM uses #PF vector to deliver 'page not present' events to guests
* (asynchronous page fault mechanism). The event happens when a
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ad662cc4605c..d467f89191cd 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -612,7 +612,6 @@ void __init find_low_pfn_range(void)
highmem_pfn_init();
}
-#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
@@ -633,12 +632,6 @@ void __init initmem_init(void)
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
- setup_bootmem_allocator();
-}
-#endif /* !CONFIG_NUMA */
-
-void __init setup_bootmem_allocator(void)
-{
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7c4f6f591f2b..b7655396f4e0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -805,12 +805,17 @@ kernel_physical_mapping_change(unsigned long paddr_start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(void)
+static inline void x86_numa_init(void)
{
memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
}
#endif
+void __init initmem_init(void)
+{
+ x86_numa_init();
+}
+
void __init paging_init(void)
{
sparse_init();
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 3f37b5c80bb3..097aadc250f7 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -25,4 +25,8 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache);
extern unsigned long tlb_single_page_flush_ceiling;
+#ifdef CONFIG_NUMA
+void __init x86_numa_init(void);
+#endif
+
#endif /* __X86_MM_INTERNAL_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fed02d1073c7..c24890c40138 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -18,9 +18,10 @@
#include <asm/e820/api.h>
#include <asm/proto.h>
#include <asm/dma.h>
+#include <asm/numa.h>
#include <asm/amd/nb.h>
-#include "numa_internal.h"
+#include "mm_internal.h"
int numa_off;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
deleted file mode 100644
index 65fda406e6f2..000000000000
--- a/arch/x86/mm/numa_32.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation
- * August 2002: added remote node KVA remap - Martin J. Bligh
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/memblock.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <asm/pgtable_areas.h>
-
-#include "numa_internal.h"
-
-extern unsigned long highend_pfn, highstart_pfn;
-
-void __init initmem_init(void)
-{
- x86_numa_init();
-
-#ifdef CONFIG_HIGHMEM
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > max_low_pfn)
- highstart_pfn = max_low_pfn;
- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- pages_to_mb(highend_pfn - highstart_pfn));
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
- printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
- pages_to_mb(max_low_pfn));
- printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n",
- max_low_pfn, highstart_pfn);
-
- printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
- (ulong) pfn_to_kaddr(max_low_pfn));
-
- printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
- (ulong) pfn_to_kaddr(highstart_pfn));
-
- __vmalloc_start_set = true;
- setup_bootmem_allocator();
-}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
deleted file mode 100644
index 59d80160fa5a..000000000000
--- a/arch/x86/mm/numa_64.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Generic VM initialization for x86-64 NUMA setups.
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/memblock.h>
-
-#include "numa_internal.h"
-
-void __init initmem_init(void)
-{
- x86_numa_init();
-}
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
deleted file mode 100644
index 11e1ff370c10..000000000000
--- a/arch/x86/mm/numa_internal.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __X86_MM_NUMA_INTERNAL_H
-#define __X86_MM_NUMA_INTERNAL_H
-
-#include <linux/types.h>
-#include <asm/numa.h>
-
-void __init x86_numa_init(void);
-
-#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 72d8cbc61158..c97b6598f187 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -38,6 +38,7 @@
#include <linux/kernel.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
@@ -773,38 +774,14 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
return vma_prot;
}
-#ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- return 1;
-}
-#else
-/* This check is needed to avoid cache aliasing when PAT is enabled */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- u64 from = ((u64)pfn) << PAGE_SHIFT;
- u64 to = from + size;
- u64 cursor = from;
-
- if (!pat_enabled())
- return 1;
-
- while (cursor < to) {
- if (!devmem_is_allowed(pfn))
- return 0;
- cursor += PAGE_SIZE;
- pfn++;
- }
- return 1;
-}
-#endif /* CONFIG_STRICT_DEVMEM */
-
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;
+ if (!pat_enabled())
+ return 1;
+
if (!range_is_allowed(pfn, size))
return 0;
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index def3d9284254..30ab4aced761 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -889,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
/* change init_mm */
set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
- if (!SHARED_KERNEL_PMD) {
+ {
struct page *page;
list_for_each_entry(page, &pgd_list, lru) {
@@ -1293,7 +1293,7 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
/* Queue the page table to be freed after TLB flush */
list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
- if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) {
+ if (IS_ENABLED(CONFIG_X86_32)) {
struct page *page;
/* Update all PGD tables to use the same large page */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e62af72923e8..7c253deb46e9 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -69,12 +69,6 @@ static inline void pgd_list_del(pgd_t *pgd)
list_del(&ptdesc->pt_list);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-#define MAX_UNSHARED_PTRS_PER_PGD \
- MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
-
-
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
virt_to_ptdesc(pgd)->pt_mm = mm;
@@ -87,29 +81,19 @@ struct mm_struct *pgd_page_get_mm(struct page *page)
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
- /* If the pgd points to a shared pagetable level (either the
- ptes in non-PAE, or shared PMD in PAE), then just copy the
- references from swapper_pg_dir. */
- if (CONFIG_PGTABLE_LEVELS == 2 ||
- (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
- CONFIG_PGTABLE_LEVELS >= 4) {
+ /* PAE preallocates all its PMDs. No cloning needed. */
+ if (!IS_ENABLED(CONFIG_X86_PAE))
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
- }
- /* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD) {
- pgd_set_mm(pgd, mm);
- pgd_list_add(pgd);
- }
+ /* List used to sync kernel mapping updates */
+ pgd_set_mm(pgd, mm);
+ pgd_list_add(pgd);
}
static void pgd_dtor(pgd_t *pgd)
{
- if (SHARED_KERNEL_PMD)
- return;
-
spin_lock(&pgd_lock);
pgd_list_del(pgd);
spin_unlock(&pgd_lock);
@@ -133,15 +117,15 @@ static void pgd_dtor(pgd_t *pgd)
* processor notices the update. Since this is expensive, and
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
*/
-#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
-#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD
+#define PREALLOCATED_PMDS PTRS_PER_PGD
/*
+ * "USER_PMDS" are the PMDs for the user copy of the page tables when
+ * PTI is enabled. They do not exist when PTI is disabled. Note that
+ * this is distinct from the user _portion_ of the kernel page tables
+ * which always exists.
+ *
* We allocate separate PMDs for the kernel part of the user page-table
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
@@ -170,7 +154,6 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0
-#define MAX_PREALLOCATED_PMDS 0
#define PREALLOCATED_USER_PMDS 0
#define MAX_PREALLOCATED_USER_PMDS 0
#endif /* CONFIG_X86_PAE */
@@ -319,68 +302,15 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
{
}
#endif
-/*
- * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
- * assumes that pgd should be in one page.
- *
- * But kernel with PAE paging that is not running as a Xen domain
- * only needs to allocate 32 bytes for pgd instead of one page.
- */
-#ifdef CONFIG_X86_PAE
-
-#include <linux/slab.h>
-
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-#define PGD_ALIGN 32
-
-static struct kmem_cache *pgd_cache;
-
-void __init pgtable_cache_init(void)
-{
- /*
- * When PAE kernel is running as a Xen domain, it does not use
- * shared kernel pmd. And this requires a whole page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return;
-
- /*
- * when PAE kernel is not running as a Xen domain, it uses
- * shared kernel pmd. Shared kernel pmd does not require a whole
- * page for pgd. We are able to just allocate a 32-byte for pgd.
- * During boot time, we create a 32-byte slab for pgd table allocation.
- */
- pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
- SLAB_PANIC, NULL);
-}
static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{
/*
- * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
- * We allocate one page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return __pgd_alloc(mm, pgd_allocation_order());
-
- /*
- * Now PAE kernel is not running as a Xen domain. We can allocate
- * a 32-byte slab for pgd to save memory space.
+ * PTI and Xen need a whole page for the PAE PGD
+ * even though the hardware only needs 32 bytes.
+ *
+ * For simplicity, allocate a page for all users.
*/
- return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
-}
-
-static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- if (!SHARED_KERNEL_PMD)
- __pgd_free(mm, pgd);
- else
- kmem_cache_free(pgd_cache, pgd);
-}
-#else
-
-static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
-{
return __pgd_alloc(mm, pgd_allocation_order());
}
@@ -388,13 +318,12 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
__pgd_free(mm, pgd);
}
-#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
- pmd_t *pmds[MAX_PREALLOCATED_PMDS];
+ pmd_t *pmds[PREALLOCATED_PMDS];
pgd = _pgd_alloc(mm);
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 5f0d579932c6..190299834011 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -185,7 +185,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
- BUILD_BUG_ON(pgd_leaf(*pgd) != 0);
+ BUILD_BUG_ON(pgd_leaf(*pgd));
return p4d_offset(pgd, address);
}
@@ -206,7 +206,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!p4d)
return NULL;
- BUILD_BUG_ON(p4d_leaf(*p4d) != 0);
+ BUILD_BUG_ON(p4d_leaf(*p4d));
if (p4d_none(*p4d)) {
unsigned long new_pud_page = __get_free_page(gfp);
if (WARN_ON_ONCE(!new_pud_page))
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 2ea8cec6cd49..5f75d63093c8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -215,16 +215,20 @@ static void clear_asid_other(void)
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+struct new_asid {
+ unsigned int asid : 16;
+ unsigned int need_flush : 1;
+};
-static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
- u16 *new_asid, bool *need_flush)
+static struct new_asid choose_new_asid(struct mm_struct *next, u64 next_tlb_gen)
{
+ struct new_asid ns;
u16 asid;
if (!static_cpu_has(X86_FEATURE_PCID)) {
- *new_asid = 0;
- *need_flush = true;
- return;
+ ns.asid = 0;
+ ns.need_flush = 1;
+ return ns;
}
/*
@@ -235,9 +239,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 global_asid = mm_global_asid(next);
if (global_asid) {
- *new_asid = global_asid;
- *need_flush = false;
- return;
+ ns.asid = global_asid;
+ ns.need_flush = 0;
+ return ns;
}
}
@@ -249,22 +253,23 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
next->context.ctx_id)
continue;
- *new_asid = asid;
- *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
- next_tlb_gen);
- return;
+ ns.asid = asid;
+ ns.need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen);
+ return ns;
}
/*
* We don't currently own an ASID slot on this CPU.
* Allocate a slot.
*/
- *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
- if (*new_asid >= TLB_NR_DYN_ASIDS) {
- *new_asid = 0;
+ ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+ if (ns.asid >= TLB_NR_DYN_ASIDS) {
+ ns.asid = 0;
this_cpu_write(cpu_tlbstate.next_asid, 1);
}
- *need_flush = true;
+ ns.need_flush = true;
+
+ return ns;
}
/*
@@ -781,9 +786,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
unsigned long new_lam;
+ struct new_asid ns;
u64 next_tlb_gen;
- bool need_flush;
- u16 new_asid;
+
/* We don't want flush_tlb_func() to run concurrently with us. */
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
@@ -855,7 +860,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
/* Check if the current mm is transitioning to a global ASID */
if (mm_needs_global_asid(next, prev_asid)) {
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
goto reload_tlb;
}
@@ -890,8 +895,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* TLB contents went out of date while we were in lazy
* mode. Fall through to the TLB switching code below.
*/
- new_asid = prev_asid;
- need_flush = true;
+ ns.asid = prev_asid;
+ ns.need_flush = true;
} else {
/*
* Apply process to process speculation vulnerability
@@ -912,21 +917,21 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ ns = choose_new_asid(next, next_tlb_gen);
}
reload_tlb:
new_lam = mm_lam_cr3_mask(next);
- if (need_flush) {
- VM_WARN_ON_ONCE(is_global_asid(new_asid));
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
+ if (ns.need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(ns.asid));
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
+ load_new_mm_cr3(next->pgd, ns.asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -935,7 +940,7 @@ reload_tlb:
barrier();
this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid);
cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
if (next != prev) {
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 169eed162a7f..48839958b0b1 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -61,29 +61,11 @@ static inline int page_is_allowed(unsigned long pfn)
{
return devmem_is_allowed(pfn);
}
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- u64 from = ((u64)pfn) << PAGE_SHIFT;
- u64 to = from + size;
- u64 cursor = from;
-
- while (cursor < to) {
- if (!devmem_is_allowed(pfn))
- return 0;
- cursor += PAGE_SIZE;
- pfn++;
- }
- return 1;
-}
#else
static inline int page_is_allowed(unsigned long pfn)
{
return 1;
}
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- return 1;
-}
#endif
static inline bool should_stop_iteration(void)
diff --git a/include/linux/io.h b/include/linux/io.h
index 6a6bc4d46d0a..0642c7ee41db 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base,
int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start,
resource_size_t size);
+#ifdef CONFIG_STRICT_DEVMEM
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+ u64 from = ((u64)pfn) << PAGE_SHIFT;
+ u64 to = from + size;
+ u64 cursor = from;
+
+ while (cursor < to) {
+ if (!devmem_is_allowed(pfn))
+ return 0;
+ cursor += PAGE_SIZE;
+ pfn++;
+ }
+ return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+ return 1;
+}
+#endif
#endif /* _LINUX_IO_H */
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 18d736640ece..0873b0e5f48b 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -682,7 +682,7 @@ int do_uring(unsigned long lam)
return 1;
if (fstat(file_fd, &st) < 0)
- return 1;
+ goto cleanup;
off_t file_sz = st.st_size;
@@ -690,7 +690,7 @@ int do_uring(unsigned long lam)
fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
if (!fi)
- return 1;
+ goto cleanup;
fi->file_sz = file_sz;
fi->file_fd = file_fd;
@@ -698,7 +698,7 @@ int do_uring(unsigned long lam)
ring = malloc(sizeof(*ring));
if (!ring) {
free(fi);
- return 1;
+ goto cleanup;
}
memset(ring, 0, sizeof(struct io_ring));
@@ -729,6 +729,8 @@ out:
}
free(fi);
+cleanup:
+ close(file_fd);
return ret;
}
@@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void)
wq = mmap(NULL, 0x1000, PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
+ close(fd);
if (wq == MAP_FAILED)
perror("mmap");