diff options
author | Ingo Molnar <mingo@kernel.org> | 2025-05-13 10:39:22 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2025-05-13 10:39:22 +0200 |
commit | 34be751998c1407a460efe3a20f9c4ddb8c82b9f (patch) | |
tree | e6b55b0ee269828b0cb63c67039c094ef0f407dc | |
parent | 69cb33e2f81a3265383f0c8bbd27c32b4a5a6bf3 (diff) | |
parent | 1b3f2bd04d90f61e1f291b5e365b9bc4ce0ea7c7 (diff) |
Merge branch 'x86/mm' into x86/core, to resolve conflicts
Conflicts:
arch/x86/mm/numa.c
arch/x86/mm/pgtable.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/Kconfig.assembler | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/mwait.h | 66 | ||||
-rw-r--r-- | arch/x86/include/asm/page_32_types.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable-2level_types.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable-3level_types.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64_types.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 9 | ||||
-rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/mm_internal.h | 4 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/numa_32.c | 61 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/numa_internal.h | 10 | ||||
-rw-r--r-- | arch/x86/mm/pat/memtype.c | 31 | ||||
-rw-r--r-- | arch/x86/mm/pat/set_memory.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 103 | ||||
-rw-r--r-- | arch/x86/mm/pti.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 63 | ||||
-rw-r--r-- | drivers/char/mem.c | 18 | ||||
-rw-r--r-- | include/linux/io.h | 21 | ||||
-rw-r--r-- | tools/testing/selftests/x86/lam.c | 9 |
25 files changed, 130 insertions, 331 deletions
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 6d20a6ce0507..fa8858546d5e 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -15,10 +15,6 @@ config AS_SHA256_NI def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) help Supported by binutils >= 2.24 and LLVM integrated assembler -config AS_TPAUSE - def_bool $(as-instr,tpause %ecx) - help - Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 config AS_GFNI def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index ce857ef54cf1..dd2b129b0418 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -25,29 +25,31 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -static __always_inline void __monitor(const void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx) { - /* "monitor %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc8;" - :: "a" (eax), "c" (ecx), "d"(edx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx)); } -static __always_inline void __monitorx(const void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) { - /* "monitorx %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfa;" + /* "monitorx %eax, %ecx, %edx" */ + asm volatile(".byte 0x0f, 0x01, 0xfa" :: "a" (eax), "c" (ecx), "d"(edx)); } -static __always_inline void __mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __mwait(u32 eax, u32 ecx) { mds_idle_clear_cpu_buffers(); - /* "mwait %eax, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("mwait" :: "a" (eax), "c" (ecx)); } /* @@ -76,13 +78,12 @@ static __always_inline void __mwait(unsigned long eax, unsigned long ecx) * EAX (logical) address to monitor * ECX #GP if not zero */ -static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx, - unsigned long ecx) +static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { /* No MDS buffer clear as this is AMD/HYGON only */ - /* "mwaitx %eax, %ebx, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfb;" + /* "mwaitx %eax, %ebx, %ecx" */ + asm volatile(".byte 0x0f, 0x01, 0xfb" :: "a" (eax), "b" (ebx), "c" (ecx)); } @@ -95,12 +96,11 @@ static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx, * executing mwait, it would otherwise go unnoticed and the next tick * would not be reprogrammed accordingly before mwait ever wakes up. */ -static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __sti_mwait(u32 eax, u32 ecx) { mds_idle_clear_cpu_buffers(); - /* "mwait %eax, %ecx;" */ - asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + + asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } /* @@ -113,16 +113,13 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { - if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { - mb(); - clflush((void *)¤t_thread_info()->flags); - mb(); - } + const void *addr = ¤t_thread_info()->flags; - __monitor((void *)¤t_thread_info()->flags, 0, 0); + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); if (!need_resched()) { if (ecx & 1) { @@ -144,16 +141,9 @@ static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned lo */ static inline void __tpause(u32 ecx, u32 edx, u32 eax) { - /* "tpause %ecx, %edx, %eax;" */ - #ifdef CONFIG_AS_TPAUSE - asm volatile("tpause %%ecx\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); - #else - asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); - #endif + /* "tpause %ecx" */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1" + :: "c" (ecx), "d" (edx), "a" (eax)); } #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index a9b62e0e6f79..623f1e9f493e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -73,7 +73,6 @@ extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); -extern void setup_bootmem_allocator(void); #endif /* !__ASSEMBLER__ */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 66425424ce91..54690bd4ddbe 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -18,8 +18,6 @@ typedef union { } pte_t; #endif /* !__ASSEMBLER__ */ -#define SHARED_KERNEL_PMD 0 - #define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 9d5b257d44e3..580b09bf6a45 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -27,9 +27,7 @@ typedef union { } pmd_t; #endif /* !__ASSEMBLER__ */ -#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) - -#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED) +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* * PGDIR_SHIFT determines what a top-level page table entry can map diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7bd6bd6df4a1..5ddba366d3b4 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -292,13 +292,6 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } -#define p4d_leaf p4d_leaf -static inline bool p4d_leaf(p4d_t p4d) -{ - /* No 512 GiB pages yet */ - return 0; -} - #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define pmd_leaf pmd_leaf @@ -1472,9 +1465,6 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } -#define pgd_leaf pgd_leaf -static inline bool pgd_leaf(pgd_t pgd) { return false; } - #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 5bb782d856f2..e83721db18c9 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -46,8 +46,6 @@ extern unsigned int ptrs_per_p4d; #endif /* !__ASSEMBLER__ */ -#define SHARED_KERNEL_PMD 0 - #ifdef CONFIG_X86_5LEVEL /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9e6180777565..1871e833ecc7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -901,13 +901,10 @@ static __init bool prefer_mwait_c1_over_halt(void) static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { - if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { - mb(); /* quirk */ - clflush((void *)¤t_thread_info()->flags); - mb(); /* quirk */ - } + const void *addr = ¤t_thread_info()->flags; - __monitor((void *)¤t_thread_info()->flags, 0, 0); + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); if (!need_resched()) { __sti_mwait(0, 0); raw_local_irq_disable(); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3faa60f13a61..cebe5812d78d 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o -obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 296d294142c8..697432f63c59 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,7 +13,6 @@ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ -#include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ @@ -1496,8 +1495,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2(); - prefetchw(¤t->mm->mmap_lock); - /* * KVM uses #PF vector to deliver 'page not present' events to guests * (asynchronous page fault mechanism). The event happens when a diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ad662cc4605c..d467f89191cd 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -612,7 +612,6 @@ void __init find_low_pfn_range(void) highmem_pfn_init(); } -#ifndef CONFIG_NUMA void __init initmem_init(void) { #ifdef CONFIG_HIGHMEM @@ -633,12 +632,6 @@ void __init initmem_init(void) printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); - setup_bootmem_allocator(); -} -#endif /* !CONFIG_NUMA */ - -void __init setup_bootmem_allocator(void) -{ printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped<<PAGE_SHIFT); printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7c4f6f591f2b..b7655396f4e0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -805,12 +805,17 @@ kernel_physical_mapping_change(unsigned long paddr_start, } #ifndef CONFIG_NUMA -void __init initmem_init(void) +static inline void x86_numa_init(void) { memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); } #endif +void __init initmem_init(void) +{ + x86_numa_init(); +} + void __init paging_init(void) { sparse_init(); diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index 3f37b5c80bb3..097aadc250f7 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -25,4 +25,8 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache); extern unsigned long tlb_single_page_flush_ceiling; +#ifdef CONFIG_NUMA +void __init x86_numa_init(void); +#endif + #endif /* __X86_MM_INTERNAL_H */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fed02d1073c7..c24890c40138 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -18,9 +18,10 @@ #include <asm/e820/api.h> #include <asm/proto.h> #include <asm/dma.h> +#include <asm/numa.h> #include <asm/amd/nb.h> -#include "numa_internal.h" +#include "mm_internal.h" int numa_off; diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c deleted file mode 100644 index 65fda406e6f2..000000000000 --- a/arch/x86/mm/numa_32.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation - * August 2002: added remote node KVA remap - Martin J. Bligh - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/memblock.h> -#include <linux/init.h> -#include <linux/vmalloc.h> -#include <asm/pgtable_areas.h> - -#include "numa_internal.h" - -extern unsigned long highend_pfn, highstart_pfn; - -void __init initmem_init(void) -{ - x86_numa_init(); - -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > max_low_pfn) - highstart_pfn = max_low_pfn; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); - printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", - max_low_pfn, highstart_pfn); - - printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", - (ulong) pfn_to_kaddr(max_low_pfn)); - - printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", - (ulong) pfn_to_kaddr(highstart_pfn)); - - __vmalloc_start_set = true; - setup_bootmem_allocator(); -} diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c deleted file mode 100644 index 59d80160fa5a..000000000000 --- a/arch/x86/mm/numa_64.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Generic VM initialization for x86-64 NUMA setups. - * Copyright 2002,2003 Andi Kleen, SuSE Labs. - */ -#include <linux/memblock.h> - -#include "numa_internal.h" - -void __init initmem_init(void) -{ - x86_numa_init(); -} diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h deleted file mode 100644 index 11e1ff370c10..000000000000 --- a/arch/x86/mm/numa_internal.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __X86_MM_NUMA_INTERNAL_H -#define __X86_MM_NUMA_INTERNAL_H - -#include <linux/types.h> -#include <asm/numa.h> - -void __init x86_numa_init(void); - -#endif /* __X86_MM_NUMA_INTERNAL_H */ diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 72d8cbc61158..c97b6598f187 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -38,6 +38,7 @@ #include <linux/kernel.h> #include <linux/pfn_t.h> #include <linux/slab.h> +#include <linux/io.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/fs.h> @@ -773,38 +774,14 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, return vma_prot; } -#ifdef CONFIG_STRICT_DEVMEM -/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - return 1; -} -#else -/* This check is needed to avoid cache aliasing when PAT is enabled */ -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - u64 from = ((u64)pfn) << PAGE_SHIFT; - u64 to = from + size; - u64 cursor = from; - - if (!pat_enabled()) - return 1; - - while (cursor < to) { - if (!devmem_is_allowed(pfn)) - return 0; - cursor += PAGE_SIZE; - pfn++; - } - return 1; -} -#endif /* CONFIG_STRICT_DEVMEM */ - int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB; + if (!pat_enabled()) + return 1; + if (!range_is_allowed(pfn, size)) return 0; diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index def3d9284254..30ab4aced761 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -889,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) /* change init_mm */ set_pte_atomic(kpte, pte); #ifdef CONFIG_X86_32 - if (!SHARED_KERNEL_PMD) { + { struct page *page; list_for_each_entry(page, &pgd_list, lru) { @@ -1293,7 +1293,7 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, /* Queue the page table to be freed after TLB flush */ list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables); - if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) { + if (IS_ENABLED(CONFIG_X86_32)) { struct page *page; /* Update all PGD tables to use the same large page */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e62af72923e8..7c253deb46e9 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -69,12 +69,6 @@ static inline void pgd_list_del(pgd_t *pgd) list_del(&ptdesc->pt_list); } -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) -#define MAX_UNSHARED_PTRS_PER_PGD \ - MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) - - static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { virt_to_ptdesc(pgd)->pt_mm = mm; @@ -87,29 +81,19 @@ struct mm_struct *pgd_page_get_mm(struct page *page) static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) { - /* If the pgd points to a shared pagetable level (either the - ptes in non-PAE, or shared PMD in PAE), then just copy the - references from swapper_pg_dir. */ - if (CONFIG_PGTABLE_LEVELS == 2 || - (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || - CONFIG_PGTABLE_LEVELS >= 4) { + /* PAE preallocates all its PMDs. No cloning needed. */ + if (!IS_ENABLED(CONFIG_X86_PAE)) clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); - } - /* list required to sync kernel mapping updates */ - if (!SHARED_KERNEL_PMD) { - pgd_set_mm(pgd, mm); - pgd_list_add(pgd); - } + /* List used to sync kernel mapping updates */ + pgd_set_mm(pgd, mm); + pgd_list_add(pgd); } static void pgd_dtor(pgd_t *pgd) { - if (SHARED_KERNEL_PMD) - return; - spin_lock(&pgd_lock); pgd_list_del(pgd); spin_unlock(&pgd_lock); @@ -133,15 +117,15 @@ static void pgd_dtor(pgd_t *pgd) * processor notices the update. Since this is expensive, and * all 4 top-level entries are used almost immediately in a * new process's life, we just pre-populate them here. - * - * Also, if we're in a paravirt environment where the kernel pmd is - * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate - * and initialize the kernel pmds here. */ -#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD -#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD +#define PREALLOCATED_PMDS PTRS_PER_PGD /* + * "USER_PMDS" are the PMDs for the user copy of the page tables when + * PTI is enabled. They do not exist when PTI is disabled. Note that + * this is distinct from the user _portion_ of the kernel page tables + * which always exists. + * * We allocate separate PMDs for the kernel part of the user page-table * when PTI is enabled. We need them to map the per-process LDT into the * user-space page-table. @@ -170,7 +154,6 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 -#define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 #define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ @@ -319,68 +302,15 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm, { } #endif -/* - * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also - * assumes that pgd should be in one page. - * - * But kernel with PAE paging that is not running as a Xen domain - * only needs to allocate 32 bytes for pgd instead of one page. - */ -#ifdef CONFIG_X86_PAE - -#include <linux/slab.h> - -#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) -#define PGD_ALIGN 32 - -static struct kmem_cache *pgd_cache; - -void __init pgtable_cache_init(void) -{ - /* - * When PAE kernel is running as a Xen domain, it does not use - * shared kernel pmd. And this requires a whole page for pgd. - */ - if (!SHARED_KERNEL_PMD) - return; - - /* - * when PAE kernel is not running as a Xen domain, it uses - * shared kernel pmd. Shared kernel pmd does not require a whole - * page for pgd. We are able to just allocate a 32-byte for pgd. - * During boot time, we create a 32-byte slab for pgd table allocation. - */ - pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, - SLAB_PANIC, NULL); -} static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { /* - * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. - * We allocate one page for pgd. - */ - if (!SHARED_KERNEL_PMD) - return __pgd_alloc(mm, pgd_allocation_order()); - - /* - * Now PAE kernel is not running as a Xen domain. We can allocate - * a 32-byte slab for pgd to save memory space. + * PTI and Xen need a whole page for the PAE PGD + * even though the hardware only needs 32 bytes. + * + * For simplicity, allocate a page for all users. */ - return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); -} - -static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - if (!SHARED_KERNEL_PMD) - __pgd_free(mm, pgd); - else - kmem_cache_free(pgd_cache, pgd); -} -#else - -static inline pgd_t *_pgd_alloc(struct mm_struct *mm) -{ return __pgd_alloc(mm, pgd_allocation_order()); } @@ -388,13 +318,12 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) { __pgd_free(mm, pgd); } -#endif /* CONFIG_X86_PAE */ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; - pmd_t *pmds[MAX_PREALLOCATED_PMDS]; + pmd_t *pmds[PREALLOCATED_PMDS]; pgd = _pgd_alloc(mm); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 5f0d579932c6..190299834011 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -185,7 +185,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); } - BUILD_BUG_ON(pgd_leaf(*pgd) != 0); + BUILD_BUG_ON(pgd_leaf(*pgd)); return p4d_offset(pgd, address); } @@ -206,7 +206,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) if (!p4d) return NULL; - BUILD_BUG_ON(p4d_leaf(*p4d) != 0); + BUILD_BUG_ON(p4d_leaf(*p4d)); if (p4d_none(*p4d)) { unsigned long new_pud_page = __get_free_page(gfp); if (WARN_ON_ONCE(!new_pud_page)) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 2ea8cec6cd49..5f75d63093c8 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -215,16 +215,20 @@ static void clear_asid_other(void) atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); +struct new_asid { + unsigned int asid : 16; + unsigned int need_flush : 1; +}; -static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, - u16 *new_asid, bool *need_flush) +static struct new_asid choose_new_asid(struct mm_struct *next, u64 next_tlb_gen) { + struct new_asid ns; u16 asid; if (!static_cpu_has(X86_FEATURE_PCID)) { - *new_asid = 0; - *need_flush = true; - return; + ns.asid = 0; + ns.need_flush = 1; + return ns; } /* @@ -235,9 +239,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 global_asid = mm_global_asid(next); if (global_asid) { - *new_asid = global_asid; - *need_flush = false; - return; + ns.asid = global_asid; + ns.need_flush = 0; + return ns; } } @@ -249,22 +253,23 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, next->context.ctx_id) continue; - *new_asid = asid; - *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < - next_tlb_gen); - return; + ns.asid = asid; + ns.need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen); + return ns; } /* * We don't currently own an ASID slot on this CPU. * Allocate a slot. */ - *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; - if (*new_asid >= TLB_NR_DYN_ASIDS) { - *new_asid = 0; + ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; + if (ns.asid >= TLB_NR_DYN_ASIDS) { + ns.asid = 0; this_cpu_write(cpu_tlbstate.next_asid, 1); } - *need_flush = true; + ns.need_flush = true; + + return ns; } /* @@ -781,9 +786,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy); unsigned cpu = smp_processor_id(); unsigned long new_lam; + struct new_asid ns; u64 next_tlb_gen; - bool need_flush; - u16 new_asid; + /* We don't want flush_tlb_func() to run concurrently with us. */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) @@ -855,7 +860,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, /* Check if the current mm is transitioning to a global ASID */ if (mm_needs_global_asid(next, prev_asid)) { next_tlb_gen = atomic64_read(&next->context.tlb_gen); - choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); + ns = choose_new_asid(next, next_tlb_gen); goto reload_tlb; } @@ -890,8 +895,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * TLB contents went out of date while we were in lazy * mode. Fall through to the TLB switching code below. */ - new_asid = prev_asid; - need_flush = true; + ns.asid = prev_asid; + ns.need_flush = true; } else { /* * Apply process to process speculation vulnerability @@ -912,21 +917,21 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); - choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); + ns = choose_new_asid(next, next_tlb_gen); } reload_tlb: new_lam = mm_lam_cr3_mask(next); - if (need_flush) { - VM_WARN_ON_ONCE(is_global_asid(new_asid)); - this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); - this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - load_new_mm_cr3(next->pgd, new_asid, new_lam, true); + if (ns.need_flush) { + VM_WARN_ON_ONCE(is_global_asid(ns.asid)); + this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id); + this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen); + load_new_mm_cr3(next->pgd, ns.asid, new_lam, true); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - load_new_mm_cr3(next->pgd, new_asid, new_lam, false); + load_new_mm_cr3(next->pgd, ns.asid, new_lam, false); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); } @@ -935,7 +940,7 @@ reload_tlb: barrier(); this_cpu_write(cpu_tlbstate.loaded_mm, next); - this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); + this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid); cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next)); if (next != prev) { diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 169eed162a7f..48839958b0b1 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -61,29 +61,11 @@ static inline int page_is_allowed(unsigned long pfn) { return devmem_is_allowed(pfn); } -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - u64 from = ((u64)pfn) << PAGE_SHIFT; - u64 to = from + size; - u64 cursor = from; - - while (cursor < to) { - if (!devmem_is_allowed(pfn)) - return 0; - cursor += PAGE_SIZE; - pfn++; - } - return 1; -} #else static inline int page_is_allowed(unsigned long pfn) { return 1; } -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - return 1; -} #endif static inline bool should_stop_iteration(void) diff --git a/include/linux/io.h b/include/linux/io.h index 6a6bc4d46d0a..0642c7ee41db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base, int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, resource_size_t size); +#ifdef CONFIG_STRICT_DEVMEM +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) + return 0; + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#else +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#endif #endif /* _LINUX_IO_H */ diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 18d736640ece..0873b0e5f48b 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -682,7 +682,7 @@ int do_uring(unsigned long lam) return 1; if (fstat(file_fd, &st) < 0) - return 1; + goto cleanup; off_t file_sz = st.st_size; @@ -690,7 +690,7 @@ int do_uring(unsigned long lam) fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); if (!fi) - return 1; + goto cleanup; fi->file_sz = file_sz; fi->file_fd = file_fd; @@ -698,7 +698,7 @@ int do_uring(unsigned long lam) ring = malloc(sizeof(*ring)); if (!ring) { free(fi); - return 1; + goto cleanup; } memset(ring, 0, sizeof(struct io_ring)); @@ -729,6 +729,8 @@ out: } free(fi); +cleanup: + close(file_fd); return ret; } @@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void) wq = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); + close(fd); if (wq == MAP_FAILED) perror("mmap"); |