Diffstat (limited to 'arch/arm64/include')
53 files changed, 850 insertions, 322 deletions
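A recurring theme in the diff below is that arm64 now masks and unmasks IRQ and FIQ together: "msr daifclr, #2" becomes "msr daifclr, #3", and constants such as DAIF_PROCCTX_NOIRQ gain PSR_F_BIT alongside PSR_I_BIT. As a rough, freestanding illustration only (not part of the patch), the sketch below contrasts the old and new unmask sequences using the DAIF immediate encoding visible in the changes (bit 0 = F, bit 1 = I, bit 2 = A, bit 3 = D):

/*
 * Illustrative sketch only -- not taken from this series. The helper
 * names are invented; the asm mirrors the change applied throughout
 * the diff (arch_gicv3.h, assembler.h, irqflags.h, daifflags.h).
 */
static inline void old_arch_local_irq_enable(void)
{
	/* Before: unmask IRQ only; FIQ stays masked almost everywhere. */
	asm volatile("msr daifclr, #2" : : : "memory");
}

static inline void new_arch_local_irq_enable(void)
{
	/* After: IRQ and FIQ are always masked and unmasked as a pair. */
	asm volatile("msr daifclr, #3" : : : "memory");
}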
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 5df500dcc627..8a078fc662ac 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -97,9 +97,9 @@ .popsection .subsection 1 663: \insn2 -664: .previous - .org . - (664b-663b) + (662b-661b) +664: .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .previous .endif .endm @@ -169,11 +169,11 @@ */ .macro alternative_endif 664: + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) .if .Lasm_alt_mode==0 .previous .endif - .org . - (664b-663b) + (662b-661b) - .org . - (662b-661b) + (664b-663b) .endm /* diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 880b9054d75c..934b9be582d2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void) static inline void gic_arch_enable_irqs(void) { - asm volatile ("msr daifclr, #2" : : : "memory"); + asm volatile ("msr daifclr, #3" : : : "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 9f0ec21d6327..88d20f04c64a 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -165,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -/* - * Ensure that reads of the counter are treated the same as memory reads - * for the purposes of ordering by subsequent memory barriers. - * - * This insanity brought to you by speculative system register reads, - * out-of-order memory accesses, sequence locks and Thomas Gleixner. - * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html - */ -#define arch_counter_enforce_ordering(val) do { \ - u64 tmp, _val = (val); \ - \ - asm volatile( \ - " eor %0, %1, %1\n" \ - " add %0, sp, %0\n" \ - " ldr xzr, [%0]" \ - : "=r" (tmp) : "r" (_val)); \ -} while (0) - static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; @@ -224,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void) return cnt; } -#undef arch_counter_enforce_ordering - static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index 52dead2a8640..8ca2dc0661ee 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -13,30 +13,12 @@ * so use the base value of ldp as thread.keys_user and offset as * thread.keys_user.ap*. 
*/ - .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 + .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 mov \tmp1, #THREAD_KEYS_USER add \tmp1, \tsk, \tmp1 -alternative_if_not ARM64_HAS_ADDRESS_AUTH - b .Laddr_auth_skip_\@ -alternative_else_nop_endif ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA] msr_s SYS_APIAKEYLO_EL1, \tmp2 msr_s SYS_APIAKEYHI_EL1, \tmp3 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIB] - msr_s SYS_APIBKEYLO_EL1, \tmp2 - msr_s SYS_APIBKEYHI_EL1, \tmp3 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDA] - msr_s SYS_APDAKEYLO_EL1, \tmp2 - msr_s SYS_APDAKEYHI_EL1, \tmp3 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDB] - msr_s SYS_APDBKEYLO_EL1, \tmp2 - msr_s SYS_APDBKEYHI_EL1, \tmp3 -.Laddr_auth_skip_\@: -alternative_if ARM64_HAS_GENERIC_AUTH - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APGA] - msr_s SYS_APGAKEYLO_EL1, \tmp2 - msr_s SYS_APGAKEYHI_EL1, \tmp3 -alternative_else_nop_endif .endm .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ca31594d3d6c..8418c1bd8f04 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -15,6 +15,8 @@ #include <asm-generic/export.h> #include <asm/asm-offsets.h> +#include <asm/alternative.h> +#include <asm/asm-bug.h> #include <asm/cpufeature.h> #include <asm/cputype.h> #include <asm/debug-monitors.h> @@ -23,6 +25,14 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> + /* + * Provide a wxN alias for each wN register so what we can paste a xN + * reference after a 'w' to obtain the 32-bit version. + */ + .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + wx\n .req w\n + .endr + .macro save_and_disable_daif, flags mrs \flags, daif msr daifset, #0xf @@ -40,9 +50,9 @@ msr daif, \flags .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ - .macro enable_da_f - msr daifclr, #(8 | 4 | 1) + /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ + .macro enable_da + msr daifclr, #(8 | 4) .endm /* @@ -50,7 +60,7 @@ */ .macro save_and_disable_irq, flags mrs \flags, daif - msr daifset, #2 + msr daifset, #3 .endm .macro restore_irq, flags @@ -270,12 +280,24 @@ alternative_endif * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg +#ifndef __KVM_NVHE_HYPERVISOR__ alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL alternative_endif +#else +alternative_if_not ARM64_KVM_PROTECTED_MODE + ASM_BUG() +alternative_else_nop_endif +alternative_cb kvm_compute_final_ctr_el0 + movz \reg, #0 + movk \reg, #0, lsl #16 + movk \reg, #0, lsl #32 + movk \reg, #0, lsl #48 +alternative_cb_end +#endif .endm @@ -676,11 +698,11 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Set SCTLR_EL1 to the passed value, and invalidate the local icache + * Set SCTLR_ELx to the @reg value, and invalidate the local icache * in the process. This is called when setting the MMU on. */ -.macro set_sctlr_el1, reg - msr sctlr_el1, \reg +.macro set_sctlr, sreg, reg + msr \sreg, \reg isb /* * Invalidate the local I-cache so that any instructions fetched @@ -692,90 +714,41 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU isb .endm -/* - * Check whether to yield to another runnable task from kernel mode NEON code - * (which runs with preemption disabled). 
- * - * if_will_cond_yield_neon - * // pre-yield patchup code - * do_cond_yield_neon - * // post-yield patchup code - * endif_yield_neon <label> - * - * where <label> is optional, and marks the point where execution will resume - * after a yield has been performed. If omitted, execution resumes right after - * the endif_yield_neon invocation. Note that the entire sequence, including - * the provided patchup code, will be omitted from the image if - * CONFIG_PREEMPTION is not defined. - * - * As a convenience, in the case where no patchup code is required, the above - * sequence may be abbreviated to - * - * cond_yield_neon <label> - * - * Note that the patchup code does not support assembler directives that change - * the output section, any use of such directives is undefined. - * - * The yield itself consists of the following: - * - Check whether the preempt count is exactly 1 and a reschedule is also - * needed. If so, calling of preempt_enable() in kernel_neon_end() will - * trigger a reschedule. If it is not the case, yielding is pointless. - * - Disable and re-enable kernel mode NEON, and branch to the yield fixup - * code. - * - * This macro sequence may clobber all CPU state that is not guaranteed by the - * AAPCS to be preserved across an ordinary function call. - */ - - .macro cond_yield_neon, lbl - if_will_cond_yield_neon - do_cond_yield_neon - endif_yield_neon \lbl - .endm - - .macro if_will_cond_yield_neon -#ifdef CONFIG_PREEMPTION - get_current_task x0 - ldr x0, [x0, #TSK_TI_PREEMPT] - sub x0, x0, #PREEMPT_DISABLE_OFFSET - cbz x0, .Lyield_\@ - /* fall through to endif_yield_neon */ - .subsection 1 -.Lyield_\@ : -#else - .section ".discard.cond_yield_neon", "ax" -#endif - .endm - - .macro do_cond_yield_neon - bl kernel_neon_end - bl kernel_neon_begin - .endm +.macro set_sctlr_el1, reg + set_sctlr sctlr_el1, \reg +.endm - .macro endif_yield_neon, lbl - .ifnb \lbl - b \lbl - .else - b .Lyield_out_\@ - .endif - .previous -.Lyield_out_\@ : - .endm +.macro set_sctlr_el2, reg + set_sctlr sctlr_el2, \reg +.endm /* - * Check whether preempt-disabled code should yield as soon as it - * is able. This is the case if re-enabling preemption a single - * time results in a preempt count of zero, and the TIF_NEED_RESCHED - * flag is set. (Note that the latter is stored negated in the - * top word of the thread_info::preempt_count field) + * Check whether preempt/bh-disabled asm code should yield as soon as + * it is able. This is the case if we are currently running in task + * context, and either a softirq is pending, or the TIF_NEED_RESCHED + * flag is set and re-enabling preemption a single time would result in + * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is + * stored negated in the top word of the thread_info::preempt_count + * field) */ - .macro cond_yield, lbl:req, tmp:req -#ifdef CONFIG_PREEMPTION + .macro cond_yield, lbl:req, tmp:req, tmp2:req get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] + /* + * If we are serving a softirq, there is no point in yielding: the + * softirq will not be preempted no matter what we do, so we should + * run to completion as quickly as we can. 
+ */ + tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ +#ifdef CONFIG_PREEMPTION sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif + adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING + this_cpu_offset \tmp2 + ldr w\tmp, [\tmp, \tmp2] + cbnz w\tmp, \lbl // yield on pending softirq in task context +.Lnoyield_\@: .endm /* diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index c3009b0e5239..2175ec0004ed 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,12 +23,9 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") +#define tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") -#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ - SB_BARRIER_INSN"nop\n", \ - ARM64_HAS_SB)) - #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ @@ -70,6 +67,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, return mask; } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index 93a161b3bf3f..dc52b733675d 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -37,7 +37,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } while (--n > 0); sum += ((sum >> 32) | (sum << 32)); - return csum_fold((__force u32)(sum >> 32)); + return csum_fold((__force __wsum)(sum >> 32)); } #define ip_fast_csum ip_fast_csum diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b77d997b173b..b0c5eda0498f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -66,7 +66,9 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61 +#define ARM64_HAS_EPAN 62 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 63 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 61177bac49fa..338840c00e8e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -63,6 +63,23 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +/* + * Describe the early feature override to the core override code: + * + * @val Values that are to be merged into the final + * sanitised value of the register. Only the bitfields + * set to 1 in @mask are valid + * @mask Mask of the features that are overridden by @val + * + * A @mask field set to full-1 indicates that the corresponding field + * in @val is a valid override. 
+ * + * A @mask field set to full-0 with the corresponding @val field set + * to full-0 denotes that this field has no override + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-1 denotes thath this field has an invalid override. + */ struct arm64_ftr_override { u64 val; u64 mask; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ef5b040dee44..6231e1f0abe7 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -59,6 +59,7 @@ #define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_IMP_HISI 0x48 +#define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -99,6 +100,9 @@ #define HISI_CPU_PART_TSV110 0xD01 +#define APPLE_CPU_PART_M1_ICESTORM 0x022 +#define APPLE_CPU_PART_M1_FIRESTORM 0x023 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -127,6 +131,8 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) +#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 1c26d7baa67f..5eb7af9c4557 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,8 +13,8 @@ #include <asm/ptrace.h> #define DAIF_PROCCTX 0 -#define DAIF_PROCCTX_NOIRQ PSR_I_BIT -#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) +#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) - flags |= PSR_I_BIT; + flags |= PSR_I_BIT | PSR_F_BIT; } return flags; @@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags) bool irq_disabled = flags & PSR_I_BIT; WARN_ON(system_has_prio_mask_debugging() && - !(read_sysreg(daif) & PSR_I_BIT)); + (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { trace_hardirqs_on(); @@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags) * If interrupts are disabled but we can take * asynchronous errors, we can take NMIs */ - flags &= ~PSR_I_BIT; + flags &= ~(PSR_I_BIT | PSR_F_BIT); pmr = GIC_PRIO_IRQOFF; } else { pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index d77d358f9395..21fa330f498d 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -65,6 +65,19 @@ // use EL1&0 translation. 
.Lskip_spe_\@: + /* Trace buffer */ + ubfx x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4 + cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present + + mrs_s x0, SYS_TRBIDR_EL1 + and x0, x0, TRBIDR_PROG + cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 + + mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + orr x2, x2, x0 // allow the EL1&0 translation + // to own it. + +.Lskip_trace_\@: msr mdcr_el2, x2 // Configure debug traps .endm @@ -131,6 +144,26 @@ .Lskip_sve_\@: .endm +/* Disable any fine grained traps */ +.macro __init_el2_fgt + mrs x1, id_aa64mmfr0_el1 + ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HDFGRTR_EL2, xzr + msr_s SYS_HDFGWTR_EL2, xzr + msr_s SYS_HFGRTR_EL2, xzr + msr_s SYS_HFGWTR_EL2, xzr + msr_s SYS_HFGITR_EL2, xzr + + mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU + ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HAFGRTR_EL2, xzr +.Lskip_fgt_\@: +.endm + .macro __init_el2_nvhe_prepare_eret mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 @@ -155,6 +188,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bec5f14b622a..2599504674b5 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -73,6 +73,7 @@ extern void sve_flush_live(void); extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); +extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); @@ -130,6 +131,15 @@ static inline void sve_user_enable(void) sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN); } +#define sve_cond_update_zcr_vq(val, reg) \ + do { \ + u64 __zcr = read_sysreg_s((reg)); \ + u64 __new = __zcr & ~ZCR_ELx_LEN_MASK; \ + __new |= (val) & ZCR_ELx_LEN_MASK; \ + if (__zcr != __new) \ + write_sysreg_s(__new, (reg)); \ + } while (0) + /* * Probing and setup functions. * Calls to these functions must be serialised with one another. 
@@ -159,6 +169,8 @@ static inline int sve_get_current_vl(void) static inline void sve_user_disable(void) { BUILD_BUG(); } static inline void sve_user_enable(void) { BUILD_BUG(); } +#define sve_cond_update_zcr_vq(val, reg) do { } while (0) + static inline void sve_init_vq_map(void) { } static inline void sve_update_vq_map(void) { } static inline int sve_verify_vq_map(void) { return 0; } diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index af43367534c7..a2563992d2dc 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -6,6 +6,8 @@ * Author: Catalin Marinas <catalin.marinas@arm.com> */ +#include <asm/assembler.h> + .macro fpsimd_save state, tmpnr stp q0, q1, [\state, #16 * 0] stp q2, q3, [\state, #16 * 2] @@ -230,8 +232,7 @@ str w\nxtmp, [\xpfpsr, #4] .endm -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 +.macro __sve_load nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 @@ -242,3 +243,8 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm + +.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 + __sve_load \nxbase, \xpfpsr, \nxtmp +.endm diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h index 737ded6b6d0d..b4b3076a76fb 100644 --- a/arch/arm64/include/asm/hyp_image.h +++ b/arch/arm64/include/asm/hyp_image.h @@ -10,11 +10,15 @@ #define __HYP_CONCAT(a, b) a ## b #define HYP_CONCAT(a, b) __HYP_CONCAT(a, b) +#ifndef __KVM_NVHE_HYPERVISOR__ /* * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, * to separate it from the kernel proper. */ #define kvm_nvhe_sym(sym) __kvm_nvhe_##sym +#else +#define kvm_nvhe_sym(sym) sym +#endif #ifdef LINKER_SCRIPT @@ -56,6 +60,9 @@ */ #define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym; +/* Defines a linker script alias for KVM nVHE hyp symbols */ +#define KVM_NVHE_ALIAS_HYP(first, sec) kvm_nvhe_sym(first) = kvm_nvhe_sym(sec); + #endif /* LINKER_SCRIPT */ #endif /* __ARM64_HYP_IMAGE_H__ */ diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h index f9cc1d021791..0ae427f352c8 100644 --- a/arch/arm64/include/asm/hypervisor.h +++ b/arch/arm64/include/asm/hypervisor.h @@ -4,4 +4,7 @@ #include <asm/xen/hypervisor.h> +void kvm_init_hyp_services(void); +bool kvm_arm_hyp_service_available(u32 func_id); + #endif diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 5ea8656a2030..7fd836bea7eb 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -169,16 +169,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) - -/* - * PCI configuration space mapping function. - * - * The PCI specification disallows posted write configuration transactions. - * Add an arch specific pci_remap_cfgspace() definition that is implemented - * through nGnRnE device memory attribute as recommended by the ARM v8 - * Architecture reference manual Issue A.k B2.8.2 "Device memory". 
- */ -#define pci_remap_cfgspace(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) +#define ioremap_np(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) /* * io{read,write}{16,32,64}be() macros diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b2b0c6405eb0..fac08e18bcd5 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,6 +8,10 @@ struct pt_regs; +int set_handle_irq(void (*handle_irq)(struct pt_regs *)); +#define set_handle_irq set_handle_irq +int set_handle_fiq(void (*handle_fiq)(struct pt_regs *)); + static inline int nr_legacy_irqs(void) { return 0; diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index a1020285ea75..81bbfa3a035b 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,6 +2,8 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H +extern void arch_irq_work_raise(void); + static inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75..b57b9b1e4344 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -12,15 +12,13 @@ /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and - * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai' + * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. - * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. 
*/ /* @@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable", + "msr daifclr, #3 // arch_local_irq_enable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void) } asm volatile(ALTERNATIVE( - "msr daifset, #2 // arch_local_irq_disable", + "msr daifset, #3 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 9befcd87e9a8..00dbcc71aeb2 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -96,10 +96,6 @@ struct kimage_arch { void *dtb; phys_addr_t dtb_mem; phys_addr_t kern_reloc; - /* Core ELF header buffer */ - void *elf_headers; - unsigned long elf_headers_mem; - unsigned long elf_headers_sz; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 4e90c2debf70..692c9049befa 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -278,6 +278,9 @@ #define CPTR_EL2_DEFAULT CPTR_EL2_RES1 /* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_E2TB_MASK (UL(0x3)) +#define MDCR_EL2_E2TB_SHIFT (UL(24)) +#define MDCR_EL2_TTRF (1 << 19) #define MDCR_EL2_TPMS (1 << 14) #define MDCR_EL2_E2PB_MASK (UL(0x3)) #define MDCR_EL2_E2PB_SHIFT (UL(12)) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a7ab84f781f7..cf8df032b9c3 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -57,6 +57,12 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 +#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 +#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16 +#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 +#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 +#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 +#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20 #ifndef __ASSEMBLY__ @@ -154,6 +160,9 @@ struct kvm_nvhe_init_params { unsigned long tpidr_el2; unsigned long stack_hyp_va; phys_addr_t pgd_pa; + unsigned long hcr_el2; + unsigned long vttbr; + unsigned long vtcr; }; /* Translate a kernel address @ptr into its equivalent linear mapping */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d10e6527f7d..7cd7d5c8c4bc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -94,7 +94,7 @@ struct kvm_s2_mmu { /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; - struct kvm *kvm; + struct kvm_arch *arch; }; struct kvm_arch_memory_slot { @@ -315,6 +315,8 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch regs; /* Statistical profiling extension */ u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; } host_debug_state; /* VGIC state */ @@ -372,8 +374,10 @@ struct kvm_vcpu_arch { }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ -#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \ - sve_ffr_offset((vcpu)->arch.sve_max_vl))) +#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ + sve_ffr_offset((vcpu)->arch.sve_max_vl)) + +#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) #define vcpu_sve_state_size(vcpu) ({ \ size_t __size_ret; \ @@ -382,7 +386,7 
@@ struct kvm_vcpu_arch { if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \ __size_ret = 0; \ } else { \ - __vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl); \ + __vcpu_vq = vcpu_sve_max_vq(vcpu); \ __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \ } \ \ @@ -400,7 +404,13 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ #define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ #define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ +#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ +#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ + KVM_GUESTDBG_USE_SW_BP | \ + KVM_GUESTDBG_USE_HW | \ + KVM_GUESTDBG_SINGLESTEP) /* * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can * take the following values: @@ -582,15 +592,11 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end, unsigned flags); -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); +#ifndef __KVM_NVHE_HYPERVISOR__ #define kvm_call_hyp_nvhe(f, ...) \ ({ \ struct arm_smccc_res res; \ @@ -630,9 +636,13 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ ret; \ }) +#else /* __KVM_NVHE_HYPERVISOR__ */ +#define kvm_call_hyp(f, ...) f(__VA_ARGS__) +#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__) +#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__) +#endif /* __KVM_NVHE_HYPERVISOR__ */ void force_vm_exit(const cpumask_t *mask); -void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); @@ -692,19 +702,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } -static inline bool kvm_arch_requires_vhe(void) -{ - /* - * The Arm architecture specifies that implementation of SVE - * requires VHE also to be implemented. 
The KVM code for arm64 - * relies on this when SVE is present: - */ - if (system_supports_sve()) - return true; - - return false; -} - void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); static inline void kvm_arch_hardware_unsetup(void) {} @@ -713,6 +710,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} void kvm_arm_init_debug(void); +void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); @@ -734,6 +732,10 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) return (!has_vhe() && attr->exclude_host); } +/* Flags for host debug state */ +void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); + #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) { @@ -771,5 +773,12 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) int kvm_trng_call(struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM +extern phys_addr_t hyp_mem_base; +extern phys_addr_t hyp_mem_size; +void __init kvm_hyp_reserve(void); +#else +static inline void kvm_hyp_reserve(void) { } +#endif #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 32ae676236b6..9d60b3006efc 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -90,6 +90,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +void __sve_save_state(void *sve_pffr, u32 *fpsr); +void __sve_restore_state(void *sve_pffr, u32 *fpsr); #ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); @@ -100,10 +102,20 @@ u64 __guest_enter(struct kvm_vcpu *vcpu); bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); -void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, u64 elr, u64 par); #endif +#ifdef __KVM_NVHE_HYPERVISOR__ +void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size, + phys_addr_t pgd, void *sp, void *cont_fn); +int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, + unsigned long *per_cpu_base, u32 hyp_va_bits); +void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); +#endif + +extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 90873851f677..25ed956f9af1 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -121,6 +121,8 @@ void kvm_update_va_mask(struct alt_instr *alt, void kvm_compute_layout(void); void kvm_apply_hyp_relocations(void); +#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset) + static __always_inline unsigned long __kern_hyp_va(unsigned long v) { asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" @@ -166,7 +168,15 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -int 
kvm_mmu_init(void); +int kvm_mmu_init(u32 *hyp_va_bits); + +static inline void *__kvm_vector_slot2addr(void *base, + enum arm64_hyp_spectre_vector slot) +{ + int idx = slot - (slot != HYP_VECTOR_DIRECT); + + return base + (idx * SZ_2K); +} struct kvm; @@ -262,9 +272,9 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr) { - write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); + write_sysreg(vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -275,5 +285,14 @@ static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +{ + __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr); +} + +static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) +{ + return container_of(mmu->arch, struct kvm, arch); +} #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 8886d43cfb11..c3674c47d48c 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -11,22 +11,79 @@ #include <linux/kvm_host.h> #include <linux/types.h> +#define KVM_PGTABLE_MAX_LEVELS 4U + +static inline u64 kvm_get_parange(u64 mmfr0) +{ + u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); + if (parange > ID_AA64MMFR0_PARANGE_MAX) + parange = ID_AA64MMFR0_PARANGE_MAX; + + return parange; +} + typedef u64 kvm_pte_t; /** + * struct kvm_pgtable_mm_ops - Memory management callbacks. + * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter + * can be used by the walker to pass a memcache. The + * initial refcount of the page is 1. + * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The + * @size parameter is in bytes, and is rounded-up to the + * next page boundary. The resulting allocation is + * physically contiguous. + * @free_pages_exact: Free an exact number of memory pages previously + * allocated by zalloc_pages_exact. + * @get_page: Increment the refcount on a page. + * @put_page: Decrement the refcount on a page. When the refcount + * reaches 0 the page is automatically freed. + * @page_count: Return the refcount of a page. + * @phys_to_virt: Convert a physical address into a virtual address mapped + * in the current context. + * @virt_to_phys: Convert a virtual address mapped in the current context + * into a physical address. + */ +struct kvm_pgtable_mm_ops { + void* (*zalloc_page)(void *arg); + void* (*zalloc_pages_exact)(size_t size); + void (*free_pages_exact)(void *addr, size_t size); + void (*get_page)(void *addr); + void (*put_page)(void *addr); + int (*page_count)(void *addr); + void* (*phys_to_virt)(phys_addr_t phys); + phys_addr_t (*virt_to_phys)(void *addr); +}; + +/** + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags. + * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have + * ARM64_HAS_STAGE2_FWB. + * @KVM_PGTABLE_S2_IDMAP: Only use identity mappings. + */ +enum kvm_pgtable_stage2_flags { + KVM_PGTABLE_S2_NOFWB = BIT(0), + KVM_PGTABLE_S2_IDMAP = BIT(1), +}; + +/** * struct kvm_pgtable - KVM page-table. 
* @ia_bits: Maximum input address size, in bits. * @start_level: Level at which the page-table walk starts. * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. */ struct kvm_pgtable { u32 ia_bits; u32 start_level; kvm_pte_t *pgd; + struct kvm_pgtable_mm_ops *mm_ops; /* Stage-2 only */ struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; }; /** @@ -50,6 +107,16 @@ enum kvm_pgtable_prot { #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) /** + * struct kvm_mem_range - Range of Intermediate Physical Addresses + * @start: Start of the range. + * @end: End of the range. + */ +struct kvm_mem_range { + u64 start; + u64 end; +}; + +/** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid * entries. @@ -86,10 +153,12 @@ struct kvm_pgtable_walker { * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. * @va_bits: Maximum virtual address bits. + * @mm_ops: Memory management callbacks. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits); +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, + struct kvm_pgtable_mm_ops *mm_ops); /** * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table. @@ -123,17 +192,41 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot); /** - * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. + * kvm_get_vtcr() - Helper to construct VTCR_EL2 + * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register. + * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register. + * @phys_shfit: Value to set in VTCR_EL2.T0SZ. + * + * The VTCR value is common across all the physical CPUs on the system. + * We use system wide sanitised values to fill in different fields, + * except for Hardware Management of Access Flags. HA Flag is set + * unconditionally on all CPUs, as it is safe to run with or without + * the feature and the bit is RES0 on CPUs that don't support it. + * + * Return: VTCR_EL2 value + */ +u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); + +/** + * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. - * @kvm: KVM structure representing the guest virtual machine. + * @arch: Arch-specific KVM structure representing the guest virtual + * machine. + * @mm_ops: Memory management callbacks. + * @flags: Stage-2 configuration flags. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm); +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags); + +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \ + kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0) /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * * The page-table is assumed to be unreachable by any hardware walkers prior * to freeing and therefore no TLB invalidation is performed. 
@@ -142,13 +235,13 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address at which to place the mapping. * @size: Size of the mapping. * @phys: Physical address of the memory to map. * @prot: Permissions and attributes for the mapping. - * @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to - * allocate page-table pages. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -170,11 +263,31 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - struct kvm_mmu_memory_cache *mc); + void *mc); + +/** + * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to + * track ownership. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @addr: Base intermediate physical address to annotate. + * @size: Size of the annotated range. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. + * @owner_id: Unique identifier for the owner of the page. + * + * By default, all page-tables are owned by identifier 0. This function can be + * used to mark portions of the IPA space as owned by other entities. When a + * stage 2 is used with identity-mappings, these annotations allow to use the + * page-table data structure as a simple rmap. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, u8 owner_id); /** * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address from which to remove the mapping. * @size: Size of the mapping. * @@ -194,7 +307,7 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); /** * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range * without TLB invalidation. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address from which to write-protect, * @size: Size of the range. * @@ -211,7 +324,7 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); /** * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * * The offset of @addr within a page is ignored. @@ -225,7 +338,7 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); /** * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 
* @addr: Intermediate physical address to identify the page-table entry. * * The offset of @addr within a page is ignored. @@ -244,7 +357,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); /** * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a * page-table entry. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * @prot: Additional permissions to grant for the mapping. * @@ -263,7 +376,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, /** * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the * access flag set. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * * The offset of @addr within a page is ignored. @@ -276,7 +389,7 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr); * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point * of Coherency for guest stage-2 address * range. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address from which to flush. * @size: Size of the range. * @@ -311,4 +424,23 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker); +/** + * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical + * Addresses with compatible permission + * attributes. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @addr: Address that must be covered by the range. + * @prot: Protection attributes that the range must be compatible with. + * @range: Range structure used to limit the search space at call time and + * that will hold the result. + * + * The offset of @addr within a page is ignored. An IPA is compatible with @prot + * iff its corresponding stage-2 page-table entry has default ownership and, if + * valid, is mapped with protection attributes identical to @prot. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_prot prot, + struct kvm_mem_range *range); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0aabc3be9a75..6d9915d066fa 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -243,13 +243,15 @@ static inline const void *__tag_set(const void *addr, u8 tag) } #ifdef CONFIG_KASAN_HW_TAGS -#define arch_enable_tagging() mte_enable_kernel() +#define arch_enable_tagging_sync() mte_enable_kernel_sync() +#define arch_enable_tagging_async() mte_enable_kernel_async() #define arch_set_tagging_report_once(state) mte_set_report_once(state) +#define arch_force_async_tag_fault() mte_check_tfsr_exit() #define arch_init_tags(max_tag) mte_init_tags(max_tag) #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) -#define arch_set_mem_tag_range(addr, size, tag) \ - mte_set_mem_tag_range((addr), (size), (tag)) +#define arch_set_mem_tag_range(addr, size, tag, init) \ + mte_set_mem_tag_range((addr), (size), (tag), (init)) #endif /* CONFIG_KASAN_HW_TAGS */ /* @@ -321,6 +323,22 @@ static inline void *phys_to_virt(phys_addr_t x) #define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) +#ifdef CONFIG_CFI_CLANG +/* + * With CONFIG_CFI_CLANG, the compiler replaces function address + * references with the address of the function's CFI jump table + * entry. The function_nocfi macro always returns the address of the + * actual function instead. + */ +#define function_nocfi(x) ({ \ + void *addr; \ + asm("adrp %0, " __stringify(x) "\n\t" \ + "add %0, %0, :lo12:" __stringify(x) \ + : "=r" (addr)); \ + addr; \ +}) +#endif + /* * virt_to_page(x) convert a _valid_ virtual address to struct page * * virt_addr_valid(x) indicates whether a virtual address is valid diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index bd02e99b1a4c..d3cef9133539 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -119,7 +119,7 @@ static inline void cpu_install_idmap(void) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void cpu_replace_ttbr1(pgd_t *pgdp) +static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1; @@ -140,7 +140,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) ttbr1 |= TTBR_CNP_BIT; } - replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); + replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1)); cpu_install_idmap(); replace_phys(ttbr1); diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 7ab500e2ad17..ddd4d17cf9a0 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -53,7 +53,8 @@ static inline u8 mte_get_random_tag(void) * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and * size must be non-zero and MTE_GRANULE_SIZE aligned. 
*/ -static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) +static inline void mte_set_mem_tag_range(void *addr, size_t size, + u8 tag, bool init) { u64 curr, end; @@ -63,21 +64,31 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) curr = (u64)__tag_set(addr, tag); end = curr + size; - do { - /* - * 'asm volatile' is required to prevent the compiler to move - * the statement outside of the loop. - */ - asm volatile(__MTE_PREAMBLE "stg %0, [%0]" - : - : "r" (curr) - : "memory"); - - curr += MTE_GRANULE_SIZE; - } while (curr != end); + /* + * 'asm volatile' is required to prevent the compiler to move + * the statement outside of the loop. + */ + if (init) { + do { + asm volatile(__MTE_PREAMBLE "stzg %0, [%0]" + : + : "r" (curr) + : "memory"); + curr += MTE_GRANULE_SIZE; + } while (curr != end); + } else { + do { + asm volatile(__MTE_PREAMBLE "stg %0, [%0]" + : + : "r" (curr) + : "memory"); + curr += MTE_GRANULE_SIZE; + } while (curr != end); + } } -void mte_enable_kernel(void); +void mte_enable_kernel_sync(void); +void mte_enable_kernel_async(void); void mte_init_tags(u64 max_tag); void mte_set_report_once(bool state); @@ -100,11 +111,16 @@ static inline u8 mte_get_random_tag(void) return 0xFF; } -static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) +static inline void mte_set_mem_tag_range(void *addr, size_t size, + u8 tag, bool init) { } -static inline void mte_enable_kernel(void) +static inline void mte_enable_kernel_sync(void) +{ +} + +static inline void mte_enable_kernel_async(void) { } diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9b557a457f24..bc88a1ced0d7 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -39,16 +39,15 @@ void mte_free_tag_storage(char *storage); void mte_sync_tags(pte_t *ptep, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); -void flush_mte_state(void); +void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); +void mte_suspend_enter(void); void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, unsigned long addr, unsigned long data); -void mte_assign_mem_tag_range(void *addr, size_t size); - #else /* CONFIG_ARM64_MTE */ /* unused if !CONFIG_ARM64_MTE, silence the compiler */ @@ -60,12 +59,15 @@ static inline void mte_sync_tags(pte_t *ptep, pte_t pte) static inline void mte_copy_page_tags(void *kto, const void *kfrom) { } -static inline void flush_mte_state(void) +static inline void mte_thread_init_user(void) { } static inline void mte_thread_switch(struct task_struct *next) { } +static inline void mte_suspend_enter(void) +{ +} static inline void mte_suspend_exit(void) { } @@ -84,11 +86,51 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, return -EIO; } -static inline void mte_assign_mem_tag_range(void *addr, size_t size) +#endif /* CONFIG_ARM64_MTE */ + +#ifdef CONFIG_KASAN_HW_TAGS +/* Whether the MTE asynchronous mode is enabled. 
*/ +DECLARE_STATIC_KEY_FALSE(mte_async_mode); + +static inline bool system_uses_mte_async_mode(void) { + return static_branch_unlikely(&mte_async_mode); } -#endif /* CONFIG_ARM64_MTE */ +void mte_check_tfsr_el1(void); + +static inline void mte_check_tfsr_entry(void) +{ + mte_check_tfsr_el1(); +} + +static inline void mte_check_tfsr_exit(void) +{ + /* + * The asynchronous faults are sync'ed automatically with + * TFSR_EL1 on kernel entry but for exit an explicit dsb() + * is required. + */ + dsb(nsh); + isb(); + + mte_check_tfsr_el1(); +} +#else +static inline bool system_uses_mte_async_mode(void) +{ + return false; +} +static inline void mte_check_tfsr_el1(void) +{ +} +static inline void mte_check_tfsr_entry(void) +{ +} +static inline void mte_check_tfsr_exit(void) +{ +} +#endif /* CONFIG_KASAN_HW_TAGS */ #endif /* __ASSEMBLY__ */ #endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index cf3a0fd7c1a7..9aa193e0e8f2 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -3,23 +3,19 @@ #define _ASM_ARM64_PARAVIRT_H #ifdef CONFIG_PARAVIRT +#include <linux/static_call_types.h> + struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops { - unsigned long long (*steal_clock)(int cpu); -}; - -struct paravirt_patch_template { - struct pv_time_ops time; -}; +u64 dummy_steal_clock(int cpu); -extern struct paravirt_patch_template pv_ops; +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); static inline u64 paravirt_steal_clock(int cpu) { - return pv_ops.time.steal_clock(cpu); + return static_call(pv_steal_clock)(cpu); } int __init pv_time_init(void); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 3c6a7f5988b1..31fbab3d6f99 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); + pudval_t pudval = PUD_TYPE_TABLE; + + pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; + __pud_populate(pudp, __pa(pmdp), pudval); } #else static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) @@ -45,7 +48,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE); + p4dval_t p4dval = P4D_TYPE_TABLE; + + p4dval |= (mm == &init_mm) ? 
P4D_TABLE_UXN : P4D_TABLE_PXN; + __p4d_populate(p4dp, __pa(pudp), p4dval); } #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) @@ -70,16 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - /* - * The pmd must be loaded with the physical address of the PTE table - */ - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm != &init_mm); + __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm == &init_mm); + __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 42442a0ae2ab..b82575a33f8b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,17 @@ /* * Hardware page table definitions. * + * Level 0 descriptor (P4D). + */ +#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) +#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) +#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) +#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) +#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) +#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) + +/* * Level 1 descriptor (PUD). */ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) @@ -101,6 +112,8 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) +#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) /* * Level 2 descriptor (PMD). @@ -122,6 +135,8 @@ #define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) +#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59) +#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60) /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). 
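The hierarchical execute-never table bits defined just above (P4D/PUD/PMD_TABLE_PXN at bit 59, ..._TABLE_UXN at bit 60) are what the pgalloc.h hunks earlier in this diff feed into the populate helpers: page tables owned by init_mm are marked UXN, user page tables PXN. A minimal sketch of that shared selection logic, using a hypothetical helper name and the constants introduced above:

/*
 * Sketch only -- the helper name is invented. It mirrors the pattern
 * used by pud_populate()/p4d_populate() in this series: kernel tables
 * are never user-executable, user tables never privileged-executable.
 */
static inline pudval_t pud_table_prot(const struct mm_struct *mm)
{
	pudval_t prot = PUD_TYPE_TABLE;

	prot |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;

	return prot;
}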
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a65fb528110..938092df76cf 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -71,10 +71,10 @@ extern bool arm64_use_ng_mappings; #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) -#define PAGE_S2_MEMATTR(attr) \ +#define PAGE_S2_MEMATTR(attr, has_fwb) \ ({ \ u64 __val; \ - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \ + if (has_fwb) \ __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \ else \ __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \ @@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings; #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 47027796c2f9..0b10204e72fc 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -113,11 +113,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +/* + * Execute-only user mappings do not have the PTE_USER bit set. All valid + * kernel mappings have the PTE_UXN bit set. + */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) - + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been @@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* - * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check). PROT_NONE mappings do not have the PTE_VALID bit - * set. + * p??_access_permitted() is true for valid user mappings (PTE_USER + * bit set, subject to the write permission check). For execute-only + * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits + * not set) must return false. PROT_NONE mappings do not have the + * PTE_VALID bit set. 
*/ #define pte_access_permitted(pte, write) \ - (pte_valid_user(pte) && (!(write) || pte_write(pte))) + (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) + return prot; + + if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) + return prot; + + return PAGE_READONLY_EXEC; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index b112a11e9302..d50416be99be 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -3,6 +3,7 @@ #define __ASM_POINTER_AUTH_H #include <linux/bitops.h> +#include <linux/prctl.h> #include <linux/random.h> #include <asm/cpufeature.h> @@ -34,6 +35,25 @@ struct ptrauth_keys_kernel { struct ptrauth_key apia; }; +#define __ptrauth_key_install_nosync(k, v) \ +do { \ + struct ptrauth_key __pki_v = (v); \ + write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \ + write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \ +} while (0) + +static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys) +{ + if (system_supports_address_auth()) { + __ptrauth_key_install_nosync(APIB, keys->apib); + __ptrauth_key_install_nosync(APDA, keys->apda); + __ptrauth_key_install_nosync(APDB, keys->apdb); + } + + if (system_supports_generic_auth()) + __ptrauth_key_install_nosync(APGA, keys->apga); +} + static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys) { if (system_supports_address_auth()) { @@ -45,14 +65,9 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys) if (system_supports_generic_auth()) get_random_bytes(&keys->apga, sizeof(keys->apga)); -} -#define __ptrauth_key_install_nosync(k, v) \ -do { \ - struct ptrauth_key __pki_v = (v); \ - write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \ - write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \ -} while (0) + ptrauth_keys_install_user(keys); +} static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys) { @@ -71,6 +86,10 @@ static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kerne extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg); +extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, + unsigned long enabled); +extern int ptrauth_get_enabled_keys(struct task_struct *tsk); + static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) { return ptrauth_clear_pac(ptr); @@ -85,8 +104,23 @@ static __always_inline void ptrauth_enable(void) isb(); } -#define ptrauth_thread_init_user(tsk) \ - ptrauth_keys_init_user(&(tsk)->thread.keys_user) +#define ptrauth_suspend_exit() \ + ptrauth_keys_install_user(&current->thread.keys_user) + +#define ptrauth_thread_init_user() \ + do { \ + ptrauth_keys_init_user(&current->thread.keys_user); \ + \ + /* enable all keys */ \ + if (system_supports_address_auth()) \ + set_task_sctlr_el1(current->thread.sctlr_user | \ + SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB); \ + } while (0) + +#define ptrauth_thread_switch_user(tsk) \ + 
ptrauth_keys_install_user(&(tsk)->thread.keys_user) + #define ptrauth_thread_init_kernel(tsk) \ ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel) #define ptrauth_thread_switch_kernel(tsk) \ @@ -95,10 +129,17 @@ static __always_inline void ptrauth_enable(void) #else /* CONFIG_ARM64_PTR_AUTH */ #define ptrauth_enable() #define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL) +#define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL) +#define ptrauth_get_enabled_keys(tsk) (-EINVAL) #define ptrauth_strip_insn_pac(lr) (lr) -#define ptrauth_thread_init_user(tsk) +#define ptrauth_suspend_exit() +#define ptrauth_thread_init_user() #define ptrauth_thread_init_kernel(tsk) +#define ptrauth_thread_switch_user(tsk) #define ptrauth_thread_switch_kernel(tsk) #endif /* CONFIG_ARM64_PTR_AUTH */ +#define PR_PAC_ENABLED_KEYS_MASK \ + (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY) + #endif /* __ASM_POINTER_AUTH_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d3286..9df3feeee890 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -151,11 +151,15 @@ struct thread_struct { struct ptrauth_keys_kernel keys_kernel; #endif #ifdef CONFIG_ARM64_MTE - u64 sctlr_tcf0; u64 gcr_user_excl; #endif + u64 sctlr_user; }; +#define SCTLR_USER_MASK \ + (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \ + SCTLR_EL1_TCF0_MASK) + static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { @@ -247,10 +251,14 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); +void set_task_sctlr_el1(u64 sctlr); + /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); +asmlinkage void arm64_preempt_schedule_irq(void); + #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1) @@ -301,6 +309,11 @@ extern void __init minsigstksz_setup(void); /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) +/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */ +#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled) \ + ptrauth_set_enabled_keys(tsk, keys, enabled) +#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk) + #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 38187f74e089..b1dd7ecff7ef 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -23,7 +23,7 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS -void ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 2f36b16a5b5d..e4ad9db53af1 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -13,6 +13,7 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; extern char __hyp_rodata_start[], __hyp_rodata_end[]; extern char __hyp_reloc_begin[], __hyp_reloc_end[]; +extern char __hyp_bss_start[], __hyp_bss_end[]; extern 
char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index bcb01ca15325..0e357757c0cc 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -145,6 +145,7 @@ bool cpus_are_stuck_in_kernel(void); extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); +extern void panic_smp_self_stop(void); #endif /* ifndef __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index eb29b1fe8255..4b33ca620679 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -148,27 +148,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk, return false; } -static inline void start_backtrace(struct stackframe *frame, - unsigned long fp, unsigned long pc) -{ - frame->fp = fp; - frame->pc = pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame->graph = 0; -#endif - - /* - * Prime the first unwind. - * - * In unwind_frame() we'll check that the FP points to a valid stack, - * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be - * treated as a transition to whichever stack that happens to be. The - * prev_fp value won't be used, but we set it to 0 such that it is - * definitely not an accessible stack address. - */ - bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); - frame->prev_fp = 0; - frame->prev_type = STACK_TYPE_UNKNOWN; -} +void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..65d15700a168 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -283,6 +283,8 @@ #define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL /* Filtering controls */ +#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1) + #define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4) #define SYS_PMSFCR_EL1_FE_SHIFT 0 #define SYS_PMSFCR_EL1_FT_SHIFT 1 @@ -333,6 +335,55 @@ /*** End of Statistical Profiling Extension ***/ +/* + * TRBE Registers + */ +#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) +#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) +#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) +#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3) +#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4) +#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) +#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) + +#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) +#define TRBLIMITR_LIMIT_SHIFT 12 +#define TRBLIMITR_NVM BIT(5) +#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) +#define TRBLIMITR_TRIG_MODE_SHIFT 3 +#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) +#define TRBLIMITR_FILL_MODE_SHIFT 1 +#define TRBLIMITR_ENABLE BIT(0) +#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) +#define TRBPTR_PTR_SHIFT 0 +#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) +#define TRBBASER_BASE_SHIFT 12 +#define TRBSR_EC_MASK GENMASK(5, 0) +#define TRBSR_EC_SHIFT 26 +#define TRBSR_IRQ BIT(22) +#define TRBSR_TRG BIT(21) +#define TRBSR_WRAP BIT(20) +#define TRBSR_ABORT BIT(18) +#define TRBSR_STOP BIT(17) +#define TRBSR_MSS_MASK GENMASK(15, 0) +#define TRBSR_MSS_SHIFT 0 +#define TRBSR_BSC_MASK GENMASK(5, 0) +#define TRBSR_BSC_SHIFT 0 +#define TRBSR_FSC_MASK GENMASK(5, 0) +#define TRBSR_FSC_SHIFT 0 +#define TRBMAR_SHARE_MASK GENMASK(1, 0) +#define TRBMAR_SHARE_SHIFT 8 +#define TRBMAR_OUTER_MASK GENMASK(3, 0) +#define 
TRBMAR_OUTER_SHIFT 4 +#define TRBMAR_INNER_MASK GENMASK(3, 0) +#define TRBMAR_INNER_SHIFT 0 +#define TRBTRG_TRG_MASK GENMASK(31, 0) +#define TRBTRG_TRG_SHIFT 0 +#define TRBIDR_FLAG BIT(5) +#define TRBIDR_PROG BIT(4) +#define TRBIDR_ALIGN_MASK GENMASK(3, 0) +#define TRBIDR_ALIGN_SHIFT 0 + #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -475,9 +526,15 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) +#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) +#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) +#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) @@ -565,8 +622,10 @@ #define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_ENIA_SHIFT 31 + #define SCTLR_ELx_ITFSB (BIT(37)) -#define SCTLR_ELx_ENIA (BIT(31)) +#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT)) #define SCTLR_ELx_ENIB (BIT(30)) #define SCTLR_ELx_ENDA (BIT(27)) #define SCTLR_ELx_EE (BIT(25)) @@ -579,9 +638,6 @@ #define SCTLR_ELx_A (BIT(1)) #define SCTLR_ELx_M (BIT(0)) -#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ - SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB) - /* SCTLR_EL2 specific flags. */ #define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ @@ -593,10 +649,15 @@ #define ENDIAN_SET_EL2 0 #endif +#define INIT_SCTLR_EL2_MMU_ON \ + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \ + SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) + #define INIT_SCTLR_EL2_MMU_OFF \ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) /* SCTLR_EL1 specific flags. 
*/ +#define SCTLR_EL1_EPAN (BIT(57)) #define SCTLR_EL1_ATA0 (BIT(42)) #define SCTLR_EL1_TCF0_SHIFT 38 @@ -637,7 +698,7 @@ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ - SCTLR_EL1_RES1) + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) @@ -840,6 +901,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_TRBE_SHIFT 44 #define ID_AA64DFR0_TRACE_FILT_SHIFT 40 #define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 #define ID_AA64DFR0_PMSVER_SHIFT 32 @@ -1032,6 +1094,66 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) + +/* GIC Hypervisor interface registers */ +/* ICH_MISR_EL2 bit definitions */ +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +/* ICH_LR*_EL2 bit definitions */ +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) + +/* ICH_HCR_EL2 bit definitions */ +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) +#define ICH_HCR_TC (1 << 10) +#define ICH_HCR_TALL0 (1 << 11) +#define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) + +/* ICH_VMCR_EL2 bit definitions */ +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +/* ICH_VTR_EL2 bit definitions */ +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 9f4e3b266f21..6623c99f0984 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -55,6 +55,8 @@ void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec void arch_release_task_struct(struct task_struct *tsk); +int arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src); #endif diff --git 
a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 3b8dca4eb08d..ec2db3419c41 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -17,17 +17,9 @@ int pcibus_to_node(struct pci_bus *bus); #include <linux/arch_topology.h> void update_freq_counters_refs(void); -void topology_scale_freq_tick(void); - -#ifdef CONFIG_ARM64_AMU_EXTN -/* - * Replace task scheduler's default counter-based - * frequency-invariance scale factor setting. - */ -#define arch_scale_freq_tick topology_scale_freq_tick -#endif /* CONFIG_ARM64_AMU_EXTN */ /* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_tick topology_scale_freq_tick #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 0deb88467111..b5f08621fa29 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -20,6 +20,7 @@ #include <asm/cpufeature.h> #include <asm/mmu.h> +#include <asm/mte.h> #include <asm/ptrace.h> #include <asm/memory.h> #include <asm/extable.h> @@ -188,6 +189,23 @@ static inline void __uaccess_enable_tco(void) ARM64_MTE, CONFIG_KASAN_HW_TAGS)); } +/* + * These functions disable tag checking only if in MTE async mode + * since the sync mode generates exceptions synchronously and the + * nofault or load_unaligned_zeropad can handle them. + */ +static inline void __uaccess_disable_tco_async(void) +{ + if (system_uses_mte_async_mode()) + __uaccess_disable_tco(); +} + +static inline void __uaccess_enable_tco_async(void) +{ + if (system_uses_mte_async_mode()) + __uaccess_enable_tco(); +} + static inline void uaccess_disable_privileged(void) { __uaccess_disable_tco(); @@ -307,8 +325,10 @@ do { \ do { \ int __gkn_err = 0; \ \ + __uaccess_enable_tco_async(); \ __raw_get_mem("ldr", *((type *)(dst)), \ (__force type *)(src), __gkn_err); \ + __uaccess_disable_tco_async(); \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) @@ -380,8 +400,10 @@ do { \ do { \ int __pkn_err = 0; \ \ + __uaccess_enable_tco_async(); \ __raw_put_mem("str", *((type *)(src)), \ (__force type *)(dst), __pkn_err); \ + __uaccess_disable_tco_async(); \ if (unlikely(__pkn_err)) \ goto err_label; \ } while(0) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 949788f5ba40..727bfc3be99b 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 443 +#define __NR_compat_syscalls 447 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 3d874f624056..7859749d6628 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -893,6 +893,14 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_path 443 +__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define 
__NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 7508b0ac1d21..ecb6fd4c3c64 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -155,7 +155,8 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { const struct vdso_data *ret; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 631ab1281633..4f7a629df81f 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -83,11 +83,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, */ isb(); asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); - /* - * This isb() is required to prevent that the seq lock is - * speculated.# - */ - isb(); + arch_counter_enforce_ordering(res); return res; } @@ -100,7 +96,7 @@ const struct vdso_data *__arch_get_vdso_data(void) #ifdef CONFIG_TIME_NS static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(void) +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 2ca708ab9b20..7a22aeea9bb5 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -1,4 +1,28 @@ #ifndef _ASM_ARM64_VMALLOC_H #define _ASM_ARM64_VMALLOC_H +#include <asm/page.h> + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + /* + * Only 4k granule supports level 1 block mappings. + * SW table walks can't handle removal of intermediate entries. 
+ */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); +} + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + /* See arch_vmap_pud_supported() */ + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); +} + +#endif + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 3333950b5909..2dcb104c645b 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,7 +53,9 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, offset; + unsigned long ret, tmp; + + __uaccess_enable_tco_async(); /* Load word from unaligned pointer addr */ asm( @@ -61,9 +63,9 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) "2:\n" " .pushsection .fixup,\"ax\"\n" " .align 2\n" - "3: and %1, %2, #0x7\n" - " bic %2, %2, #0x7\n" - " ldr %0, [%2]\n" + "3: bic %1, %2, #0x7\n" + " ldr %0, [%1]\n" + " and %1, %2, #0x7\n" " lsl %1, %1, #0x3\n" #ifndef __AARCH64EB__ " lsr %0, %0, %1\n" @@ -73,9 +75,11 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) " b 2b\n" " .popsection\n" _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (offset) + : "=&r" (ret), "=&r" (tmp) : "r" (addr), "Q" (*(unsigned long *)addr)); + __uaccess_disable_tco_async(); + return ret; } diff --git a/arch/arm64/include/asm/xen/swiotlb-xen.h b/arch/arm64/include/asm/xen/swiotlb-xen.h new file mode 100644 index 000000000000..455ade5d5320 --- /dev/null +++ b/arch/arm64/include/asm/xen/swiotlb-xen.h @@ -0,0 +1 @@ +#include <xen/arm/swiotlb-xen.h>
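To see why the word-at-a-time.h hunk wraps load_unaligned_zeropad() in __uaccess_enable_tco_async()/__uaccess_disable_tco_async() and reworks the fault fixup, recall the caller pattern: word-at-a-time string scanning deliberately loads a whole 64-bit word that may extend past the end of the string, and relies on the exception fixup (and, with MTE, on tag-check override) to make that overread harmless. The sketch below is a userspace approximation only, not kernel code: wordwise_strlen() and the NUL-padded buffer are invented for illustration, and memcpy() stands in for the fixup-protected unaligned load.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Classic has-zero word-at-a-time scan, the pattern load_unaligned_zeropad() serves. */
static size_t wordwise_strlen(const char *p, size_t padded_len)
{
	const uint64_t ones  = 0x0101010101010101ULL;
	const uint64_t highs = 0x8080808080808080ULL;
	size_t len = 0;

	while (len + sizeof(uint64_t) <= padded_len) {
		uint64_t v;

		memcpy(&v, p + len, sizeof(v));   /* unaligned load stand-in */
		if ((v - ones) & ~v & highs) {    /* this word contains a NUL byte */
			while (p[len])
				len++;
			return len;
		}
		len += sizeof(v);
	}
	return len;
}

int main(void)
{
	static const char buf[16] = "hello, arm64";   /* padded with NUL bytes */

	printf("%zu\n", wordwise_strlen(buf, sizeof(buf)));
	return 0;
}

In the kernel the same has-zero trick sits behind has_zero()/find_zero() earlier in word-at-a-time.h; the reordered fixup in the hunk above redoes a faulting straddled load from an aligned address and shifts away the bytes that were never part of the string.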
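The pointer_auth.h and processor.h changes earlier in this diff (ptrauth_set_enabled_keys(), ptrauth_get_enabled_keys(), PR_PAC_ENABLED_KEYS_MASK and the PAC_{SET,GET}_ENABLED_KEYS hooks) back a per-task prctl interface for switching individual PAC keys on and off. A minimal userspace sketch follows; it assumes the installed <linux/prctl.h> already exposes PR_PAC_SET_ENABLED_KEYS and PR_PAC_GET_ENABLED_KEYS alongside the existing PR_PAC_AP*KEY bits, and is illustrative rather than part of the patch.

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/*
	 * Disable the IB key for this task: arg2 selects the affected keys,
	 * arg3 says which of those stay enabled. With the key disabled, the
	 * corresponding PAC instructions behave as NOPs, which a task might
	 * want if it manages that key's signing scheme itself.
	 */
	if (prctl(PR_PAC_SET_ENABLED_KEYS, PR_PAC_APIBKEY, 0, 0, 0))
		perror("PR_PAC_SET_ENABLED_KEYS");

	int keys = prctl(PR_PAC_GET_ENABLED_KEYS, 0, 0, 0, 0);
	if (keys < 0)
		perror("PR_PAC_GET_ENABLED_KEYS");
	else
		printf("enabled PAC key mask: 0x%x\n", keys);

	return 0;
}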