Diffstat (limited to 'arch/x86/include')
34 files changed, 534 insertions, 94 deletions
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index f50de6951738..532f85e6651f 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -112,8 +112,7 @@ static inline __sum16 csum_fold(__wsum sum) } static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, + __u32 len, __u8 proto, __wsum sum) { asm("addl %1, %0 ;\n" @@ -131,8 +130,7 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, * returns a 16-bit checksum, already complemented */ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, + __u32 len, __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); @@ -151,8 +149,7 @@ static inline __sum16 ip_compute_csum(const void *buff, int len) #define _HAVE_ARCH_IPV6_CSUM static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, - __u32 len, unsigned short proto, - __wsum sum) + __u32 len, __u8 proto, __wsum sum) { asm("addl 0(%1), %0 ;\n" "adcl 4(%1), %0 ;\n" diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index cd00e1774491..c020ee75dce7 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -84,8 +84,8 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * 32bit unfolded. */ static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { asm(" addl %1, %0\n" " adcl %2, %0\n" @@ -110,8 +110,8 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, * complemented and ready to be filled in. 
*/ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, __wsum sum) + __u32 len, __u8 proto, + __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } @@ -177,7 +177,7 @@ struct in6_addr; #define _HAVE_ARCH_IPV6_CSUM 1 extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, - __u32 len, unsigned short proto, __wsum sum); + __u32 len, __u8 proto, __wsum sum); static inline unsigned add32_with_carry(unsigned a, unsigned b) { diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index acdee09228b3..ebb102e1bbc7 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -316,9 +316,10 @@ static inline bool is_x32_task(void) return false; } -static inline bool is_compat_task(void) +static inline bool in_compat_syscall(void) { return is_ia32_task() || is_x32_task(); } +#define in_compat_syscall in_compat_syscall /* override the generic impl */ #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 68e4e8258b84..3636ec06c887 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -26,6 +26,7 @@ enum cpuid_leafs CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, + CPUID_7_ECX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -48,28 +49,42 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; test_bit(bit, (unsigned long *)((c)->x86_capability)) #define REQUIRED_MASK_BIT_SET(bit) \ - ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ - (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ - (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ - (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \ - (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ - (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ - (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ - (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ - (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ - (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0 )) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1 )) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2 )) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3 )) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4 )) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5 )) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6 )) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7 )) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8 )) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9 )) || \ + (((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) || \ + (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \ + (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \ + (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \ + (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \ + (((bit)>>5)==15 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \ + (((bit)>>5)==16 && (1UL<<((bit)&31) & REQUIRED_MASK16)) ) #define DISABLED_MASK_BIT_SET(bit) \ - ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0)) || \ - (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1)) || \ - (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2)) || \ - (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3)) || \ - (((bit)>>5)==4 && (1UL<<((bit)&31) &
DISABLED_MASK4)) || \ - (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5)) || \ - (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6)) || \ - (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7)) || \ - (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8)) || \ - (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) ) + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1 )) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2 )) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3 )) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4 )) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5 )) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6 )) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7 )) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8 )) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9 )) || \ + (((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) || \ + (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \ + (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \ + (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \ + (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \ + (((bit)>>5)==15 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \ + (((bit)>>5)==16 && (1UL<<((bit)&31) & DISABLED_MASK16)) ) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 074b7604bd51..3d1a84383162 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -12,7 +12,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 16 /* N 32-bit words worth of info */ +#define NCAPINTS 17 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -274,6 +274,10 @@ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index f226df064660..39343be7d4f4 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -28,6 +28,14 @@ # define DISABLE_CENTAUR_MCR 0 #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) +# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) +#else +# define DISABLE_PKU 0 +# define DISABLE_OSPKE 0 +#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ + /* * Make sure to add features to the correct mask */ @@ -41,5 +49,12 @@ #define DISABLED_MASK7 0 #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) +#define DISABLED_MASK10 0 +#define DISABLED_MASK11 0 +#define DISABLED_MASK12 0 +#define DISABLED_MASK13 0 +#define DISABLED_MASK14 0 +#define DISABLED_MASK15 0 +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78c4998..53748c45e488 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include <asm/fpu/api.h> #include <asm/pgtable.h> +#include <asm/tlb.h> /* * We map
the EFI regions needed for runtime services non-contiguously, @@ -25,6 +26,8 @@ #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" +#define MAX_CMDLINE_ADDRESS UINT_MAX + #ifdef CONFIG_X86_32 @@ -64,6 +67,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; + #define efi_call_virt(f, ...) \ ({ \ efi_status_t __s; \ @@ -71,7 +85,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + \ + if (efi_scratch.use_pgd) { \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + } \ + \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ + \ + if (efi_scratch.use_pgd) { \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + } \ + \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ @@ -111,11 +138,12 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); -extern void __init efi_runtime_mkexec(void); +extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a2124343edf5..31ac8e6d9f36 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -25,6 +25,8 @@ extern void fpu__activate_curr(struct fpu *fpu); extern void fpu__activate_fpstate_read(struct fpu *fpu); extern void fpu__activate_fpstate_write(struct fpu *fpu); +extern void fpu__current_fpstate_write_begin(void); +extern void fpu__current_fpstate_write_end(void); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 1c6f6ac52ad0..36b90bbfc69f 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -108,6 +108,8 @@ enum xfeature { XFEATURE_OPMASK, XFEATURE_ZMM_Hi256, XFEATURE_Hi16_ZMM, + XFEATURE_PT_UNIMPLEMENTED_SO_FAR, + XFEATURE_PKRU, XFEATURE_MAX, }; @@ -120,6 +122,7 @@ enum xfeature { #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) +#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ @@ -212,6 +215,15 @@ struct avx_512_hi16_state { struct reg_512_bit hi16_zmm[16]; } __packed; +/* + * State component 9: 32-bit PKRU register. The state is + * 8 bytes long but only 4 bytes are used currently.
+ */ +struct pkru_state { + u32 pkru; + u32 pad; +} __packed; + struct xstate_header { u64 xfeatures; u64 xcomp_bv; diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index f23cd8c80b1c..38951b0fcc5a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -24,7 +24,8 @@ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ - XFEATURE_MASK_Hi16_ZMM) + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU) /* Supported features which require eager state saving */ #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 24938852db30..21b66dbf3601 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -58,7 +58,7 @@ int ftrace_int3_handler(struct pt_regs *regs); #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) { - if (is_compat_task()) + if (in_compat_syscall()) return true; return false; } diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h deleted file mode 100644 index b3799d88ffcf..000000000000 --- a/arch/x86/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 44adbb819041..f62a9f37f79f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -32,6 +32,7 @@ #include <asm/mtrr.h> #include <asm/msr-index.h> #include <asm/asm.h> +#include <asm/kvm_page_track.h> #define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 @@ -83,7 +84,8 @@ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP)) + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ + | X86_CR4_PKE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -186,12 +188,14 @@ enum { #define PFERR_USER_BIT 2 #define PFERR_RSVD_BIT 3 #define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) #define PFERR_USER_MASK (1U << PFERR_USER_BIT) #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) +#define PFERR_PK_MASK (1U << PFERR_PK_BIT) /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 @@ -214,6 +218,14 @@ struct kvm_mmu_memory_cache { void *objects[KVM_NR_MEM_OBJS]; }; +/* + * The pages used as guest page tables by the soft MMU are tracked by + * kvm_memory_slot.arch.gfn_track, which is 16 bits, so the role bits used + * by an indirect shadow page cannot be more than 15 bits. + * + * Currently, we use 14 bits: @level, @cr4_pae, @quadrant, @access, + * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. + */ union kvm_mmu_page_role { unsigned word; struct { @@ -276,7 +288,7 @@ struct kvm_mmu_page { #endif /* Number of writes since the last time traversal visited this page. */ - int write_flooding_count; + atomic_t write_flooding_count; }; struct kvm_pio_request { @@ -326,6 +338,14 @@ struct kvm_mmu { */ u8 permissions[16]; + /* + * The pkru_mask indicates if protection key checks are needed. It + * consists of 16 domains indexed by page fault error code bits [4:1], + * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
+ * Each domain has 2 bits which are ANDed with AD and WD from PKRU. + */ + u32 pkru_mask; + u64 *pae_root; u64 *lm_root; @@ -338,12 +358,8 @@ struct kvm_mmu { struct rsvd_bits_validate guest_rsvd_check; - /* - * Bitmap: bit set = last pte in walk - * index[0:1]: level (zero-based) - * index[2]: pte.ps - */ - u8 last_pte_bitmap; + /* Can have large pages at levels 2..last_nonleaf_level-1. */ + u8 last_nonleaf_level; bool nx; @@ -498,7 +514,6 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; - bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -644,12 +659,13 @@ struct kvm_vcpu_arch { }; struct kvm_lpage_info { - int write_count; + int disallow_lpage; }; struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; + unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; }; /* @@ -694,6 +710,8 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_page_track_notifier_head track_notifier_head; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; @@ -754,6 +772,8 @@ struct kvm_arch { bool irqchip_split; u8 nr_reserved_ioapic_pins; + + bool disabled_lapic_found; }; struct kvm_vm_stat { @@ -865,6 +885,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + u32 (*get_pkru)(struct kvm_vcpu *vcpu); void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); @@ -988,6 +1009,8 @@ void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); void kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_init_vm(struct kvm *kvm); +void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask); @@ -1127,8 +1150,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h new file mode 100644 index 000000000000..c2b8d24a235c --- /dev/null +++ b/arch/x86/include/asm/kvm_page_track.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_KVM_PAGE_TRACK_H +#define _ASM_X86_KVM_PAGE_TRACK_H + +enum kvm_page_track_mode { + KVM_PAGE_TRACK_WRITE, + KVM_PAGE_TRACK_MAX, +}; + +/* + * The notifier represented by @kvm_page_track_notifier_node is linked into + * the head, which will be notified when the guest triggers a track event. + * + * Write access on the head is protected by kvm->mmu_lock, read access + * is protected by track_srcu. + */ +struct kvm_page_track_notifier_head { + struct srcu_struct track_srcu; + struct hlist_head track_notifier_list; +}; + +struct kvm_page_track_notifier_node { + struct hlist_node node; + + /* + * It is called when the guest writes to a write-tracked page, + * after write emulation has finished. + * + * @vcpu: the vcpu where the write access happened. + * @gpa: the physical address written by the guest.
+ * @new: the data written to the address. + * @bytes: the written length. + */ + void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); +}; + +void kvm_page_track_init(struct kvm *kvm); + +void kvm_page_track_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages); + +void kvm_slot_page_track_add_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); +void kvm_slot_page_track_remove_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); +bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode); + +void +kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void +kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); +#endif diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index e795f5274217..7e68f9558552 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -25,7 +25,6 @@ #include <linux/module.h> #include <linux/ftrace.h> -#ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) { #ifndef CC_USING_FENTRY @@ -40,8 +39,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; } -#else -#error Include linux/livepatch.h, not asm/livepatch.h -#endif #endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index bfd9b2a35a0b..84280029cafd 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -52,15 +52,15 @@ struct ldt_struct { /* * Used for LDT copy/destruction.
*/ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); +int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context_ldt(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context(struct task_struct *tsk, - struct mm_struct *mm) +static inline int init_new_context_ldt(struct task_struct *tsk, + struct mm_struct *mm) { return 0; } -static inline void destroy_context(struct mm_struct *mm) {} +static inline void destroy_context_ldt(struct mm_struct *mm) {} #endif static inline void load_mm_ldt(struct mm_struct *mm) @@ -104,6 +104,17 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) #endif } +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* forward any error (e.g. -ENOMEM) from the LDT setup */ + return init_new_context_ldt(tsk, mm); +} +static inline void destroy_context(struct mm_struct *mm) +{ + destroy_context_ldt(mm); +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -275,4 +286,68 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, mpx_notify_unmap(mm, vma, start, end); } +static inline int vma_pkey(struct vm_area_struct *vma) +{ + u16 pkey = 0; +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | + VM_PKEY_BIT2 | VM_PKEY_BIT3; + pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; +#endif + return pkey; +} + +static inline bool __pkru_allows_pkey(u16 pkey, bool write) +{ + u32 pkru = read_pkru(); + + if (!__pkru_allows_read(pkru, pkey)) + return false; + if (write && !__pkru_allows_write(pkru, pkey)) + return false; + + return true; +} + +/* + * We only want to enforce protection keys on the current process + * because we effectively have no access to PKRU for other + * processes or any way to tell *which* PKRU in a threaded + * process we could use. + * + * So do not enforce things if the VMA is not from the current + * mm, or if we are in a kernel thread. + */ +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + /* + * Should PKRU be enforced on the access to this VMA? If + * the VMA is from another process, then PKRU has no + * relevance and should not be enforced.
+ */ + if (current->mm != vma->vm_mm) + return true; + + return false; +} + +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, bool foreign) +{ + /* pkeys never affect instruction fetches */ + if (execute) + return true; + /* allow access if the VMA is not one from this process */ + if (foreign || vma_is_foreign(vma)) + return true; + return __pkru_allows_pkey(vma_pkey(vma), write); +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write); +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 984ab75bf621..2da46ac16e37 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -235,10 +235,10 @@ #define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) /* IA32_HWP_CAPABILITIES */ -#define HWP_HIGHEST_PERF(x) (x & 0xff) -#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) -#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) -#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ #define HWP_MIN_PERF(x) (x & 0xff) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f6192502149e..601f1b8f9961 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -13,6 +13,7 @@ #include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> +#include <asm/frame.h> static inline int paravirt_enabled(void) { @@ -756,15 +757,19 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, * call. The return value in rax/eax will not be saved, even for void * functions. */ +#define PV_THUNK_NAME(func) "__raw_callee_save_" #func #define PV_CALLEE_SAVE_REGS_THUNK(func) \ extern typeof(func) __raw_callee_save_##func; \ \ asm(".pushsection .text;" \ - ".globl __raw_callee_save_" #func " ; " \ - "__raw_callee_save_" #func ": " \ + ".globl " PV_THUNK_NAME(func) ";" \ + ".type " PV_THUNK_NAME(func) ", @function;" \ + PV_THUNK_NAME(func) ":" \ + FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ + FRAME_END \ "ret;" \ ".popsection") diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 77db5616a473..e8c2326478c8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -466,8 +466,9 @@ int paravirt_disable_iospace(void); * makes sure the incoming and outgoing types are always correct. 
*/ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS \ - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx +#define PVOP_VCALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \ + register void *__sp asm("esp") #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) @@ -485,9 +486,10 @@ int paravirt_disable_iospace(void); #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ /* [re]ax isn't an arg, but the return val */ -#define PVOP_VCALL_ARGS \ - unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx, __eax = __eax +#define PVOP_VCALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx, __eax = __eax; \ + register void *__sp asm("rsp") #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) @@ -526,7 +528,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr \ + : call_clbr, "+r" (__sp) \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -536,7 +538,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr \ + : call_clbr, "+r" (__sp) \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -563,7 +565,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr \ + : call_clbr, "+r" (__sp) \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 462594320d39..9ab7507ca1c2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -20,6 +20,9 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN + void *fwnode; /* IRQ domain for MSI assignment */ +#endif }; extern int pci_routeirq; @@ -32,6 +35,7 @@ extern int noioapicreroute; static inline int pci_domain_nr(struct pci_bus *bus) { struct pci_sysdata *sd = bus->sysdata; + return sd->domain; } @@ -41,6 +45,17 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + + return sd->fwnode; +} + +#define pci_root_bus_fwnode _pci_root_bus_fwnode +#endif + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -105,9 +120,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #include <asm/pci_64.h> #endif -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - /* generic pci stuff */ #include <asm-generic/pci.h> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0687c4748b8f..97f3242e133c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -99,6 +99,20 @@ static inline int pte_dirty(pte_t pte) return pte_flags(pte) & _PAGE_DIRTY; } + +static inline u32 read_pkru(void) +{ + if (boot_cpu_has(X86_FEATURE_OSPKE)) + return __read_pkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + if (boot_cpu_has(X86_FEATURE_OSPKE)) + __write_pkru(pkru); +} + static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; @@ -911,6 +925,36 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) } #endif +#define PKRU_AD_BIT 
0x1 +#define PKRU_WD_BIT 0x2 +#define PKRU_BITS_PER_PKEY 2 + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. + */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u16 pte_flags_pkey(unsigned long pte_flags) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + /* ifdef to avoid doing 59-bit shift on 32-bit values */ + return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; +#else + return 0; +#endif +} + #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4432ab7f407c..7b5efe264eff 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -20,13 +20,18 @@ #define _PAGE_BIT_SOFTW2 10 /* " */ #define _PAGE_BIT_SOFTW3 11 /* " */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */ +#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */ +#define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */ +#define _PAGE_BIT_PKEY_BIT3 62 /* Protection Keys, bit 4/4 */ +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ -#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ -#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 -#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ @@ -47,8 +52,24 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) +#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) +#define _PAGE_PKEY_BIT2 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT2) +#define _PAGE_PKEY_BIT3 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT3) +#else +#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 0)) +#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 0)) +#define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) +#define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) +#endif #define __HAVE_ARCH_PTE_SPECIAL +#define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ + _PAGE_PKEY_BIT1 | \ + _PAGE_PKEY_BIT2 | \ + _PAGE_PKEY_BIT3) + #ifdef CONFIG_KMEMCHECK #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #else @@ -99,7 +120,12 @@ #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ _PAGE_DIRTY) -/* Set of bits not changed in pte_modify */ +/* + * Set of bits not changed in pte_modify. The pte's + * protection key is treated like _PAGE_RW, for + * instance, and is *not* included in this mask since + * pte_modify() does modify it. 
+ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ _PAGE_SOFT_DIRTY) @@ -215,7 +241,10 @@ enum page_cache_mode { /* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) -/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */ +/* + * Extracts the flags from a (pte|pmd|pud|pgd)val_t. + * This includes the protection key value. + */ #define PTE_FLAGS_MASK (~PTE_PFN_MASK) typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h new file mode 100644 index 000000000000..7b84565c916c --- /dev/null +++ b/arch/x86/include/asm/pkeys.h @@ -0,0 +1,34 @@ +#ifndef _ASM_X86_PKEYS_H +#define _ASM_X86_PKEYS_H + +#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) + +extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); + +/* + * Try to dedicate one of the protection keys to be used as an + * execute-only protection key. + */ +#define PKEY_DEDICATED_EXECUTE_ONLY 15 +extern int __execute_only_pkey(struct mm_struct *mm); +static inline int execute_only_pkey(struct mm_struct *mm) +{ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return 0; + + return __execute_only_pkey(mm); +} + +extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey); +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return 0; + + return __arch_override_mprotect_pkey(vma, prot, pkey); +} + +#endif /* _ASM_X86_PKEYS_H */ diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index c57fd1ea9689..bf8b35d2035a 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) arch_wb_cache_pmem(addr, size); } +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + clflush_cache_range((void __force *) addr, size); +} + static inline bool __arch_has_wmb_pmem(void) { /* diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 01bcde84d3e4..d397deb58146 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -94,10 +94,19 @@ static __always_inline bool should_resched(int preempt_offset) #ifdef CONFIG_PREEMPT extern asmlinkage void ___preempt_schedule(void); -# define __preempt_schedule() asm ("call ___preempt_schedule") +# define __preempt_schedule() \ +({ \ + register void *__sp asm(_ASM_SP); \ + asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \ +}) + extern asmlinkage void preempt_schedule(void); extern asmlinkage void ___preempt_schedule_notrace(void); -# define __preempt_schedule_notrace() asm ("call ___preempt_schedule_notrace") +# define __preempt_schedule_notrace() \ +({ \ + register void *__sp asm(_ASM_SP); \ + asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \ +}) extern asmlinkage void preempt_schedule_notrace(void); #endif diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 9f92c180ed2f..9d55f9b6e167 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -36,8 +36,10 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); */ asm (".pushsection .text;" ".globl " PV_UNLOCK ";" + ".type " PV_UNLOCK ", @function;" ".align 4,0x90;"
PV_UNLOCK ": " + FRAME_BEGIN "push %rdx;" "mov $0x1,%eax;" "xor %edx,%edx;" @@ -45,6 +47,7 @@ asm (".pushsection .text;" "cmp $0x1,%al;" "jne .slowpath;" "pop %rdx;" + FRAME_END "ret;" ".slowpath: " "push %rsi;" @@ -52,6 +55,7 @@ asm (".pushsection .text;" "call " PV_UNLOCK_SLOWPATH ";" "pop %rsi;" "pop %rdx;" + FRAME_END "ret;" ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" ".popsection"); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 5c6e4fb370f5..4916144e3c42 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -92,5 +92,12 @@ #define REQUIRED_MASK7 0 #define REQUIRED_MASK8 0 #define REQUIRED_MASK9 0 +#define REQUIRED_MASK10 0 +#define REQUIRED_MASK11 0 +#define REQUIRED_MASK12 0 +#define REQUIRED_MASK13 0 +#define REQUIRED_MASK14 0 +#define REQUIRED_MASK15 0 +#define REQUIRED_MASK16 0 #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2270e41b32fd..d96d04377765 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -98,6 +98,44 @@ static inline void native_write_cr8(unsigned long val) } #endif +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +static inline u32 __read_pkru(void) +{ + u32 ecx = 0; + u32 edx, pkru; + + /* + * "rdpkru" instruction. Places PKRU contents into EAX, + * clears EDX and requires that ecx=0. + */ + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (pkru), "=d" (edx) + : "c" (ecx)); + return pkru; +} + +static inline void __write_pkru(u32 pkru) +{ + u32 ecx = 0, edx = 0; + + /* + * "wrpkru" instruction. Loads the contents of EAX into PKRU, + * requires that ecx = edx = 0. + */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(ecx), "d"(edx)); +} +#else +static inline u32 __read_pkru(void) +{ + return 0; +} + +static inline void __write_pkru(u32 pkru) +{ +} +#endif + static inline void native_wbinvd(void) { asm volatile("wbinvd": : :"memory"); diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 70bbe39043a9..7c247e7404be 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -37,7 +37,7 @@ print_context_stack_bp(struct thread_info *tinfo, /* Generic stack tracer with callbacks */ struct stacktrace_ops { - void (*address)(void *data, unsigned long address, int reliable); + int (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); walk_stack_t walk_stack; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c0f27d7ea7ff..a969ae607be8 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -105,9 +105,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un struct exception_table_entry { int insn, fixup, handler; }; -/* This is not the generic standard exception_table_entry format */ -#define ARCH_HAS_SORT_EXTABLE -#define ARCH_HAS_SEARCH_EXTABLE + +#define ARCH_HAS_RELATIVE_EXTABLE extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); @@ -179,10 +178,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + register void *__sp asm(_ASM_SP); \ __chk_user_ptr(ptr); \ might_fault(); \ - asm volatile("call __get_user_%P3" \ - : "=a" (__ret_gu), "=r"
(__val_gu) \ + asm volatile("call __get_user_%P4" \ + : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 3bcdcc84259d..a12a047184ee 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -110,9 +110,10 @@ extern struct { char _entry[32]; } hypercall_page[]; register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ - register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \ + register void *__sp asm(_ASM_SP); -#define __HYPERCALL_0PARAM "=r" (__res) +#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp) #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 7956412d09bd..9b1a91834ac8 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -226,7 +226,9 @@ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) /* Declare the various hypercall operations. */ -#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_POST_MESSAGE 0x005c +#define HVCALL_SIGNAL_EVENT 0x005d #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index 513b05f15bb4..39bca7fac087 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -6,6 +6,28 @@ #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +/* + * Take the 4 protection key bits out of the vma->vm_flags + * value and turn them into the bits that we can put into + * a pte. + * + * Only override these if Protection Keys are available + * (which is only on 64-bit). + */ +#define arch_vm_get_page_prot(vm_flags) __pgprot( \ + ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ + ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ + ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ + ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) + +#define arch_calc_vm_prot_bits(prot, key) ( \ + ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ + ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ + ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ + ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) +#endif + #include <asm-generic/mman.h> #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 79887abcb5e1..567de50a4c2a 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -118,6 +118,8 @@ #define X86_CR4_SMEP _BITUL(X86_CR4_SMEP_BIT) #define X86_CR4_SMAP_BIT 21 /* enable SMAP support */ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) +#define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ +#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) /* * x86-64 Task Priority Register, CR8
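The PKRU helpers added in pgtable.h and mmu_context.h above give each of the 16 protection keys two bits in the PKRU register: Access-Disable (AD) and Write-Disable (WD), where AD also blocks writes. A minimal user-space sketch of those same checks; the names below are local stand-ins, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PKRU_AD_BIT        0x1
#define PKRU_WD_BIT        0x2
#define PKRU_BITS_PER_PKEY 2

/* Mirrors __pkru_allows_read(): a key is readable unless AD is set. */
static bool pkru_allows_read(uint32_t pkru, uint16_t pkey)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;

	return !(pkru & (PKRU_AD_BIT << shift));
}

/* Mirrors __pkru_allows_write(): AD disables writes too, so test both bits. */
static bool pkru_allows_write(uint32_t pkru, uint16_t pkey)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;

	return !(pkru & ((PKRU_AD_BIT | PKRU_WD_BIT) << shift));
}

int main(void)
{
	/* Write-disable key 1; leave every other key fully accessible. */
	uint32_t pkru = PKRU_WD_BIT << (1 * PKRU_BITS_PER_PKEY);

	printf("key 1 read:  %d\n", pkru_allows_read(pkru, 1));  /* prints 1 */
	printf("key 1 write: %d\n", pkru_allows_write(pkru, 1)); /* prints 0 */
	return 0;
}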
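Likewise, arch_calc_vm_prot_bits() in uapi/asm/mman.h and vma_pkey() in mmu_context.h shuttle the 4-bit key between the pkey argument, vma->vm_flags, and (via arch_vm_get_page_prot()) PTE bits 59-62. A round-trip sketch, assuming a 64-bit build; the VM_PKEY_* values here are illustrative stand-ins, since the real vm_flags bits are defined outside this diff:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's VM_PKEY_* vm_flags bits. */
#define VM_PKEY_SHIFT 32
#define VM_PKEY_BIT0  (1UL << (VM_PKEY_SHIFT + 0))
#define VM_PKEY_BIT1  (1UL << (VM_PKEY_SHIFT + 1))
#define VM_PKEY_BIT2  (1UL << (VM_PKEY_SHIFT + 2))
#define VM_PKEY_BIT3  (1UL << (VM_PKEY_SHIFT + 3))

/* Mirrors arch_calc_vm_prot_bits(): 4-bit key -> vm_flags bits. */
static unsigned long calc_vm_pkey_bits(int key)
{
	return ((key) & 0x1 ? VM_PKEY_BIT0 : 0) |
	       ((key) & 0x2 ? VM_PKEY_BIT1 : 0) |
	       ((key) & 0x4 ? VM_PKEY_BIT2 : 0) |
	       ((key) & 0x8 ? VM_PKEY_BIT3 : 0);
}

/* Mirrors vma_pkey(): vm_flags bits -> 4-bit key. */
static int vm_flags_pkey(unsigned long vm_flags)
{
	unsigned long mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
			     VM_PKEY_BIT2 | VM_PKEY_BIT3;

	return (vm_flags & mask) >> VM_PKEY_SHIFT;
}

int main(void)
{
	int key = 11; /* 0b1011 */

	/* Prints "round trip: 11". */
	printf("round trip: %d\n", vm_flags_pkey(calc_vm_pkey_bits(key)));
	return 0;
}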
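Finally, the recurring "+r" (__sp) operand added in the paravirt_types.h, preempt.h, uaccess.h and xen/hypercall.h hunks marks inline asm that contains a CALL instruction, so the compiler knows the asm touches the stack and finishes setting up the stack frame (rather than, say, keeping live data in the red zone below the stack pointer) before the asm runs. A minimal sketch of the idiom, assuming x86-64 and GCC; callee() is a stand-in for targets such as ___preempt_schedule:

/* Stand-in for an asmlinkage target like ___preempt_schedule. */
void callee(void)
{
}

void do_call(void)
{
	register void *sp asm("rsp");

	/*
	 * Listing the stack pointer as an in/out operand tells the
	 * compiler that this asm pushes onto the stack (CALL does),
	 * so the frame must be fully set up first.
	 */
	asm volatile("call callee" : "+r" (sp) : : "memory");
}

int main(void)
{
	do_call();
	return 0;
}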