From f7387eff4bad33d12719c66c43541c095556ae4e Mon Sep 17 00:00:00 2001
From: Seongman Lee
Date: Sun, 11 May 2025 18:23:28 +0900
Subject: x86/sev: Fix operator precedence in GHCB_MSR_VMPL_REQ_LEVEL macro

The GHCB_MSR_VMPL_REQ_LEVEL macro lacked parentheses around the bitmask
expression. Because << binds more tightly than &, the shift was applied
to GENMASK_ULL(7, 0) rather than to the masked VMPL value. As a result,
when requesting VMPL1 (e.g., GHCB_MSR_VMPL_REQ_LEVEL(1)), incorrect
values such as 0x000000016 were generated instead of the intended
0x100000016 (the requested VMPL level is specified in GHCBData[39:32]).

Fix the precedence issue by grouping the masked value before applying
the shift.

  [ bp: Massage commit message. ]

Fixes: 34ff65901735 ("x86/sev: Use kernel provided SVSM Calling Areas")
Signed-off-by: Seongman Lee
Signed-off-by: Borislav Petkov (AMD)
Link: https://lore.kernel.org/20250511092329.12680-1-cloudlee1719@gmail.com
---
 arch/x86/include/asm/sev-common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index acb85b9346d8..0020d77a0800 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -116,7 +116,7 @@ enum psc_op {
 #define GHCB_MSR_VMPL_REQ		0x016
 #define GHCB_MSR_VMPL_REQ_LEVEL(v)			\
 	/* GHCBData[39:32] */				\
-	(((u64)(v) & GENMASK_ULL(7, 0) << 32) |	\
+	((((u64)(v) & GENMASK_ULL(7, 0)) << 32) |	\
 	/* GHCBDdata[11:0] */				\
 	GHCB_MSR_VMPL_REQ)
-- 
cgit v1.2.3
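
To see the precedence bug in isolation, consider a minimal user-space
sketch (standalone and illustrative only: GENMASK_ULL is open-coded and
the macro names are shortened, so this is not the kernel's code
verbatim):

    #include <stdio.h>
    #include <stdint.h>

    /* GENMASK_ULL(7, 0) == 0xff: bits 7..0 set */
    #define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

    /* Buggy: '<<' binds tighter than '&', so the mask is shifted first */
    #define VMPL_REQ_BUGGY(v) (((uint64_t)(v) & GENMASK_ULL(7, 0) << 32) | 0x016)

    /* Fixed: mask the value first, then shift the result into bits [39:32] */
    #define VMPL_REQ_FIXED(v) ((((uint64_t)(v) & GENMASK_ULL(7, 0)) << 32) | 0x016)

    int main(void)
    {
        printf("buggy: %#llx\n", (unsigned long long)VMPL_REQ_BUGGY(1)); /* 0x16 */
        printf("fixed: %#llx\n", (unsigned long long)VMPL_REQ_FIXED(1)); /* 0x100000016 */
        return 0;
    }

With the buggy form, the value 1 is ANDed against 0xff << 32 and
vanishes, leaving only the GHCB_MSR_VMPL_REQ low bits.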
From 386cd3dcfd63491619b4034b818737fc0219e128 Mon Sep 17 00:00:00 2001
From: Alexey Makhalov
Date: Tue, 18 Mar 2025 00:40:31 +0000
Subject: MAINTAINERS: Update Alexey Makhalov's email address

Fix a typo in an email address.

Closes: https://lore.kernel.org/all/20240925-rational-succinct-vulture-cca9fb@lemur/T/
Reported-by: Konstantin Ryabitsev
Reported-by: Juergen Gross
Signed-off-by: Alexey Makhalov
Signed-off-by: Borislav Petkov (AMD)
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20250318004031.2703923-1-alexey.makhalov@broadcom.com
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 69511c3b2b76..0d3a252db2b6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18373,7 +18373,7 @@ F:	include/uapi/linux/ppdev.h
 PARAVIRT_OPS INTERFACE
 M:	Juergen Gross
 R:	Ajay Kaher
-R:	Alexey Makhalov
+R:	Alexey Makhalov
 R:	Broadcom internal kernel review list
 L:	virtualization@lists.linux.dev
 L:	x86@kernel.org
@@ -25859,7 +25859,7 @@ F:	drivers/misc/vmw_balloon.c
 
 VMWARE HYPERVISOR INTERFACE
 M:	Ajay Kaher
-M:	Alexey Makhalov
+M:	Alexey Makhalov
 R:	Broadcom internal kernel review list
 L:	virtualization@lists.linux.dev
 L:	x86@kernel.org
@@ -25887,7 +25887,7 @@ F:	drivers/scsi/vmw_pvscsi.h
 VMWARE VIRTUAL PTP CLOCK DRIVER
 M:	Nick Shi
 R:	Ajay Kaher
-R:	Alexey Makhalov
+R:	Alexey Makhalov
 R:	Broadcom internal kernel review list
 L:	netdev@vger.kernel.org
 S:	Supported
-- 
cgit v1.2.3

From d2062cc1b1c367d5d019f595ef860159e1301351 Mon Sep 17 00:00:00 2001
From: Ashish Kalra
Date: Mon, 28 Apr 2025 21:41:51 +0000
Subject: x86/sev: Do not touch VMSA pages during SNP guest memory kdump

When kdump is running makedumpfile to generate a vmcore and dump SNP
guest memory, it touches the VMSA page of the vCPU executing kdump.
This results in unrecoverable #NPF/RMP faults, as the VMSA page is
marked busy/in-use while the vCPU is running, and subsequently causes
a guest softlockup/hang.

Additionally, other APs may be halted in guest mode with their VMSA
pages marked busy, and touching those pages during the guest memory
dump will also cause #NPF.

Issue AP_DESTROY GHCB calls on the other APs to ensure they are kicked
out of guest mode, then clear the VMSA bit on their VMSA pages. If the
vCPU running kdump is an AP, mark its VMSA page as offline so that
makedumpfile excludes it while dumping guest memory.

Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Signed-off-by: Ashish Kalra
Signed-off-by: Borislav Petkov (AMD)
Reviewed-by: Pankaj Gupta
Reviewed-by: Tom Lendacky
Tested-by: Srikanth Aithal
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20250428214151.155464-1-Ashish.Kalra@amd.com
---
 arch/x86/coco/sev/core.c | 244 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 158 insertions(+), 86 deletions(-)

diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index b0c1a7a57497..41060ba41b5c 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
 	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
 }
 
+static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
+{
+	bool create = event != SVM_VMGEXIT_AP_DESTROY;
+	struct ghcb_state state;
+	unsigned long flags;
+	struct ghcb *ghcb;
+	int ret = 0;
+
+	local_irq_save(flags);
+
+	ghcb = __sev_get_ghcb(&state);
+
+	vc_ghcb_invalidate(ghcb);
+
+	if (create)
+		ghcb_set_rax(ghcb, vmsa->sev_features);
+
+	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+	ghcb_set_sw_exit_info_1(ghcb,
+				((u64)apic_id << 32)	|
+				((u64)snp_vmpl << 16)	|
+				event);
+	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+	sev_es_wr_ghcb_msr(__pa(ghcb));
+	VMGEXIT();
+
+	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
+		pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
+		ret = -EINVAL;
+	}
+
+	__sev_put_ghcb(&state);
+
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
+{
+	int ret;
+
+	if (snp_vmpl) {
+		struct svsm_call call = {};
+		unsigned long flags;
+
+		local_irq_save(flags);
+
+		call.caa = this_cpu_read(svsm_caa);
+		call.rcx = __pa(va);
+
+		if (make_vmsa) {
+			/* Protocol 0, Call ID 2 */
+			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
+			call.rdx = __pa(caa);
+			call.r8  = apic_id;
+		} else {
+			/* Protocol 0, Call ID 3 */
+			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
+		}
+
+		ret = svsm_perform_call_protocol(&call);
+
+		local_irq_restore(flags);
+	} else {
+		/*
+		 * If the kernel runs at VMPL0, it can change the VMSA
+		 * bit for a page using the RMPADJUST instruction.
+		 * However, for the instruction to succeed it must
+		 * target the permissions of a lesser privileged (higher
+		 * numbered) VMPL level, so use VMPL1.
+		 */
+		u64 attrs = 1;
+
+		if (make_vmsa)
+			attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+	}
+
+	return ret;
+}
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
+{
+	int err;
+
+	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
+	if (err)
+		pr_err("clear VMSA page failed (%u), leaking page\n", err);
+	else
+		free_page((unsigned long)vmsa);
+}
+
 static void set_pte_enc(pte_t *kpte, int level, void *va)
 {
 	struct pte_enc_desc d = {
@@ -1055,6 +1151,65 @@ void snp_kexec_begin(void)
 		pr_warn("Failed to stop shared<->private conversions\n");
 }
 
+/*
+ * Shut down all APs except the one handling kexec/kdump and clear
+ * the VMSA tag on the APs' VMSA pages as they are not being used as
+ * VMSA pages anymore.
+ */
+static void shutdown_all_aps(void)
+{
+	struct sev_es_save_area *vmsa;
+	int apic_id, this_cpu, cpu;
+
+	this_cpu = get_cpu();
+
+	/*
+	 * APs are already in HLT loop when enc_kexec_finish() callback
+	 * is invoked.
+	 */
+	for_each_present_cpu(cpu) {
+		vmsa = per_cpu(sev_vmsa, cpu);
+
+		/*
+		 * The BSP or offlined APs do not have a guest allocated
+		 * VMSA and there is no need to clear the VMSA tag for
+		 * this page.
+		 */
+		if (!vmsa)
+			continue;
+
+		/*
+		 * Cannot clear the VMSA tag for the currently running vCPU.
+		 */
+		if (this_cpu == cpu) {
+			unsigned long pa;
+			struct page *p;
+
+			pa = __pa(vmsa);
+			/*
+			 * Mark the VMSA page of the running vCPU as offline
+			 * so that it is excluded and not touched by
+			 * makedumpfile while generating the vmcore during
+			 * kdump.
+			 */
+			p = pfn_to_online_page(pa >> PAGE_SHIFT);
+			if (p)
+				__SetPageOffline(p);
+			continue;
+		}
+
+		apic_id = cpuid_to_apicid[cpu];
+
+		/*
+		 * Issue AP destroy to ensure the AP gets kicked out of
+		 * guest mode, allowing RMPADJUST to remove the VMSA tag
+		 * on its VMSA page.
+		 */
+		vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
+		snp_cleanup_vmsa(vmsa, apic_id);
+	}
+
+	put_cpu();
+}
+
 void snp_kexec_finish(void)
 {
 	struct sev_es_runtime_data *data;
@@ -1069,6 +1224,8 @@ void snp_kexec_finish(void)
 	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
 		return;
 
+	shutdown_all_aps();
+
 	unshare_all_memory();
 
 	/*
@@ -1090,51 +1247,6 @@ void snp_kexec_finish(void)
 	}
 }
 
-static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
-{
-	int ret;
-
-	if (snp_vmpl) {
-		struct svsm_call call = {};
-		unsigned long flags;
-
-		local_irq_save(flags);
-
-		call.caa = this_cpu_read(svsm_caa);
-		call.rcx = __pa(va);
-
-		if (make_vmsa) {
-			/* Protocol 0, Call ID 2 */
-			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
-			call.rdx = __pa(caa);
-			call.r8  = apic_id;
-		} else {
-			/* Protocol 0, Call ID 3 */
-			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
-		}
-
-		ret = svsm_perform_call_protocol(&call);
-
-		local_irq_restore(flags);
-	} else {
-		/*
-		 * If the kernel runs at VMPL0, it can change the VMSA
-		 * bit for a page using the RMPADJUST instruction.
-		 * However, for the instruction to succeed it must
-		 * target the permissions of a lesser privileged (higher
-		 * numbered) VMPL level, so use VMPL1.
-		 */
-		u64 attrs = 1;
-
-		if (make_vmsa)
-			attrs |= RMPADJUST_VMSA_PAGE_BIT;
-
-		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
-	}
-
-	return ret;
-}
-
 #define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
 #define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
 #define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
@@ -1166,24 +1278,10 @@ static void *snp_alloc_vmsa_page(int cpu)
 	return page_address(p + 1);
 }
 
-static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
-{
-	int err;
-
-	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
-	if (err)
-		pr_err("clear VMSA page failed (%u), leaking page\n", err);
-	else
-		free_page((unsigned long)vmsa);
-}
-
 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
 {
 	struct sev_es_save_area *cur_vmsa, *vmsa;
-	struct ghcb_state state;
 	struct svsm_ca *caa;
-	unsigned long flags;
-	struct ghcb *ghcb;
 	u8 sipi_vector;
 	int cpu, ret;
 	u64 cr4;
@@ -1297,33 +1395,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
 	}
 
 	/* Issue VMGEXIT AP Creation NAE event */
-	local_irq_save(flags);
-
-	ghcb = __sev_get_ghcb(&state);
-
-	vc_ghcb_invalidate(ghcb);
-	ghcb_set_rax(ghcb, vmsa->sev_features);
-	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
-	ghcb_set_sw_exit_info_1(ghcb,
-				((u64)apic_id << 32)	|
-				((u64)snp_vmpl << 16)	|
-				SVM_VMGEXIT_AP_CREATE);
-	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
-
-	sev_es_wr_ghcb_msr(__pa(ghcb));
-	VMGEXIT();
-
-	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
-	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
-		pr_err("SNP AP Creation error\n");
-		ret = -EINVAL;
-	}
-
-	__sev_put_ghcb(&state);
-
-	local_irq_restore(flags);
-
-	/* Perform cleanup if there was an error */
+	ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
 	if (ret) {
 		snp_cleanup_vmsa(vmsa, apic_id);
 		vmsa = NULL;
-- 
cgit v1.2.3
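
The new vmgexit_ap_control() helper packs the AP Creation request into
sw_exit_info_1 with the APIC ID in GHCBData[63:32], the VMPL in
GHCBData[19:16] and the event code in GHCBData[15:0]. A standalone C
sketch of that field layout (illustrative only; the event values are
assumed here to match the GHCB specification's AP Creation encoding,
and ap_control_info1() is a made-up name for the example):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed AP Creation event codes (cf. SVM_VMGEXIT_AP_CREATE/DESTROY) */
    #define AP_CREATE	1
    #define AP_DESTROY	2

    /* Mirrors: ((u64)apic_id << 32) | ((u64)snp_vmpl << 16) | event */
    static uint64_t ap_control_info1(uint32_t apic_id, uint8_t vmpl, uint16_t event)
    {
        return ((uint64_t)apic_id << 32) | ((uint64_t)vmpl << 16) | event;
    }

    int main(void)
    {
        /* e.g., destroy the vCPU with APIC ID 3, requested from VMPL0 */
        printf("%#018llx\n",
               (unsigned long long)ap_control_info1(3, 0, AP_DESTROY));
        return 0;
    }

The hypervisor reports failure in the low 32 bits of sw_exit_info_1,
which is why the helper checks lower_32_bits() after the VMGEXIT.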
From 82b7f88f2316c5442708daeb0b5ec5aa54c8ff7f Mon Sep 17 00:00:00 2001
From: Ashish Kalra
Date: Tue, 6 May 2025 18:35:29 +0000
Subject: x86/sev: Make sure pages are not skipped during kdump

When shared pages are being converted to private during kdump,
additional checks are performed, including handling the case of a GHCB
page being contained within a huge page.

Currently, this check incorrectly skips a page just below the GHCB page
from being transitioned back to private during kdump preparation. This
skipped page causes a 0x404 #VC exception when it is accessed later
while dumping guest memory for vmcore generation.

Correct the range check for a GHCB page contained within a huge page.
Also, ensure that the skipped huge page containing the GHCB page is
transitioned back to private by applying the correct address mask when
changing GHCBs to private at the end of kdump preparation.

  [ bp: Massage commit message. ]

Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Signed-off-by: Ashish Kalra
Signed-off-by: Borislav Petkov (AMD)
Reviewed-by: Tom Lendacky
Tested-by: Srikanth Aithal
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20250506183529.289549-1-Ashish.Kalra@amd.com
---
 arch/x86/coco/sev/core.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 41060ba41b5c..36beaac713c1 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -1101,7 +1101,8 @@ static void unshare_all_memory(void)
 			data = per_cpu(runtime_data, cpu);
 			ghcb = (unsigned long)&data->ghcb_page;
 
-			if (addr <= ghcb && ghcb <= addr + size) {
+			/* Handle the case of a huge page containing the GHCB page */
+			if (addr <= ghcb && ghcb < addr + size) {
 				skipped_addr = true;
 				break;
 			}
@@ -1213,8 +1214,8 @@ static void shutdown_all_aps(void)
 
 void snp_kexec_finish(void)
 {
 	struct sev_es_runtime_data *data;
+	unsigned long size, addr;
 	unsigned int level, cpu;
-	unsigned long size;
 	struct ghcb *ghcb;
 	pte_t *pte;
 
@@ -1242,8 +1243,10 @@ void snp_kexec_finish(void)
 		ghcb = &data->ghcb_page;
 		pte = lookup_address((unsigned long)ghcb, &level);
 		size = page_level_size(level);
-		set_pte_enc(pte, level, (void *)ghcb);
-		snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
+		/* Handle the case of a huge page containing the GHCB page */
+		addr = (unsigned long)ghcb & page_level_mask(level);
+		set_pte_enc(pte, level, (void *)addr);
+		snp_set_memory_private(addr, (size / PAGE_SIZE));
 	}
 }
-- 
cgit v1.2.3
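
Both parts of the fix are generic address arithmetic: a mapping
starting at addr with length size contains the GHCB iff
addr <= ghcb < addr + size (a half-open interval, hence '<' rather
than '<='), and the possibly-huge page containing the GHCB starts at
the address masked down to the page boundary. A minimal sketch of the
two operations, assuming power-of-two page sizes as on x86 (helper
names are invented for illustration):

    #include <stdbool.h>
    #include <stdint.h>

    /* True if ghcb lies within the mapping [addr, addr + size) */
    static bool mapping_contains(uint64_t addr, uint64_t size, uint64_t ghcb)
    {
        return addr <= ghcb && ghcb < addr + size;  /* '<', not '<=' */
    }

    /*
     * Base of the (possibly huge) page containing ghcb. With a
     * power-of-two size, ~(size - 1) plays the role of what
     * page_level_mask(level) provides in the kernel.
     */
    static uint64_t page_base(uint64_t ghcb, uint64_t size)
    {
        return ghcb & ~(size - 1);
    }

With the old '<=' comparison, an address exactly at addr + size (the
first byte of the next mapping) would wrongly match, which is how the
page just below the GHCB page ended up being skipped.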
From 24ee8d9432b5744fce090af3d829a39aa4abf63f Mon Sep 17 00:00:00 2001
From: Yazen Ghannam
Date: Tue, 13 May 2025 20:48:57 +0000
Subject: x86/CPU/AMD: Add X86_FEATURE_ZEN6

Add a synthetic feature flag for Zen6.

  [ bp: Move the feature flag to a free slot and avoid future merge
    conflicts from incoming stuff. ]

Signed-off-by: Yazen Ghannam
Signed-off-by: Borislav Petkov (AMD)
Link: https://lore.kernel.org/20250513204857.3376577-1-yazen.ghannam@amd.com
---
 arch/x86/include/asm/cpufeatures.h | 2 +-
 arch/x86/kernel/cpu/amd.c          | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6c2c152d8a67..9bb17c714f4a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -75,7 +75,7 @@
 #define X86_FEATURE_CENTAUR_MCR		( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
 #define X86_FEATURE_K8			( 3*32+ 4) /* Opteron, Athlon64 */
 #define X86_FEATURE_ZEN5		( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
-/* Free					( 3*32+ 6) */
+#define X86_FEATURE_ZEN6		( 3*32+ 6) /* CPU based on Zen6 microarchitecture */
 /* Free					( 3*32+ 7) */
 #define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
 #define X86_FEATURE_UP			( 3*32+ 9) /* "up" SMP kernel running on UP */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2b36379ff675..4e06baab40bb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -472,6 +472,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 		case 0x60 ... 0x7f:
 			setup_force_cpu_cap(X86_FEATURE_ZEN5);
 			break;
+		case 0x50 ... 0x5f:
+		case 0x90 ... 0xaf:
+		case 0xc0 ... 0xcf:
+			setup_force_cpu_cap(X86_FEATURE_ZEN6);
+			break;
 		default:
 			goto warn;
 		}
-- 
cgit v1.2.3
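
Once forced on in bsp_init_amd(), a synthetic flag like this is queried
the same way as any hardware-enumerated CPU feature. A hedged sketch of
typical consumer code (the quirk helper is hypothetical; only the flag
and accessor come from the patch and the kernel's feature API):

	/* Somewhere in arch/x86 code that needs Zen6-specific handling */
	if (cpu_feature_enabled(X86_FEATURE_ZEN6))
		apply_zen6_quirk();	/* hypothetical helper */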
From 03680913744de17fa49e62b1d8f71bab42b0b721 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn
Date: Fri, 16 May 2025 11:08:10 +0200
Subject: x86/mm: Remove duplicated word in warning message

Commit bbeb69ce3013 ("x86/mm: Remove CONFIG_HIGHMEM64G support")
introduced the new warning message MSG_HIGHMEM_TRIMMED, which
accidentally contains a duplicated 'for for'. Remove the duplicated
word.

This was noticed while reviewing for references to obsolete kernel
build config options.

Fixes: bbeb69ce3013 ("x86/mm: Remove CONFIG_HIGHMEM64G support")
Signed-off-by: Lukas Bulwahn
Signed-off-by: Ingo Molnar
Cc: Andy Lutomirski
Cc: Arnd Bergmann
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Peter Zijlstra
Cc: kernel-janitors@vger.kernel.org
Link: https://lore.kernel.org/r/20250516090810.556623-1-lukas.bulwahn@redhat.com
---
 arch/x86/mm/init_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ad662cc4605c..148eba50265a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -565,7 +565,7 @@ static void __init lowmem_pfn_init(void)
 	"only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
 
 #define MSG_HIGHMEM_TRIMMED \
-	"Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n"
+	"Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n"
 /*
  * We have more RAM than fits into lowmem - we try to put it into
  * highmem, also taking the highmem=x boot parameter into account:
-- 
cgit v1.2.3