From 39bc4f1b8de8752405bb4e54d3be35c6cb0f8887 Mon Sep 17 00:00:00 2001 From: hanliyang Date: Sun, 10 Jan 2021 14:57:21 -0500 Subject: [PATCH 1/7] KVM: SVM: Fix the available ASID range for CSV2 guest hygon inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG CVE: NA --------------------------- All the ASIDs in range [1, max_sev_asid] are available for CSV2 guest, regardless of the value of min_sev_asid. Signed-off-by: hanliyang --- arch/x86/kvm/svm/sev.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 18368047ef2d..d0d674c6c411 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -159,6 +159,13 @@ static int sev_asid_new(struct kvm_sev_info *sev) bool retry = true; int ret; + /* + * No matter what the min_sev_asid is, all asids in range + * [1, max_sev_asid] can be used for CSV2 guest on Hygon CPUs. + */ + if (is_x86_vendor_hygon()) + max_asid = max_sev_asid; + if (min_asid > max_asid) return -ENOTTY; @@ -2295,11 +2302,19 @@ void __init sev_hardware_setup(void) if (!boot_cpu_has(X86_FEATURE_SEV_ES)) goto out; - /* Has the system been allocated ASIDs for SEV-ES? */ - if (min_sev_asid == 1) - goto out; + if (is_x86_vendor_hygon()) { + /* + * The ASIDs from 1 to max_sev_asid are available for hygon + * CSV2 guest. + */ + sev_es_asid_count = max_sev_asid; + } else { + /* Has the system been allocated ASIDs for SEV-ES? */ + if (min_sev_asid == 1) + goto out; - sev_es_asid_count = min_sev_asid - 1; + sev_es_asid_count = min_sev_asid - 1; + } WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)); sev_es_supported = true; @@ -2315,7 +2330,8 @@ void __init sev_hardware_setup(void) pr_info("%s %s (ASIDs %u - %u)\n", is_x86_vendor_hygon() ? "CSV2" : "SEV-ES", sev_es_supported ? "enabled" : "disabled", - min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); + is_x86_vendor_hygon() ? 1 : (min_sev_asid > 1 ? 
1 : 0), + is_x86_vendor_hygon() ? max_sev_asid : min_sev_asid - 1); sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; -- Gitee From d2e93b4f8832441d46f95acdcea55234ca9bd01d Mon Sep 17 00:00:00 2001 From: hanliyang Date: Thu, 6 Apr 2023 09:03:58 +0800 Subject: [PATCH 2/7] x86/csv2: Keep in atomic context when holding ghcb page if the #VC comes from userspace hygon inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG CVE: NA --------------------------- In function vc_raw_handle_exception(), it holds the ghcb page and calls __sev_get_ghcb() <- holding ghcb page to communicate with host vc_init_em_ctxt() vc_handle_exitcode() __sev_put_ghcb() <- no longer holding ghcb page after the communication to emulate the instruction which caused the #VC. When the #VC comes from userspace, the code path user_exc_vmm_communication() vc_raw_handle_exception() cannot keep memory accesses in atomic context; this may lead to direct page fault handling if the emulation process accesses a userspace address which is not present in memory. For userspace address page fault handling, if it's not in the atomic context or the caller doesn't call pagefault_disable(), the irq may be enabled and there is a risk of generating more #VC. So it's necessary to switch to atomic context before emulating instructions which cause #VC. Add __preempt_count_{add,sub}() pair to keep the code between __sev_get_ghcb() and __sev_put_ghcb() in atomic context if #VC comes from userspace. If memory access fails during emulation, the caller will construct page fault info and forward a page fault later. 
Fixes: be1a5408868a ("x86/sev: Split up runtime #VC handler for correct state tracking") Signed-off-by: hanliyang --- arch/x86/kernel/sev.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 9905dc0e0b09..614335589d52 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1852,6 +1853,15 @@ static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_co struct ghcb *ghcb; bool ret = true; + /* + * Make sure the codes between __sev_get_ghcb() and __sev_put_ghcb() + * keep in atomic context. If #VC comes from kernel mode, then the + * codes here are in atomic context. If #VC comes from user mode, then + * it's necessary to switch to atomic context manually. + */ + if (is_x86_vendor_hygon() && !in_nmi()) + __preempt_count_add(HARDIRQ_OFFSET); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); @@ -1862,6 +1872,9 @@ static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_co __sev_put_ghcb(&state); + if (is_x86_vendor_hygon() && !in_nmi()) + __preempt_count_sub(HARDIRQ_OFFSET); + /* Done - now check the result */ switch (result) { case ES_OK: -- Gitee From 582c10cc57416f6d12fa1bbab3310e7fa3d75991 Mon Sep 17 00:00:00 2001 From: hanliyang Date: Thu, 6 Jul 2023 05:20:45 +0800 Subject: [PATCH 3/7] KVM: x86: Calls is_64_bit_hypercall() instead of is_64_bit_mode() in complete_hypercall_exit() hygon inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG CVE: NA --------------------------- In the commit b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state"), is_64_bit_mode() will trigger warning, as the following messages, for SEV-ES or CSV2 guest. 
[85350.053201] ------------[ cut here ]------------ [85350.053206] WARNING: CPU: 2 PID: 68989 at arch/x86/kvm/x86.h:156 complete_hypercall_exit+0x6a/0x70 [kvm] [85350.053299] Modules linked in: kvm_amd(OE) kvm(OE) ccp(E) irqbypass(E) vhost_net(E) vhost(E) vhost_iotlb(E) tap(E) fuse(E) xt_CHECKSUM(E) xt_MASQUERADE(E) xt_conntrack(E) ipt_REJECT(E) nf_reject_ipv4(E) ip6table_mangle(E) ip6table_nat(E) iptable_mangle(E) iptable_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) nf_tables(E) libcrc32c(E) nfnetlink(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) tun(E) bridge(E) stp(E) llc(E) rfkill(E) vfat(E) fat(E) binfmt_misc(E) intel_rapl_msr(E) intel_rapl_common(E) amd64_edac(E) edac_mce_amd(E) crct10dif_pclmul(E) crc32_pclmul(E) acpi_ipmi(E) ipmi_ssif(E) ipmi_si(E) ast(E) joydev(E) mousedev(E) ghash_clmulni_intel(E) rapl(E) ipmi_devintf(E) drm_shmem_helper(E) drm_kms_helper(E) ipmi_msghandler(E) sg(E) k10temp(E) acpi_cpufreq(E) squashfs(E) loop(E) parport_pc(E) ppdev(E) lp(E) parport(E) drm(E) ip_tables(E) sd_mod(E) t10_pi(E) crc64_rocksoft(E) crc64(E) ahci(E) igb(E) i2c_designware_platform(E) libahci(E) i2c_algo_bit(E) dca(E) i2c_piix4(E) [85350.053421] i2c_designware_core(E) crc32c_intel(E) libata(E) i2c_core(E) [last unloaded: kvm(OE)] [85350.053432] CPU: 2 PID: 68989 Comm: qemu-system-x86 Tainted: GF W OE 6.6.7-for-openanolis #5 [85350.053438] Hardware name: HYGON HongHaiA1b/HongHaiA1, BIOS A1633050 02/02/2023 [85350.053441] RIP: 0010:complete_hypercall_exit+0x6a/0x70 [kvm] [85350.053511] Code: e8 9b fb ff ff 48 83 c4 08 5b 5d e9 60 68 68 d8 48 8d 54 24 04 48 89 e6 48 89 ef e8 40 db 12 00 8b 44 24 04 85 c0 74 c4 eb c4 <0f> 0b eb b5 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 [85350.053514] RSP: 0018:ffffc90000ea3e28 EFLAGS: 00010202 [85350.053519] RAX: ffff8881419f0000 RBX: 0000000000000000 RCX: ffff8881003ad780 [85350.053522] RDX: 0000606fc0a29bc0 RSI: 00000000fffffe01 RDI: ffff888b5dc20000 [85350.053525] RBP: ffff888b5dc20000 
R08: 0000000000000001 R09: 0000000000000000 [85350.053527] R10: ffffc90000ea3ee8 R11: 0000000000000000 R12: ffff88810fe1ea00 [85350.053530] R13: ffff888b5dc20000 R14: ffff888b5dc20048 R15: 0000000000000000 [85350.053532] FS: 00007eff45528700(0000) GS:ffff88903f080000(0000) knlGS:0000000000000000 [85350.053536] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [85350.053539] CR2: 0000000000000000 CR3: 00000001415d2000 CR4: 00000000003506e0 [85350.053541] Call Trace: [85350.053545] [85350.053550] ? __warn+0x84/0x140 [85350.053558] ? complete_hypercall_exit+0x6a/0x70 [kvm] [85350.053627] ? report_bug+0x1bd/0x1d0 [85350.053635] ? handle_bug+0x3c/0x70 [85350.053640] ? exc_invalid_op+0x18/0x70 [85350.053645] ? asm_exc_invalid_op+0x1a/0x20 [85350.053655] ? complete_hypercall_exit+0x6a/0x70 [kvm] [85350.053724] kvm_arch_vcpu_ioctl_run+0x3dd/0x410 [kvm] [85350.053796] kvm_vcpu_ioctl+0x277/0x6c0 [kvm] [85350.053855] __x64_sys_ioctl+0x92/0xd0 [85350.053864] do_syscall_64+0x3f/0x90 [85350.053868] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [85350.053874] RIP: 0033:0x7eff486c33ab [85350.053878] Code: 0f 1e fa 48 8b 05 e5 7a 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d b5 7a 0d 00 f7 d8 64 89 01 48 [85350.053881] RSP: 002b:00007eff45527848 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [85350.053886] RAX: ffffffffffffffda RBX: 000000000000ae80 RCX: 00007eff486c33ab [85350.053888] RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000010 [85350.053891] RBP: 0000563586e32430 R08: 0000563584ff1d30 R09: 00007eff455276a4 [85350.053893] R10: 00007eff4552769c R11: 0000000000000246 R12: 0000000000000000 [85350.053896] R13: 00005635856bcd60 R14: 0000000000000000 R15: 0000000000000000 [85350.053904] [85350.053906] ---[ end trace 0000000000000000 ]--- Use is_64_bit_hypercall() instead of is_64_bit_mode() in complete_hypercall_exit() to avoid warning when the SEV-ES or CSV2 guest invoking 
KVM_HC_MAP_GPA_RANGE hypercall. Fixes: b5aead0064f3 ("KVM: x86: Assume a 64-bit hypercall for guests with protected state") Signed-off-by: hanliyang --- arch/x86/kvm/x86.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b91f9e996eeb..0cd09f5cbee5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9934,8 +9934,13 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) { u64 ret = vcpu->run->hypercall.ret; - if (!is_64_bit_mode(vcpu)) + /* Use is_64_bit_hypercall() instead of is_64_bit_mode() for Hygon CPUs */ + if (is_x86_vendor_hygon()) { + if (!is_64_bit_hypercall(vcpu)) + ret = (u32)ret; + } else if (!is_64_bit_mode(vcpu)) { ret = (u32)ret; + } kvm_rax_write(vcpu, ret); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); -- Gitee From ca0c1fa1ec4b6d9040cec7e543d50a93fc88d4c6 Mon Sep 17 00:00:00 2001 From: hanliyang Date: Sun, 7 Jan 2024 04:47:42 +0800 Subject: [PATCH 4/7] x86/head/64: Flush caches for .bss..decrypted section after CR3 switches to early_top_pgt hygon inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG CVE: NA --------------------------- The memory region of the .bss..decrypted section may be mapped with encryption before the early boot stage of Linux. If the corresponding stale caches left over from that earlier stage are not flushed before we access that memory region in later stages, then Linux will crash because the stale caches will pollute the memory. Fix this issue by flushing the caches with the encrypted mapping before we access the .bss..decrypted section. 
Fixes: b3f0907c71e0 ("x86/mm: Add .bss..decrypted section to hold shared variables") Signed-off-by: hanliyang --- arch/x86/kernel/head64.c | 109 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/head_64.S | 10 ++++ 2 files changed, 119 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c58213bce294..360dcd0d8454 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -318,6 +318,115 @@ unsigned long __head __startup_64(unsigned long physaddr, return sme_postprocess_startup(bp, pmd); } +#ifdef CONFIG_AMD_MEM_ENCRYPT + +extern bool bsp_flush_bss_decrypted_section_handled; + +/* Get CPUID data through GHCB MSR protocol */ +static int __cpuid_msr_protocol(u32 fn, int reg_idx, u32 *reg) +{ + unsigned int msr_idx = (unsigned int)MSR_AMD64_SEV_ES_GHCB; + struct msr m; + + m.q = GHCB_CPUID_REQ(fn, reg_idx); + + asm volatile("wrmsr" : : "c" (msr_idx), "a"(m.l), "d" (m.h) : "memory"); + VMGEXIT(); + asm volatile("rdmsr" : "=a" (m.l), "=d" (m.h) : "c" (msr_idx)); + + if (GHCB_RESP_CODE(m.q) != GHCB_MSR_CPUID_RESP) + return -EIO; + + *reg = m.h; + + return 0; +} + +static bool __should_do_clflush(void) +{ + u32 eax, ebx, ecx, edx; + int ret; + + /* Check if this is a Hygon CSV guest or an AMD SEV guest */ + if (!sme_get_me_mask() || + !(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)) + return false; + + /* Get cpuid vendor info, if cannot get vendor info, then return false */ + eax = 0x0; + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ES_ENABLED)) { + native_cpuid(&eax, &ebx, &ecx, &edx); + } else { + /* + * Hygon CSV2 guest or AMD SEV-ES guest should use GHCB MSR + * protocol to get cpu vendor info. + */ + ret = __cpuid_msr_protocol(eax, GHCB_CPUID_REQ_EBX, &ebx); + ret = ret ? : __cpuid_msr_protocol(eax, GHCB_CPUID_REQ_ECX, &ecx); + ret = ret ? 
: __cpuid_msr_protocol(eax, GHCB_CPUID_REQ_EDX, &edx); + if (ret) + return false; + } + + /* Check if this is a Hygon CSV guest */ +#define STRING_Hygo 0x6f677948 +#define STRING_uine 0x656e6975 +#define STRING_nGen 0x6e65476e + + if (ebx != STRING_Hygo || ecx != STRING_uine || edx != STRING_nGen) + return false; + + return true; +} + +void __ref early_clflush_bss_decrypted_section(void) +{ + unsigned long vaddr, vaddr_end; + char *cl, *start, *end; + + /* Only allow bsp flush these caches and the bsp must at early boot stage */ + if (bsp_flush_bss_decrypted_section_handled) + return; + + if (read_cr3_pa() != __pa_nodebug(early_top_pgt)) + return; + + /* Only Hygon CSV guest should do the clflush */ + if (!__should_do_clflush()) + goto handled; + + /* + * The memory region of .bss..decrypted section maybe mapped + * with encryption in earlier stage. If the correspond stale + * caches lives in earlier stage were not flushed before we + * access that memory region, then Linux will crash later + * because the stale caches will pollute the memory. So we + * need flush the caches with encrypted mapping before we + * access .bss..decrypted section. + * + * The function __startup_64() have already filled the + * encrypted mapping for .bss..decrypted section, use that + * mapping here. + */ + vaddr = (unsigned long)__start_bss_decrypted - + __START_KERNEL_map + phys_base; + vaddr_end = (unsigned long)__end_bss_decrypted - + __START_KERNEL_map + phys_base; + + /* Hardcode cl-size to 64 at this stage. 
*/ + start = (char *)(vaddr & ~63); + end = (char *)((vaddr_end + 63) & ~63); + + asm volatile("mfence" : : : "memory"); + for (cl = start; cl != end; cl += 64) + clflush(cl); + asm volatile("mfence" : : : "memory"); + +handled: + bsp_flush_bss_decrypted_section_handled = true; +} +#endif + /* Wipe all early page tables except for the kernel symbol map */ static void __init reset_early_page_tables(void) { diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e6eaee8509ce..3357aafa52f1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -375,6 +375,14 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) shrq $32, %rdx wrmsr +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Ensure .bss.decrypted memory's stale caches which lived in earlier + * stage to be flushed. + */ + call early_clflush_bss_decrypted_section +#endif + /* Setup and Load IDT */ call early_setup_idt @@ -511,6 +519,8 @@ SYM_CODE_END(vc_boot_ghcb) SYM_DATA(initial_code, .quad x86_64_start_kernel) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) +SYM_DATA(bsp_flush_bss_decrypted_section_handled, .byte 0x0) + .balign 8 #endif SYM_DATA(trampoline_lock, .quad 0); -- Gitee From 313483630d379b230522442cc771f35031035069 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 16 Oct 2023 08:27:32 -0500 Subject: [PATCH 5/7] KVM: SEV: Do not intercept accesses to MSR_IA32_XSS for SEV-ES guests mainline inclusion from mainline-v6.7-rc7 commit a26b7cd2254695f8258cc370f33280db0a9a3813 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a26b7cd2254695f8258cc370f33280db0a9a3813 --------------------------- When intercepts are enabled for MSR_IA32_XSS, the host will swap in/out the guest-defined values while context-switching to/from guest mode. 
However, in the case of SEV-ES, vcpu->arch.guest_state_protected is set, so the guest-defined value is effectively ignored when switching to guest mode with the understanding that the VMSA will handle swapping in/out this register state. However, SVM is still configured to intercept these accesses for SEV-ES guests, so the values in the initial MSR_IA32_XSS are effectively read-only, and a guest will experience undefined behavior if it actually tries to write to this MSR. Fortunately, only CET/shadowstack makes use of this register on SEV-ES-capable systems currently, which isn't yet widely used, but this may become more of an issue in the future. Additionally, enabling intercepts of MSR_IA32_XSS results in #VC exceptions in the guest in certain paths that can lead to unexpected #VC nesting levels. One example is SEV-SNP guests when handling #VC exceptions for CPUID instructions involving leaf 0xD, subleaf 0x1, since they will access MSR_IA32_XSS as part of servicing the CPUID #VC, then generate another #VC when accessing MSR_IA32_XSS, which can lead to guest crashes if an NMI occurs at that point in time. Running perf on a guest while it is issuing such a sequence is one example where these can be problematic. Address this by disabling intercepts of MSR_IA32_XSS for SEV-ES guests if the host/guest configuration allows it. If the host/guest configuration doesn't allow for MSR_IA32_XSS, leave it intercepted so that it can be caught by the existing checks in kvm_{set,get}_msr_common() if the guest still attempts to access it. 
Fixes: 376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading") Cc: Alexey Kardashevskiy Suggested-by: Tom Lendacky Signed-off-by: Michael Roth Message-Id: <20231016132819.1002933-4-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 19 +++++++++++++++++++ arch/x86/kvm/svm/svm.c | 1 + arch/x86/kvm/svm/svm.h | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index d0d674c6c411..473377165125 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3064,6 +3064,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); } + + /* + * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if + * the host/guest supports its use. + * + * guest_can_use() checks a number of requirements on the host/guest to + * ensure that MSR_IA32_XSS is available, but it might report true even + * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host + * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better + * to further check that the guest CPUID actually supports + * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved + * guests will still get intercepted and caught in the normal + * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. 
+ */ + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); + else + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); } void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c6b0b199638c..93347d51efbb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -106,6 +106,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_IA32_XSS, .always = false }, { .index = MSR_EFER, .always = false }, { .index = MSR_IA32_CR_PAT, .always = false }, { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9979329517d7..13cf80855481 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -30,7 +30,7 @@ #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 -#define MAX_DIRECT_ACCESS_MSRS 46 +#define MAX_DIRECT_ACCESS_MSRS 47 #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; -- Gitee From dc86f61081c838f09dfaa8f0a0ab2ddbf4acaaa9 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sat, 3 Feb 2024 13:45:20 +0100 Subject: [PATCH 6/7] KVM: x86: Fix KVM_GET_MSRS stack info leak mainline inclusion from mainline-v6.8-rc5 commit 3376ca3f1a2075eaa23c5576c47d04d7e8a4adda category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3376ca3f1a2075eaa23c5576c47d04d7e8a4adda --------------------------- Commit 6abe9c1386e5 ("KVM: X86: Move ignore_msrs handling upper the stack") changed the 'ignore_msrs' handling, including sanitizing return values to the caller. 
This was fine until commit 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") which allowed non-existing feature MSRs to be ignored, i.e. to not generate an error on the ioctl() level. It even tried to preserve the sanitization of the return value. However, the logic is flawed, as '*data' will be overwritten again with the uninitialized stack value of msr.data. Fix this by simplifying the logic and always initializing msr.data, vanishing the need for an additional error exit path. Fixes: 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") Signed-off-by: Mathias Krause Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240203124522.592778-2-minipli@grsecurity.net Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0cd09f5cbee5..be2e89d5798a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1755,22 +1755,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) struct kvm_msr_entry msr; int r; + /* Unconditionally clear the output for simplicity */ + msr.data = 0; msr.index = index; r = kvm_get_msr_feature(&msr); - if (r == KVM_MSR_RET_INVALID) { - /* Unconditionally clear the output for simplicity */ - *data = 0; - if (kvm_msr_ignored_check(index, 0, false)) - r = 0; - } - - if (r) - return r; + if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false)) + r = 0; *data = msr.data; - return 0; + return r; } static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) -- Gitee From f0da6f7a4d98faa7abffdd47665da49fbb77bcaf Mon Sep 17 00:00:00 2001 From: hanliyang Date: Mon, 13 Nov 2023 01:54:26 +0800 Subject: [PATCH 7/7] KVM: SVM: Unmap ghcb pages if they're still mapped when destroy guest hygon inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I98WPG CVE: NA --------------------------- The ghcb pages 
might be mapped when KVM is handling the VMGEXIT events, and these ghcb pages will be unmapped when preparing to switch to guest mode. If we try to kill the userspace VMM (e.g. qemu) of a guest, it's possible that the mapped ghcb pages will never be unmapped, which will cause a memory leak. We exposed a serious memory leak by frequently creating and killing multiple qemu processes for state encrypted guests. In order to solve this issue, unmap ghcb pages if they're still mapped when destroying the guest. Fixes: ce7ea0cfdc2e ("KVM: SVM: Move GHCB unmapping to fix RCU warning") Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Signed-off-by: hanliyang --- arch/x86/kvm/svm/sev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 473377165125..713b0ee0ef27 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2454,6 +2454,9 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->sev_es.vmsa)); + if (svm->sev_es.ghcb) + kvm_vcpu_unmap(vcpu, &svm->sev_es.ghcb_map, false); + if (svm->sev_es.ghcb_sa_free) kvfree(svm->sev_es.ghcb_sa); -- Gitee