From 715ffb1717c5bbc19895a97b4b83a3ee31ccdc50 Mon Sep 17 00:00:00 2001
From: Pawan Gupta
Date: Tue, 12 Mar 2024 15:40:33 -0700
Subject: [PATCH 01/12] x86/bugs: Add asm helpers for executing VERW

ANBZ: #27027

commit 35e36eac881cddea42ca5fd93facc145a2d5369d upstream

MDS mitigation requires clearing the CPU buffers before returning to
user. This needs to be done late in the exit-to-user path. The current
location of VERW leaves a possibility of kernel data ending up in CPU
buffers for memory accesses done after VERW, such as:

  1. Kernel data accessed by an NMI between VERW and return-to-user can
     remain in CPU buffers, since an NMI returning to kernel does not
     execute VERW to clear CPU buffers.

  2. Alyssa reported that after VERW is executed,
     CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
     call. Memory accesses during stack scrubbing can move kernel stack
     contents into CPU buffers.

  3. When caller-saved registers are restored after a return from a
     function executing VERW, the kernel stack accesses can remain in
     CPU buffers (since they occur after VERW).

To fix this, VERW needs to be moved very late in the exit-to-user path.

In preparation for moving VERW to entry/exit asm code, create macros
that can be used in asm. Also make VERW patching depend on a new feature
flag X86_FEATURE_CLEAR_CPU_BUF.

[pawan: - Runtime patch jmp instead of verw in macro CLEAR_CPU_BUFFERS
          due to lack of relative addressing support for relocations
          in kernels < v6.5.
        - Add UNWIND_HINT_EMPTY to avoid warning:
          arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0:
          unreachable instruction]

Reported-by: Alyssa Milburn
Suggested-by: Andrew Cooper
Suggested-by: Peter Zijlstra
Signed-off-by: Pawan Gupta
Signed-off-by: Dave Hansen
Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Jeevan deep J
Signed-off-by: PrithivishS
---
 arch/x86/entry/entry.S               | 22 ++++++++++++++++++++++
 arch/x86/include/asm/cpufeatures.h   |  1 +
 arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index bfb7bcb362bc..a4db552908c5 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
 #include <linux/linkage.h>
 #include <asm/export.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
 
 .pushsection .noinstr.text, "ax"
 
@@ -20,3 +23,22 @@ SYM_FUNC_END(entry_ibpb)
 EXPORT_SYMBOL_GPL(entry_ibpb);
 
 .popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c6cf63569dc1..2c7d480fcfd9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -330,6 +330,7 @@ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+19) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9be65ed0ba29..b853f6037467 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -177,6 +177,19 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_\@: +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -357,6 +370,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** -- Gitee From a7c9fd0040d6b4031619dd324589086ef72ed873 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 12 Mar 2024 15:40:39 -0700 Subject: [PATCH 02/12] x86/entry_64: Add VERW just before userspace transition ANBZ: #27027 commit edc702b4a820fc7ffc20e732db1c421cfffbb746 upstream Mitigation for MDS is to use VERW instruction to clear any secrets in CPU Buffers. Any memory accesses after VERW execution can still remain in CPU buffers. It is safer to execute VERW late in return to user path to minimize the window in which kernel data can end up in CPU buffers. There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. Add support for deploying VERW mitigation after user register state is restored. This helps minimize the chances of kernel data ending up into CPU buffers after executing VERW. Note that the mitigation at the new location is not yet enabled. Corner case not handled ======================= Interrupts returning to kernel don't clear CPUs buffers since the exit-to-user path is expected to do that anyways. But, there could be a case when an NMI is generated in kernel after the exit-to-user path has cleared the buffers. This case is not handled and NMI returning to kernel don't clear CPU buffers because: 1. It is rare to get an NMI after VERW, but before returning to user. 2. For an unprivileged user, there is no known way to make that NMI less rare or target it. 3. It would take a large number of these precisely-timed NMIs to mount an actual attack. There's presumably not enough bandwidth. 4. The NMI in question occurs after a VERW, i.e. when user state is restored and most interesting data is already scrubbed. 
Whats left is only the data that NMI touches, and that may or may not be of any interest. [ pawan: resolved conflict in syscall_return_via_sysret, added CLEAR_CPU_BUFFERS to USERGS_SYSRET64 ] Suggested-by: Dave Hansen Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/entry/entry_64.S | 10 ++++++++++ arch/x86/entry/entry_64_compat.S | 1 + arch/x86/include/asm/irqflags.h | 1 + 3 files changed, 12 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e81282cc67fc..ed5bbc33d0f2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -618,6 +618,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi SWAPGS + CLEAR_CPU_BUFFERS INTERRUPT_RETURN @@ -724,6 +725,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1490,6 +1493,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1507,6 +1516,7 @@ SYM_CODE_END(asm_exc_nmi) SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index da9fbd358412..6fd2f8cbb3b6 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -320,6 +320,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) SYM_CODE_END(entry_SYSCALL_compat) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c86edefa115..f40dea50dfbf 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -134,6 +134,7 @@ static __always_inline unsigned long arch_local_irq_save(void) #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ + CLEAR_CPU_BUFFERS; \ sysretq; #define USERGS_SYSRET32 \ swapgs; \ -- Gitee From 99858c739384be008c802aa625b3bc27e995a560 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 12 Mar 2024 15:40:44 -0700 Subject: [PATCH 03/12] x86/entry_32: Add VERW just before userspace transition ANBZ: #27027 commit 50f021f0b985629accf10481a6e89af8b9700583 upstream As done for entry_64, add support for executing VERW late in exit to user path for 32-bit mode. 
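
[ note: for illustration, a minimal user-space sketch of the primitive
  that CLEAR_CPU_BUFFERS patches in on both the 32-bit and 64-bit paths;
  it mirrors mds_clear_cpu_buffers() from nospec-branch.h. The selector
  value 0x2b (__USER_DS on x86-64) is an assumption standing in for
  __KERNEL_DS, which is not usable outside the kernel:

	#include <stdint.h>

	static inline void clear_cpu_buffers(void)
	{
		/*
		 * Only the memory-operand form of VERW triggers the
		 * microcode-assisted buffer clear; it clobbers nothing
		 * but EFLAGS.ZF.
		 */
		static const uint16_t sel = 0x2b;	/* __USER_DS */

		asm volatile("verw %[sel]" : : [sel] "m" (sel) : "cc");
	}
]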
Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/entry/entry_32.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8fcd6a42b3a1..55abddcd6607 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -997,6 +997,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -1069,6 +1070,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1267,6 +1269,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: -- Gitee From b4415b4351cdbb1fc12e2aea42e4048933fda985 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 12 Mar 2024 15:40:50 -0700 Subject: [PATCH 04/12] x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key ANBZ: #27027 commit 6192d9ed311f70eb7e8ab4a874631a98c5a9217e upstream The VERW mitigation at exit-to-user is enabled via a static branch mds_user_clear. This static branch is never toggled after boot, and can be safely replaced with an ALTERNATIVE() which is convenient to use in asm. Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user path. Also remove the now redundant VERW in exc_nmi() and arch_exit_to_user_mode(). Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/x86/mds.rst | 38 ++++++++++++++++++++-------- arch/x86/include/asm/entry-common.h | 1 - arch/x86/include/asm/nospec-branch.h | 12 --------- arch/x86/kernel/cpu/bugs.c | 15 +++++------ arch/x86/kernel/nmi.c | 3 --- arch/x86/kvm/vmx/vmx.c | 2 +- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be200f..e801df0bb3a8 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. 
+ + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 5443851d3aa6..264ab414e9f6 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -77,7 +77,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b853f6037467..94d37131fb30 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -365,7 +365,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); @@ -397,17 +396,6 @@ static __always_inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } -/** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 51dfedc68d6a..1e8835d90a95 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -296,7 +293,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || 
cpu_mitigations_auto_nosmt())) @@ -400,7 +397,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -467,7 +464,7 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else static_branch_enable(&mmio_stale_data_clear); @@ -527,12 +524,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4bce802d25fb..b892fe7035db 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -519,9 +519,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi) write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 33761204f143..03bc9a2602f0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7159,7 +7159,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) + else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) -- Gitee From caa180484c25e8cde600fc5f7348342e3f066e4c Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 12 Mar 2024 15:41:02 -0700 Subject: [PATCH 05/12] KVM/VMX: Move VERW closer to VMentry for MDS mitigation ANBZ: #27027 commit b9a97767c6be8a5e746609da7cbb4f63b42b0622 upstream During VMentry VERW is executed to mitigate MDS. After VERW, any memory access like register push onto stack may put host data in MDS affected CPU buffers. A guest can then use MDS to sample host data. Although likelihood of secrets surviving in registers at current VERW callsite is less, but it can't be ruled out. Harden the MDS mitigation by moving the VERW mitigation late in VMentry path. Note that VERW for MMIO Stale Data mitigation is unchanged because of the complexity of per-guest conditional VERW which is not easy to handle that late in asm with no GPRs available. If the CPU is also affected by MDS, VERW is unconditionally executed late in asm regardless of guest having MMIO access. 
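
[ note: an illustrative sketch of the ordering constraint, not the
  actual __vmx_vcpu_run. The clear must come after the last host-side
  register loads and immediately before VMLAUNCH/VMRESUME; and because
  the memory-operand VERW clobbers EFLAGS.ZF, it must not sit between a
  flag-setting instruction and the branch that consumes that flag:

	extern unsigned short mds_verw_sel;	/* VERW operand exported for KVM */

	static inline void vmenter_tail_sketch(unsigned long guest_rax)
	{
		asm volatile("mov  %[grax], %%rax\n\t"	/* last guest-register load */
			     "verw %[sel]\n\t"		/* clear buffers; clobbers ZF */
			     /* vmlaunch/vmresume would issue here */
			     :
			     : [grax] "r" (guest_rax), [sel] "m" (mds_verw_sel)
			     : "rax", "cc");
	}
]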
[ pawan: conflict resolved in backport ] Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Sean Christopherson Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kvm/vmx/vmenter.S | 3 +++ arch/x86/kvm/vmx/vmx.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 982138bebb70..d0858150288e 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -99,6 +99,9 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + /* Check EFLAGS.ZF from 'testb' above */ jz .Lvmlaunch diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 03bc9a2602f0..d059bb0e2fd1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -410,7 +410,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -7156,11 +7157,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); -- Gitee From 8799b4f71d540749ee6b2ed77986d18cd78a55a7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 5 Apr 2024 16:19:51 +0200 Subject: [PATCH 06/12] x86/bugs: Fix the SRSO mitigation on Zen3/4 ANBZ: #27027 commit f5e9b93fbe70bbc1c2b1c40aefd524bce64abf1c upstream The original version of the mitigation would patch in the calls to the untraining routines directly. That is, the alternative() in UNTRAIN_RET will patch in the CALL to srso_alias_untrain_ret() directly. However, even if commit e7c25c441e9e ("x86/cpu: Cleanup the untrain mess") meant well in trying to clean up the situation, due to micro- architectural reasons, the untraining routine srso_alias_untrain_ret() must be the target of a CALL instruction and not of a JMP instruction as it is done now. Reshuffle the alternative macros to accomplish that. 
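
[ note: a C model of the reshuffle, for illustration only -- the kernel
  patches instruction bytes at boot via alternatives, not function
  pointers. The point is that with X86_FEATURE_SRSO_ALIAS set, the use
  site ends up with a direct CALL to srso_alias_untrain_ret(), which is
  what the microarchitecture requires:

	typedef void (*untrain_fn)(void);

	extern void entry_untrain_ret(void);
	extern void srso_alias_untrain_ret(void);

	/* Models: ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET,
	 *                           "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
	 * Alternatives are applied in order, so a set SRSO_ALIAS feature
	 * overrides the UNRET replacement. */
	static untrain_fn pick_untrain_ret(int has_unret, int has_srso_alias)
	{
		if (has_srso_alias)
			return srso_alias_untrain_ret;
		if (has_unret)
			return entry_untrain_ret;
		return (untrain_fn)0;	/* call site stays NOPed out */
	}
]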
Fixes: e7c25c441e9e ("x86/cpu: Cleanup the untrain mess") Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ingo Molnar Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/asm-prototypes.h | 1 + arch/x86/include/asm/nospec-branch.h | 20 ++++++++++++++------ arch/x86/lib/retpoline.S | 4 +--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8f80de627c60..5cdccea45554 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 94d37131fb30..8850c01faeee 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -143,11 +143,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -164,9 +173,8 @@ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + CALL_UNTRAIN_RET + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index fc9abc482e21..2323077ddbe0 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -252,9 +252,7 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE "jmp retbleed_untrain_ret", "jmp srso_untrain_ret", X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -- Gitee From b172978f5399a3a90aa21f6b4af6e532c90983fe Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 26 Sep 2024 09:10:31 -0700 Subject: [PATCH 07/12] x86/bugs: Use code segment selector for VERW operand ANBZ: #27027 commit bfd1d223d80cb29a210caa1bd5e21f0816d58f02 upstream Robert Gill reported below #GP in 32-bit mode when dosemu software was executing vm86() system call: general protection fault: 0000 [#1] PREEMPT SMP CPU: 4 PID: 4610 Comm: dosemu.bin Not tainted 6.6.21-gentoo-x86 #1 Hardware name: Dell Inc. 
PowerEdge 1950/0H723K, BIOS 2.7.0 10/30/2010 EIP: restore_all_switch_stack+0xbe/0xcf EAX: 00000000 EBX: 00000000 ECX: 00000000 EDX: 00000000 ESI: 00000000 EDI: 00000000 EBP: 00000000 ESP: ff8affdc DS: 0000 ES: 0000 FS: 0000 GS: 0033 SS: 0068 EFLAGS: 00010046 CR0: 80050033 CR2: 00c2101c CR3: 04b6d000 CR4: 000406d0 Call Trace: show_regs+0x70/0x78 die_addr+0x29/0x70 exc_general_protection+0x13c/0x348 exc_bounds+0x98/0x98 handle_exception+0x14d/0x14d exc_bounds+0x98/0x98 restore_all_switch_stack+0xbe/0xcf exc_bounds+0x98/0x98 restore_all_switch_stack+0xbe/0xcf This only happens in 32-bit mode when VERW based mitigations like MDS/RFDS are enabled. This is because segment registers with an arbitrary user value can result in #GP when executing VERW. Intel SDM vol. 2C documents the following behavior for VERW instruction: #GP(0) - If a memory operand effective address is outside the CS, DS, ES, FS, or GS segment limit. CLEAR_CPU_BUFFERS macro executes VERW instruction before returning to user space. Use %cs selector to reference VERW operand. This ensures VERW will not #GP for an arbitrary user %ds. [ mingo: Fixed the SOB chain. ] Fixes: a0e2dab44d22 ("x86/entry_32: Add VERW just before userspace transition") Reported-by: Robert Gill Reviewed-by: Andrew Cooper Suggested-by: Brian Gerst Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/nospec-branch.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8850c01faeee..03a938407d06 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,7 +194,16 @@ */ .macro CLEAR_CPU_BUFFERS ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF - verw _ASM_RIP(mds_verw_sel) +#ifdef CONFIG_X86_64 + verw mds_verw_sel(%rip) +#else + /* + * In 32bit mode, the memory operand must be a %cs reference. The data + * segments may not be usable (vm86 mode), and the stack segment may not + * be flat (ESPFIX32). + */ + verw %cs:mds_verw_sel +#endif .Lskip_verw_\@: .endm -- Gitee From c0ec85952147c10e7ba8ff54ec09603c87740d63 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 15 Jul 2025 14:37:45 +0200 Subject: [PATCH 08/12] x86/bugs: Rename MDS machinery to something more generic ANBZ: #27027 commit b9d08aebbe1859bbca5600fd1220b2ffa703df29 upstream It will be used by other x86 mitigations. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pawan Gupta Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- .../hw-vuln/processor_mmio_stale_data.rst | 4 +-- arch/x86/entry/entry.S | 8 ++--- arch/x86/include/asm/irqflags.h | 4 +-- arch/x86/include/asm/mwait.h | 5 ++-- arch/x86/include/asm/nospec-branch.h | 29 ++++++++++--------- arch/x86/kernel/cpu/bugs.c | 12 ++++---- arch/x86/kvm/vmx/vmx.c | 2 +- 7 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 9393c50b5afc..d33a071c1631 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in combination with a microcode update. 
The microcode clears the affected CPU buffers when the VERW instruction is executed. -Kernel reuses the MDS function to invoke the buffer clearing: - - mds_clear_cpu_buffers() +Kernel does the buffer clearing with x86_clear_cpu_buffers(). On MDS affected CPUs, the kernel already invokes CPU buffer clear on kernel/userspace, hypervisor/guest and C-state (idle) transitions. No diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index a4db552908c5..c3a21b28b3dc 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -26,19 +26,19 @@ EXPORT_SYMBOL_GPL(entry_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. */ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index f40dea50dfbf..1dc747d84201 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -56,13 +56,13 @@ static __always_inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 29dd27b5a339..35e20e8a7cc6 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,7 +43,7 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" @@ -88,7 +88,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 03a938407d06..5e137db4e9cc 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -186,23 +186,23 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. 
*/ .macro CLEAR_CPU_BUFFERS ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF #ifdef CONFIG_X86_64 - verw mds_verw_sel(%rip) + verw x86_verw_sel(%rip) #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - verw %cs:mds_verw_sel + verw %cs:x86_verw_sel #endif .Lskip_verw_\@: .endm @@ -382,22 +382,22 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -414,14 +414,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1e8835d90a95..a6ac8ba72425 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -111,9 +111,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); @@ -474,7 +474,7 @@ static void __init mmio_select_mitigation(void) * is required irrespective of SMT state. */ if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); /* * Check if the system has the right microcode. 
@@ -1609,10 +1609,10 @@ static void update_mds_branch_idle(void)
 		return;
 
 	if (sched_smt_active()) {
-		static_branch_enable(&mds_idle_clear);
+		static_branch_enable(&cpu_buf_idle_clear);
 	} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
 		   (ia32_cap & ARCH_CAP_FBSDP_NO)) {
-		static_branch_disable(&mds_idle_clear);
+		static_branch_disable(&cpu_buf_idle_clear);
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d059bb0e2fd1..3d78c0031073 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7167,7 +7167,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 		vmx_l1d_flush(vcpu);
 	else if (static_branch_unlikely(&mmio_stale_data_clear) &&
 		 kvm_arch_has_assigned_device(vcpu->kvm))
-		mds_clear_cpu_buffers();
+		x86_clear_cpu_buffers();
 
 	vmx_disable_fb_clear(vmx);
-- 
Gitee

From 351a4cc7459e7b4934938fa21ba719feec8d3ad1 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)"
Date: Tue, 15 Jul 2025 14:37:46 +0200
Subject: [PATCH 09/12] x86/bugs: Add a Transient Scheduler Attacks mitigation

ANBZ: #27027

commit 78192f511f401af644e6ccb7c02bc1ce39092851 upstream

Add the required features detection glue to bugs.c et al. in order to
support the TSA mitigation.

[Backport Changes]
In arch/x86/kernel/cpu/bugs.c, the call to tsa_select_mitigation() was
added to check_bugs() instead of cpu_select_mitigations(), as the
upstream commit that replaced check_bugs() with cpu_select_mitigations()
is not available yet in the current source tree. The change was
therefore applied to the equivalent function check_bugs().

Co-developed-by: Kim Phillips
Signed-off-by: Kim Phillips
Signed-off-by: Borislav Petkov (AMD)
Reviewed-by: Pawan Gupta
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Jeevan deep J
Signed-off-by: PrithivishS
---
 .../ABI/testing/sysfs-devices-system-cpu      |   1 +
 .../admin-guide/kernel-parameters.txt         |  13 ++
 arch/x86/Kconfig                              |   9 ++
 arch/x86/include/asm/cpu.h                    |  13 ++
 arch/x86/include/asm/cpufeatures.h            |   6 +
 arch/x86/include/asm/mwait.h                  |   2 +-
 arch/x86/include/asm/nospec-branch.h          |  12 +-
 arch/x86/kernel/cpu/amd.c                     |  58 +++++++++
 arch/x86/kernel/cpu/bugs.c                    | 121 ++++++++++++++++++
 arch/x86/kernel/cpu/common.c                  |  14 +-
 arch/x86/kernel/cpu/scattered.c               |   2 +
 arch/x86/kvm/svm/vmenter.S                    |   3 +
 drivers/base/cpu.c                            |   7 +
 include/linux/cpu.h                           |   1 +
 14 files changed, 257 insertions(+), 5 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 44c6e5730398..22d68f660975 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -508,6 +508,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/l1tf
 		/sys/devices/system/cpu/vulnerabilities/mds
 		/sys/devices/system/cpu/vulnerabilities/srbds
+		/sys/devices/system/cpu/vulnerabilities/tsa
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
 		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
 		/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a8ffa1069d72..3110ef7ea614 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5750,6 +5750,19 @@
 			See Documentation/admin-guide/mm/transhuge.rst
 			for more details.
 
+	tsa=		[X86] Control mitigation for Transient Scheduler
+			Attacks on AMD CPUs. 
Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. Format: [x86] reliable: mark tsc clocksource as reliable, this diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dc8824a55028..587c199803d0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2670,6 +2670,15 @@ config SLS against straight line speculation. The kernel image might be slightly larger. +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 0a0e510202d1..af7c264e100e 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -112,4 +112,17 @@ extern u64 x86_read_arch_cap_msr(void); int intel_find_matching_signature(void *mc, unsigned int csig, int cpf); int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); + +union zen_patch_rev { + struct { + __u32 rev : 8, + stepping : 4, + model : 4, + __reserved : 4, + ext_model : 4, + ext_fam : 8; + }; + __u32 ucode_rev; +}; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2c7d480fcfd9..af285eaba8bf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -463,6 +463,7 @@ /* HYGON Secure Isolated Virtualization */ #define X86_FEATURE_CSV3 (19*32 + 30) /* HYGON Secure Isolated Virtualization */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* "" The memory form of VERW mitigates TSA */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ @@ -483,6 +484,10 @@ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_ABMC (21*32+ 6) /* "" Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */ + /* * BUG word(s) @@ -527,4 +532,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_TSA X86_BUG(1*32 + 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 35e20e8a7cc6..20b33e6370c3 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -79,7 +79,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" diff --git 
a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5e137db4e9cc..e5366fb01343 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -192,8 +192,8 @@ * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS - ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF +.macro __CLEAR_CPU_BUFFERS feature + ALTERNATIVE "jmp .Lskip_verw_\@", "", \feature #ifdef CONFIG_X86_64 verw x86_verw_sel(%rip) #else @@ -207,6 +207,12 @@ .Lskip_verw_\@: .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -415,7 +421,7 @@ static __always_inline void x86_clear_cpu_buffers(void) /** * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS - * vulnerability + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d62edc1d6175..8382e8013914 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -508,6 +508,62 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c) #endif } +static bool amd_check_tsa_microcode(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + union zen_patch_rev p; + u32 min_rev = 0; + + p.ext_fam = c->x86 - 0xf; + p.model = c->x86_model; + p.ext_model = c->x86_model >> 4; + p.stepping = c->x86_stepping; + + if (c->x86 == 0x19) { + switch (p.ucode_rev >> 8) { + case 0xa0011: min_rev = 0x0a0011d7; break; + case 0xa0012: min_rev = 0x0a00123b; break; + case 0xa0082: min_rev = 0x0a00820d; break; + case 0xa1011: min_rev = 0x0a10114c; break; + case 0xa1012: min_rev = 0x0a10124c; break; + case 0xa1081: min_rev = 0x0a108109; break; + case 0xa2010: min_rev = 0x0a20102e; break; + case 0xa2012: min_rev = 0x0a201211; break; + case 0xa4041: min_rev = 0x0a404108; break; + case 0xa5000: min_rev = 0x0a500012; break; + case 0xa6012: min_rev = 0x0a60120a; break; + case 0xa7041: min_rev = 0x0a704108; break; + case 0xa7052: min_rev = 0x0a705208; break; + case 0xa7080: min_rev = 0x0a708008; break; + case 0xa70c0: min_rev = 0x0a70c008; break; + case 0xaa002: min_rev = 0x0aa00216; break; + default: + pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n", + __func__, p.ucode_rev, c->microcode); + return false; + } + } + + if (!min_rev) + return false; + + return c->microcode >= min_rev; +} + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (c->x86 == 0x19) { + if (amd_check_tsa_microcode()) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { @@ -646,6 +702,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } + + tsa_init(c); return; warn: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a6ac8ba72425..e072ebd4d68b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init srso_select_mitigation(void); +static void __init tsa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set 
*/ u64 x86_spec_ctrl_base; @@ -208,6 +209,7 @@ void __init check_bugs(void) if (!direct_gbpages) set_memory_4k((unsigned long)__va(0), 1); #endif + tsa_select_mitigation(); } /* @@ -1620,6 +1622,94 @@ static void update_mds_branch_idle(void) #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" #define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" +#undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_FULL : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + + case TSA_MITIGATION_UCODE_NEEDED: + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) + goto out; + + pr_notice("Forcing mitigation on in a VM\n"); + + /* + * On the off-chance that microcode has been updated + * on the host, enable the mitigation in the guest just + * in case. + */ + fallthrough; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } + +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); @@ -1673,6 +1763,24 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_FULL: + case TSA_MITIGATION_UCODE_NEEDED: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. 
+ */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -2513,6 +2621,11 @@ static ssize_t srso_show_state(char *buf) boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode"); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2564,6 +2677,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -2630,4 +2746,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8deb567c92a6..f6dceb6ea7f6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1215,6 +1215,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { /* CPU is affected by SRSO */ #define SRSO BIT(5) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1248,7 +1250,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SRSO), VULNBL_HYGON(0x18, RETBLEED | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), {} }; @@ -1368,6 +1370,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 10cd37e096d7..5046f535cedf 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -47,6 +47,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 99f9cc140b41..9011534f6837 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -77,6 +77,9 @@ SYM_FUNC_START(__svm_vcpu_run) /* "POP" @vmcb to RAX. 
*/ pop %_ASM_AX + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ sti 1: vmload %_ASM_AX diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index e7d2d1cf1a37..cf094c8c027b 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -620,6 +620,11 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -632,6 +637,7 @@ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -646,6 +652,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_mmio_stale_data.attr, &dev_attr_retbleed.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5c537c984ee5..165b782d3f4c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,7 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- Gitee From 27e68e7aeed66adcb4e52dcc8e8f28bdd06d1d8e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 15 Jul 2025 14:37:47 +0200 Subject: [PATCH 10/12] KVM: x86: add support for CPUID leaf 0x80000021 ANBZ: #27027 commit 6457a8c0a02694375271cb40250527672c64395d upstream. CPUID leaf 0x80000021 defines some features (or lack of bugs) of AMD processors. Expose the ones that make sense via KVM_GET_SUPPORTED_CPUID. 
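
[ note: the leaf is easy to inspect from user space; a minimal sketch
  using the compiler-provided cpuid.h helper. The EAX bit positions
  follow the comment above; bit 5 (VERW_CLEAR) and the ECX bits only
  become meaningful with the next two patches:

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0x80000021, &eax, &ebx, &ecx, &edx))
			return 1;	/* leaf not implemented */

		printf("NNDBP=%u LAS=%u VERW_CLEAR=%u NSCB=%u\n",
		       eax & 1, (eax >> 2) & 1, (eax >> 5) & 1, (eax >> 6) & 1);
		printf("TSA_SQ_NO=%u TSA_L1_NO=%u\n",
		       (ecx >> 1) & 1, (ecx >> 2) & 1);
		return 0;
	}
]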
Signed-off-by: Paolo Bonzini Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kvm/cpuid.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cdb03a4f516d..9f117baf64cc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -962,7 +962,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; break; case 0x80000000: - entry->eax = min(entry->eax, 0x8000001f); + entry->eax = min(entry->eax, 0x80000021); break; case 0x80000001: cpuid_entry_override(entry, CPUID_8000_0001_EDX); @@ -1020,6 +1020,23 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) else cpuid_entry_override(entry, CPUID_8000_001F_EAX); break; + case 0x80000020: + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + case 0x80000021: + entry->ebx = entry->ecx = entry->edx = 0; + /* + * Pass down these bits: + * EAX 0 NNDBP, Processor ignores nested data breakpoints + * EAX 2 LAS, LFENCE always serializing + * EAX 6 NSCB, Null selector clear base + * + * Other defined bits are for MSRs that KVM does not expose: + * EAX 3 SPCL, SMM page configuration lock + * EAX 13 PCMSR, Prefetch control MSR + */ + entry->eax &= BIT(0) | BIT(2) | BIT(6); + break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ -- Gitee From 5b67b8c8dcf09fe994b4858b68fb8a25f0464fd8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 15 Jul 2025 14:37:48 +0200 Subject: [PATCH 11/12] KVM: SVM: Advertise TSA CPUID bits to guests ANBZ: #27027 commit 6fea1a4e150f1fe3726310558f33a30e0c209153 upstream. Synthesize the TSA CPUID feature bits for guests. Set TSA_{SQ,L1}_NO on unaffected machines. [ backporting notes: 5.10 doesn't have the KVM-only CPUID leafs so allocate a separate capability leaf for CPUID_8000_0021_ECX to avoid backporting the world and more. 
Signed-off-by: Borislav Petkov (AMD)
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Jeevan deep J
Signed-off-by: PrithivishS
---
 arch/x86/include/asm/cpufeature.h        |  5 +++--
 arch/x86/include/asm/cpufeatures.h       |  8 ++++----
 arch/x86/include/asm/disabled-features.h |  2 +-
 arch/x86/include/asm/required-features.h |  2 +-
 arch/x86/kernel/cpu/scattered.c          |  2 --
 arch/x86/kvm/cpuid.c                     | 16 ++++++++++++++--
 arch/x86/kvm/cpuid.h                     |  1 +
 7 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a2263d4b39bd..613f66706370 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -34,6 +34,7 @@ enum cpuid_leafs
 	CPUID_8000_001F_EAX,
 	CPUID_8000_0021_EAX,
 	CPUID_8C86_0000_EDX,
+	CPUID_8000_0021_ECX,
 };

 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -96,7 +97,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) ||	\
 	   REQUIRED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 22))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 23))

 #define DISABLED_MASK_BIT_SET(feature_bit)				\
 	( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
@@ -122,7 +123,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) ||	\
 	   DISABLED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 22))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 23))

 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index af285eaba8bf..3a73c77effc8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS			22	   /* N 32-bit words worth of info */
+#define NCAPINTS			23	   /* N 32-bit words worth of info */
 #define NBUGINTS			2	   /* N 32-bit bug flags */

 /*
@@ -484,9 +484,9 @@
 #define X86_FEATURE_AMD_LBR_PMC_FREEZE	(21*32+ 0) /* AMD LBR and PMC Freeze */
 #define X86_FEATURE_ABMC		(21*32+ 6) /* "" Assignable Bandwidth Monitoring Counters */
-#define X86_FEATURE_TSA_SQ_NO		(21*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */
-#define X86_FEATURE_TSA_L1_NO		(21*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */
-#define X86_FEATURE_CLEAR_CPU_BUF_VM	(21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_TSA_SQ_NO		(22*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO		(22*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM	(22*32+13) /* "" Clear CPU buffers using VERW before VMRUN */

 /*
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 4d62024fd0ba..ea458d341f71 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -126,6 +126,6 @@
 #define DISABLED_MASK19			0
 #define DISABLED_MASK20			0
 #define DISABLED_MASK21			0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23)

 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 1fbe53583e95..4e3cd318323b 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -104,6 +104,6 @@
 #define REQUIRED_MASK19			0
 #define REQUIRED_MASK20			0
 #define REQUIRED_MASK21			0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23)

 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 5046f535cedf..10cd37e096d7 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -47,8 +47,6 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_SMBA,		CPUID_EBX,	2, 0x80000020, 0 },
 	{ X86_FEATURE_BMEC,		CPUID_EBX,	3, 0x80000020, 0 },
 	{ X86_FEATURE_ABMC,		CPUID_EBX,	5, 0x80000020, 0 },
-	{ X86_FEATURE_TSA_SQ_NO,	CPUID_ECX,	1, 0x80000021, 0 },
-	{ X86_FEATURE_TSA_L1_NO,	CPUID_ECX,	2, 0x80000021, 0 },
 	{ X86_FEATURE_PERFMON_V2,	CPUID_EAX,	0, 0x80000022, 0 },
 	{ X86_FEATURE_AMD_LBR_V2,	CPUID_EAX,	1, 0x80000022, 0 },
 	{ X86_FEATURE_AMD_LBR_PMC_FREEZE,	CPUID_EAX,	2, 0x80000022, 0 },
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9f117baf64cc..33f5c19ad702 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -598,6 +598,15 @@ void kvm_set_cpu_caps(void)
 	kvm_cpu_cap_mask(CPUID_8000_001F_EAX,
 		0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) |
 		F(SME_COHERENT));

+	if (cpu_feature_enabled(X86_FEATURE_VERW_CLEAR))
+		kvm_cpu_cap_set(X86_FEATURE_VERW_CLEAR);
+
+	if (cpu_feature_enabled(X86_FEATURE_TSA_SQ_NO))
+		kvm_cpu_cap_set(X86_FEATURE_TSA_SQ_NO);
+
+	if (cpu_feature_enabled(X86_FEATURE_TSA_L1_NO))
+		kvm_cpu_cap_set(X86_FEATURE_TSA_L1_NO);
+
 	kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
 		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
@@ -1024,18 +1033,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
 		break;
 	case 0x80000021:
-		entry->ebx = entry->ecx = entry->edx = 0;
+		entry->ebx = entry->edx = 0;
 		/*
 		 * Pass down these bits:
 		 *  EAX 0	NNDBP, Processor ignores nested data breakpoints
 		 *  EAX 2	LAS, LFENCE always serializing
+		 *  EAX 5	VERW_CLEAR, mitigate TSA
 		 *  EAX 6	NSCB, Null selector clear base
 		 *
 		 * Other defined bits are for MSRs that KVM does not expose:
 		 *  EAX 3	SPCL, SMM page configuration lock
 		 *  EAX 13	PCMSR, Prefetch control MSR
 		 */
-		entry->eax &= BIT(0) | BIT(2) | BIT(6);
+		cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+		entry->eax &= BIT(0) | BIT(2) | BIT(5) | BIT(6);
+		cpuid_entry_override(entry, CPUID_8000_0021_ECX);
 		break;
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 85407b457efa..77a9e811085a 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -107,6 +107,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 	[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
 	[CPUID_12_EAX]        = {0x00000012, 0, CPUID_EAX},
 	[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
+	[CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
 };

 /*
-- 
Gitee

From b0189cd54e8e53179485126ab66488fdb778d4b8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 15 Jul 2025 14:37:49 +0200
Subject: [PATCH 12/12] x86/process: Move the buffer clearing before MONITOR

ANBZ: #27027

commit a8a08f1e519b4fd08adce08ebfa1763950be5d99 upstream

Move the VERW clearing before the MONITOR so that VERW doesn't disarm
it and the machine never enters C1.

Original idea by Kim Phillips.
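  [ illustration, not part of the upstream commit: the intended ordering,
    boiled down from the hunks below. VERW executed between MONITOR and
    MWAIT can disarm the already-armed monitor, so MWAIT returns
    immediately and C1 is never entered; clearing the buffers first
    avoids that window:

	if (need_resched())
		return;

	x86_idle_clear_cpu_buffers();	/* VERW, before arming MONITOR */

	__monitor((void *)&current_thread_info()->flags, 0, 0);
	if (!need_resched())
		__mwait(eax, ecx);	/* monitor still armed, C1 entry works */ ]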
Suggested-by: Andrew Cooper
Signed-off-by: Borislav Petkov (AMD)
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Jeevan deep J
Signed-off-by: PrithivishS
---
 arch/x86/include/asm/mwait.h | 16 +++++++++++-----
 arch/x86/kernel/process.c    | 15 ++++++++++++---
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 20b33e6370c3..2a2de4f3cb20 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,8 +43,6 @@ static inline void __monitorx(const void *eax, unsigned long ecx,

 static inline void __mwait(unsigned long eax, unsigned long ecx)
 {
-	x86_idle_clear_cpu_buffers();
-
 	/* "mwait %eax, %ecx;" */
 	asm volatile(".byte 0x0f, 0x01, 0xc9;"
 		     :: "a" (eax), "c" (ecx));
@@ -88,7 +86,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,

 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-	x86_idle_clear_cpu_buffers();

 	/* "mwait %eax, %ecx;" */
 	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
@@ -107,6 +104,11 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
  */
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
+	if (need_resched())
+		return;
+
+	x86_idle_clear_cpu_buffers();
+
 	if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
 		if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
 			mb();
@@ -115,9 +117,13 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 		}

 		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		if (!need_resched())
-			__mwait(eax, ecx);
+		if (need_resched())
+			goto out;
+
+		__mwait(eax, ecx);
 	}
+
+out:
 	current_clr_polling();
 }

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b3ef8593ca6b..3f53261614f0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -863,6 +863,11 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
  */
 static __cpuidle void mwait_idle(void)
 {
+	if (need_resched())
+		return;
+
+	x86_idle_clear_cpu_buffers();
+
 	if (!current_set_polling_and_test()) {
 		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
 			mb(); /* quirk */
@@ -871,13 +876,17 @@ static __cpuidle void mwait_idle(void)
 		}

 		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		if (!need_resched())
-			__sti_mwait(0, 0);
-		else
+		if (need_resched()) {
 			raw_local_irq_enable();
+			goto out;
+		}
+
+		__sti_mwait(0, 0);
 	} else {
 		raw_local_irq_enable();
 	}
+
+out:
 	__current_clr_polling();
 }
-- 
Gitee
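  [ series sanity check, not part of any patch: on a kernel carrying
    these backports, the tsa attribute added to the standard
    /sys/devices/system/cpu/vulnerabilities/ directory earlier in the
    series should exist and be readable, e.g.:

	#include <stdio.h>

	int main(void)
	{
		char buf[128];
		FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/tsa", "r");

		if (!f)
			return 1;	/* attribute missing: patches not applied? */
		if (fgets(buf, sizeof(buf), f))
			printf("tsa: %s", buf);
		fclose(f);
		return 0;
	} ]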