diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 44c6e57303988cf527e9e8f9619071b2d8571f46..22d68f6609755f38ee631bf847395fad9b7202d7 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -508,6 +508,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/mmio_stale_data diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 9393c50b5afc9c9fe8b9ac90ed9fe4774e1d1550..d33a071c1631e0cc18254da5ff1488f9df8a7cf6 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in combination with a microcode update. The microcode clears the affected CPU buffers when the VERW instruction is executed. -Kernel reuses the MDS function to invoke the buffer clearing: - - mds_clear_cpu_buffers() +The kernel does the buffer clearing with x86_clear_cpu_buffers(). On MDS affected CPUs, the kernel already invokes CPU buffer clear on kernel/userspace, hypervisor/guest and C-state (idle) transitions. No diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a8ffa1069d72a7d6572db3a8014b6f282164da41..3110ef7ea6141ae9e1e975ad71cc9346bcc9aff3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5750,6 +5750,19 @@ See Documentation/admin-guide/mm/transhuge.rst for more details. + tsa= [X86] Control mitigation for Transient Scheduler + Attacks on AMD CPUs. Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. Format: [x86] reliable: mark tsc clocksource as reliable, this diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be200f980cf4ea7a8e17b9e89012dcc381..e801df0bb3a81a83c2d7676d72982c3d72c33fca 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +The CLEAR_CPU_BUFFERS macro can also be used in ASM late in the exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions.
The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW, e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to the kernel don't clear CPU buffers since the + exit-to-user path is expected to do that anyway. But there could be + a case when an NMI is generated in the kernel after the exit-to-user path + has cleared the buffers. This case is not handled, and NMIs returning to + the kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. What's left + is only the data that the NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dc8824a55028b0e2c74eb29ddd5ba395e77be703..587c199803d0dd012096adf7019d85ebaea828c8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2670,6 +2670,15 @@ config SLS against straight line speculation. The kernel image might be slightly larger. +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause data leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb362bcfca2f84947e0925766925687d77c..c3a21b28b3dce10f2d5f9e76d5725c04a12f68d7 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,22 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensures VERW can be + * used late in the exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(x86_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(x86_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(x86_verw_sel); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8fcd6a42b3a18f0e5f83bc022cbd6d22a6049bec..55abddcd66075e06076b1524ddd0c034ee324704 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -997,6 +997,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx.
@@ -1069,6 +1070,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1267,6 +1269,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e81282cc67fc1c3bb73bab35e72d1a6829cb82d4..ed5bbc33d0f23720f247d663e7027b0a9f5d4860 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -618,6 +618,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi SWAPGS + CLEAR_CPU_BUFFERS INTERRUPT_RETURN @@ -724,6 +725,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1490,6 +1493,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1507,6 +1516,7 @@ SYM_CODE_END(asm_exc_nmi) SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index da9fbd358412a79a7c754abeafe4d8dd21d73e61..6fd2f8cbb3b61cfc0df2f4a8f13bd9d2a3a67b2c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -320,6 +320,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) SYM_CODE_END(entry_SYSCALL_compat) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8f80de627c60a8accabc0c6bcc1595045f0af53c..5cdccea455544f692569ecef2063659c11167b9b 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 0a0e510202d1ec74be32cb694eaf558dabbee28e..af7c264e100e0c1cd20c132fafe8bf4996d2ecdc 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -112,4 +112,17 @@ extern u64 x86_read_arch_cap_msr(void); int intel_find_matching_signature(void *mc, unsigned int csig, int cpf); int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); + +union zen_patch_rev { + struct { + __u32 rev : 8, + stepping : 4, + model : 4, + __reserved : 4, + ext_model : 4, + ext_fam : 8; + }; + __u32 ucode_rev; +}; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a2263d4b39bd73f56532b4bb92727b05e3abb130..613f667063708ef6dbac2d237d36966892d94e93 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -34,6 +34,7 @@ enum cpuid_leafs CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, CPUID_8C86_0000_EDX, + CPUID_8000_0021_ECX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -96,7 +97,7 @@ extern 
const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + BUILD_BUG_ON_ZERO(NCAPINTS != 23)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -122,7 +123,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + BUILD_BUG_ON_ZERO(NCAPINTS != 23)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c6cf63569dc10c9df9dfdf9d1181a75a682bf43f..3a73c77effc8f6d4278595322dde7040dcdf85c3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 22 /* N 32-bit words worth of info */ +#define NCAPINTS 23 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -330,6 +330,7 @@ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+19) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ @@ -462,6 +463,7 @@ /* HYGON Secure Isolated Virtualization */ #define X86_FEATURE_CSV3 (19*32 + 30) /* HYGON Secure Isolated Virtualization */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* "" The memory form of VERW mitigates TSA */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ @@ -482,6 +484,10 @@ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_ABMC (21*32+ 6) /* "" Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_TSA_SQ_NO (22*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (22*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (22*32+13) /* "" Clear CPU buffers using VERW before VMRUN */ + /* * BUG word(s) @@ -526,4 +532,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_TSA X86_BUG(1*32 + 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 4d62024fd0ba75d7d5d8274ae7fe1f3a5aec7ed9..ea458d341f718bd52ae584f0588a5081ad205d72 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -126,6 +126,6 @@ #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 #define DISABLED_MASK21 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 5443851d3aa6089e592ee60b0fb30120c1e1ac4d..264ab414e9f632d3d37f8b336df2aa34762dc5fb 100644 --- 
a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -77,7 +77,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c86edefa11508ab3401ead3bcfd757b3834e015..1dc747d84201aba083b7bb72915c9aca83ff11bd 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -56,13 +56,13 @@ static __always_inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } @@ -134,6 +134,7 @@ static __always_inline unsigned long arch_local_irq_save(void) #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ + CLEAR_CPU_BUFFERS; \ sysretq; #define USERGS_SYSRET32 \ swapgs; \ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 29dd27b5a339db16cfb1b40aa68004cf7b2bac0c..2a2de4f3cb204e7fae74e094786ee3e1461344f1 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { - mds_idle_clear_cpu_buffers(); - /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -79,7 +77,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" @@ -88,7 +86,7 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -106,6 +104,11 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) */ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { mb(); @@ -114,9 +117,13 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) } __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (!need_resched()) - __mwait(eax, ecx); + if (need_resched()) + goto out; + + __mwait(eax, ecx); } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9be65ed0ba294282ede6cf9d68a607a3615f5f65..e5366fb01343667ef0d8be6781bd0b2d57de4600 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -143,11 +143,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a 
wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -164,9 +173,8 @@ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + CALL_UNTRAIN_RET + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -177,6 +185,34 @@ #endif .endm +/* + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro __CLEAR_CPU_BUFFERS feature + ALTERNATIVE "jmp .Lskip_verw_\@", "", \feature +#ifdef CONFIG_X86_64 + verw x86_verw_sel(%rip) +#else + /* + * In 32bit mode, the memory operand must be a %cs reference. The data + * segments may not be usable (vm86 mode), and the stack segment may not + * be flat (ESPFIX32). + */ + verw %cs:x86_verw_sel +#endif +.Lskip_verw_\@: +.endm + +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -352,21 +388,22 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 x86_verw_sel; + #include /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -383,25 +420,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - -/** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. 
* * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 1fbe53583e9529c8860a9b848f17fe457243be40..4e3cd318323b2a04d7eb296f57b1dd0d5fb19f16 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -104,6 +104,6 @@ #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 #define REQUIRED_MASK21 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d62edc1d61759bbcd96eabf56e4f893aa23bf078..8382e8013914cd016a20d4a4803a621afc554132 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -508,6 +508,62 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c) #endif } +static bool amd_check_tsa_microcode(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + union zen_patch_rev p; + u32 min_rev = 0; + + p.ext_fam = c->x86 - 0xf; + p.model = c->x86_model; + p.ext_model = c->x86_model >> 4; + p.stepping = c->x86_stepping; + + if (c->x86 == 0x19) { + switch (p.ucode_rev >> 8) { + case 0xa0011: min_rev = 0x0a0011d7; break; + case 0xa0012: min_rev = 0x0a00123b; break; + case 0xa0082: min_rev = 0x0a00820d; break; + case 0xa1011: min_rev = 0x0a10114c; break; + case 0xa1012: min_rev = 0x0a10124c; break; + case 0xa1081: min_rev = 0x0a108109; break; + case 0xa2010: min_rev = 0x0a20102e; break; + case 0xa2012: min_rev = 0x0a201211; break; + case 0xa4041: min_rev = 0x0a404108; break; + case 0xa5000: min_rev = 0x0a500012; break; + case 0xa6012: min_rev = 0x0a60120a; break; + case 0xa7041: min_rev = 0x0a704108; break; + case 0xa7052: min_rev = 0x0a705208; break; + case 0xa7080: min_rev = 0x0a708008; break; + case 0xa70c0: min_rev = 0x0a70c008; break; + case 0xaa002: min_rev = 0x0aa00216; break; + default: + pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n", + __func__, p.ucode_rev, c->microcode); + return false; + } + } + + if (!min_rev) + return false; + + return c->microcode >= min_rev; +} + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (c->x86 == 0x19) { + if (amd_check_tsa_microcode()) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { @@ -646,6 +702,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } + + tsa_init(c); return; warn: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 51dfedc68d6a5c38f3fa3a8d37335f53593f720e..e072ebd4d68b2280b87fa9788f6f2193ae76129c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init srso_select_mitigation(void); +static void __init tsa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -111,12 +112,9 @@ 
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); @@ -211,6 +209,7 @@ void __init check_bugs(void) if (!direct_gbpages) set_memory_4k((unsigned long)__va(0), 1); #endif + tsa_select_mitigation(); } /* @@ -296,7 +295,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -400,7 +399,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -467,7 +466,7 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else static_branch_enable(&mmio_stale_data_clear); @@ -477,7 +476,7 @@ static void __init mmio_select_mitigation(void) * is required irrespective of SMT state. */ if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); /* * Check if the system has the right microcode. @@ -527,12 +526,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { @@ -1612,10 +1611,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (ia32_cap & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -1623,6 +1622,94 @@ static void update_mds_branch_idle(void) #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" #define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" +#undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_FULL : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + + case TSA_MITIGATION_UCODE_NEEDED: + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) + goto out; + + pr_notice("Forcing mitigation on in a VM\n"); + + /* + * On the off-chance that microcode has been updated + * on the host, enable the mitigation in the guest just + * in case. + */ + fallthrough; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } + +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); @@ -1676,6 +1763,24 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_FULL: + case TSA_MITIGATION_UCODE_NEEDED: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -2516,6 +2621,11 @@ static ssize_t srso_show_state(char *buf) boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? 
"" : ", no microcode"); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2567,6 +2677,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -2633,4 +2746,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8deb567c92a6d58ba337a9d2dc1419e0d6bea3d4..f6dceb6ea7f6995518703880d9f949924af4fc2f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1215,6 +1215,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { /* CPU is affected by SRSO */ #define SRSO BIT(5) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1248,7 +1250,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SRSO), VULNBL_HYGON(0x18, RETBLEED | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), {} }; @@ -1368,6 +1370,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. 
*/ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4bce802d25fb18b79cd525883ea87a2b4a75ff24..b892fe7035db5f61929a46f76f943b9b749b06b2 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -519,9 +519,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi) write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b3ef8593ca6b7fe77d5f037874a11bb0c8b81149..3f53261614f0a2e4397f56b100b1e74e45dce942 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -863,6 +863,11 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { mb(); /* quirk */ @@ -871,13 +876,17 @@ static __cpuidle void mwait_idle(void) } __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (!need_resched()) - __sti_mwait(0, 0); - else + if (need_resched()) { raw_local_irq_enable(); + goto out; + } + + __sti_mwait(0, 0); } else { raw_local_irq_enable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cdb03a4f516d8495b23ac9df7d756186f57b14ae..33f5c19ad7022bfc5024b60320e66f05fd085446 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -598,6 +598,15 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_001F_EAX, 0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) | F(SME_COHERENT)); + if (cpu_feature_enabled(X86_FEATURE_VERW_CLEAR)) + kvm_cpu_cap_set(X86_FEATURE_VERW_CLEAR); + + if (cpu_feature_enabled(X86_FEATURE_TSA_SQ_NO)) + kvm_cpu_cap_set(X86_FEATURE_TSA_SQ_NO); + + if (cpu_feature_enabled(X86_FEATURE_TSA_L1_NO)) + kvm_cpu_cap_set(X86_FEATURE_TSA_L1_NO); + kvm_cpu_cap_mask(CPUID_C000_0001_EDX, F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -962,7 +971,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; break; case 0x80000000: - entry->eax = min(entry->eax, 0x8000001f); + entry->eax = min(entry->eax, 0x80000021); break; case 0x80000001: cpuid_entry_override(entry, CPUID_8000_0001_EDX); @@ -1020,6 +1029,26 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) else cpuid_entry_override(entry, CPUID_8000_001F_EAX); break; + case 0x80000020: + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + case 0x80000021: + entry->ebx = entry->edx = 0; + /* + * Pass down these bits: + * EAX 0 NNDBP, Processor ignores nested data breakpoints + * EAX 2 LAS, LFENCE always serializing + * EAX 5 VERW_CLEAR, mitigate TSA + * EAX 6 NSCB, Null selector clear base + * + * Other defined bits are for MSRs that KVM does not expose: + * EAX 3 SPCL, SMM page configuration lock + * EAX 13 PCMSR, Prefetch control MSR + */ + cpuid_entry_override(entry, CPUID_8000_0021_EAX); + entry->eax &= BIT(0) | BIT(2) | BIT(5) | BIT(6); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); + break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 
85407b457efa6f21dea450f88efedcf2c9caaea1..77a9e811085ae48ae05da359f8a063aaf5bdc712 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -107,6 +107,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 99f9cc140b41cb30fe207d3fc7a62a80c29cf966..9011534f68375adc0e3ea150b78b2680b5fa4ef5 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -77,6 +77,9 @@ SYM_FUNC_START(__svm_vcpu_run) /* "POP" @vmcb to RAX. */ pop %_ASM_AX + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ sti 1: vmload %_ASM_AX diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 982138bebb70f3c41ee2dc273c84e0bfb7342510..d0858150288e4cca14cb64074879fed4af7536da 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -99,6 +99,9 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + /* Check EFLAGS.ZF from 'testb' above */ jz .Lvmlaunch diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 33761204f14395656c8a45ca729fecf013e15fd9..3d78c0031073a7719c56f5734db819d51a5dd576 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -410,7 +410,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -7156,14 +7157,17 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. 
+ */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index fc9abc482e21e4e919825b3bb351d9054f67650c..2323077ddbe0f51e2ac11623e681895f1a11b6fa 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -252,9 +252,7 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE "jmp retbleed_untrain_ret", "jmp srso_untrain_ret", X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index e7d2d1cf1a37b396f9f68da7f37ee70edd6717b1..cf094c8c027bdb39b898305bd3894de9853f61da 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -620,6 +620,11 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -632,6 +637,7 @@ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -646,6 +652,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_mmio_stale_data.attr, &dev_attr_retbleed.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5c537c984ee5b8432b0cd0718f021364d5d3e01f..165b782d3f4c8d681b56b18e450fd1077c6144a3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,7 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata,