diff --git a/MAINTAINERS b/MAINTAINERS index bd9daeb31fe694798fea88cf554e36c44a38736d..824937aba8f2925a738e5924b290bb401ef7dd30 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24016,10 +24016,10 @@ L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/zhaoxin-cputemp.c -ZHAOXIN ZXPAUSE INSTRUCTION SUPPORT +ZHAOXIN PAUSEOPT INSTRUCTION SUPPORT M: LeoLiu-oc S: Maintained -F: arch/x86/kernel/cpu/zxpause.c +F: arch/x86/kernel/cpu/pauseopt.c ZHAOXIN PINCTRL DRIVER M: LeoLiu-oc diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 16d0b022d6fff66ca5c19b403ca098b826f79614..1195554bb92c64ec53d496f15d4c1efec29b615d 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -29,3 +29,8 @@ config AS_WRUSS def_bool $(as-instr64,wrussq %rax$(comma)(%rbx)) help Supported by binutils >= 2.31 and LLVM integrated assembler + +config AS_PAUSEOPT + def_bool $(as-instr,pauseopt) + help + Supported by binutils >= xxx-TBD and LLVM integrated assembler xxx-TBD diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 24301ce90e2995ae14ab1f7713e15ebf8f14d97e..0134d108cd9ad879e14f0fc774da1efa92ec3556 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -486,13 +486,6 @@ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ -/* HYGON-defined CPU features, CPUID level 0x8c860000:0 (EDX), word 22 */ -#define X86_FEATURE_HYGON_CIS_SM3 (22*32 + 1) /* "sm3" SM3 instructions */ -#define X86_FEATURE_HYGON_CIS_SM4 (22*32 + 2) /* "sm4" SM4 instructions */ - -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 23 */ -#define X86_FEATURE_ZXPAUSE (23*32 + 0) /* ZHAOXIN ZXPAUSE */ - /* * Extended auxiliary flags: Linux defined - for features scattered in various * CPUID levels like 0x80000022, etc and Linux defined features. 
@@ -512,6 +505,13 @@ #define X86_FEATURE_ABMC (21*32+ 6) /* "" Assignable Bandwidth Monitoring Counters */ #define X86_FEATURE_FAST_CPPC (21*32 + 5) /* "" AMD Fast CPPC */ +/* HYGON-defined CPU features, CPUID level 0x8c860000:0 (EDX), word 22 */ +#define X86_FEATURE_HYGON_CIS_SM3 (22*32 + 1) /* "sm3" SM3 instructions */ +#define X86_FEATURE_HYGON_CIS_SM4 (22*32 + 2) /* "sm4" SM4 instructions */ + +/* Zhaoxin/Centaur-defined CPU features, CPUID level 0xC0000006, word 23 */ +#define X86_FEATURE_PAUSEOPT (23*32 + 0) /* ZHAOXIN PAUSEOPT */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 4dbb3fea67fb510c051de76b86b08d322caf3521..c844077f19b6f7bb0e3b81bc790a5fc0e93d4a1d 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -7,7 +7,7 @@ void __init use_tsc_delay(void); void __init use_tpause_delay(void); -void __init use_zxpause_delay(void); +void __init use_pauseopt_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3a7b7964df2a323456190300cf3d19a3111fa346..27752d44c87fd4d7277ac9c374f6bcbf992ed75b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -75,22 +75,18 @@ #define MSR_IA32_UMWAIT_CONTROL 0xe1 #define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) #define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) - -#define MSR_ZX_PAUSE_CONTROL 0x187f -#define MSR_ZX_PAUSE_CONTROL_C02_DISABLE BIT(0) -#define MSR_ZX_PAUSE_CONTROL_RESERVED BIT(1) - /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL 0x187f /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. 
*/ -#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL_TIME_MASK (~0x03U) /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf @@ -811,10 +807,10 @@ /* * Zhaoxin extend VMCS capabilities: - * bit 0: exec-cntl3 VMCS field. + * bit 0: exec-cntl3 VMCS field. */ -#define MSR_ZX_EXT_VMCS_CAPS 0x1675 -#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0) +#define MSR_ZX_EXT_VMCS_CAPS 0x1675 +#define MSR_ZX_VMCS_EXEC_CTL3_EN BIT(0) /* Transmeta defined MSRs */ #define MSR_TMTA_LONGRUN_CTRL 0x80868010 @@ -1152,6 +1148,9 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 +/* Zhaoxin VT MSRs */ +#define MSR_ZX_VMX_PROCBASED_CTLS3 0x12A7 + /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 #define VMX_BASIC_TRUE_CTLS (1ULL << 55) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 0b9fad3e2bf2217fb5cba368c907a8bf4a01cdee..d480006b6360c31ba03c142faa160383cfce7d32 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,7 +26,7 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -#define ZXPAUSE_C01_STATE 1 +#define PAUSEOPT_P01_STATE 1 static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) @@ -154,16 +154,22 @@ static inline void __tpause(u32 ecx, u32 edx, u32 eax) } /* - * Caller can specify whether to enter C0.1 (low latency, less - * power saving) or C0.2 state (saves more power, but longer wakeup - * latency). This may be overridden by the ZX_PAUSE_CONTROL MSR - * which can force requests for C0.2 to be downgraded to C0.1. + * Caller can specify to enter P0.1 (low latency, less power saving). 
*/ -static inline void __zxpause(u32 ecx, u32 edx, u32 eax) +static inline void __pauseopt(u32 ecx, u32 edx, u32 eax) { - /* "zxpause %ecx, %edx, %eax;" */ - asm volatile(".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" - : - : "c"(ecx), "d"(edx), "a"(eax)); + /* "pauseopt %ecx, %edx, %eax;" */ +#ifdef CONFIG_AS_PAUSEOPT + asm volatile( + "pauseopt\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); +#else + asm volatile( + ".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); +#endif } + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 428348e7f06c3ade22f4feb9eb6a9bee54bb4810..d995b71ccfc95bf254cf7d9012dbf688fc95fd75 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -181,6 +181,20 @@ struct cpuinfo_x86 { */ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; +struct extra_zx_cpuinfo_x86 { +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + unsigned long vmx_tertiary_capability[NVMX_ZX_TERTIARY_INTS]; +#endif +} __randomize_layout; + +extern struct extra_zx_cpuinfo_x86 extra_zx_boot_cpu_data; +#ifdef CONFIG_SMP +DECLARE_PER_CPU_READ_MOSTLY(struct extra_zx_cpuinfo_x86, extra_zx_cpu_info); +#define extra_zx_cpu_data(cpu) per_cpu(extra_zx_cpu_info, cpu) +#else +#define extra_zx_cpu_info extra_zx_boot_cpu_data +#define extra_zx_cpu_data(cpu) extra_zx_boot_cpu_data +#endif extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 3a4f60f19de3efe789e0424bb9b4485c45d7d979..1160d47c8aa73519c7d9c9af22c0f7dc43e83fad 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -20,6 +20,7 @@ #include #define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) +#define VMCS_ZX_TERTIARY_CONTROL_BIT(x) BIT(VMX_ZX_TERTIARY_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. 
@@ -87,7 +88,7 @@ /* * Definitions of Zhaoxin Tertiary Processor-Based VM-Execution Controls. */ -#define ZX_TERTIARY_EXEC_GUEST_ZXPAUSE VMCS_CONTROL_BIT(GUEST_ZXPAUSE) +#define ZX_TERTIARY_EXEC_GUEST_PAUSEOPT VMCS_ZX_TERTIARY_CONTROL_BIT(GUEST_PAUSEOPT) #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) @@ -240,7 +241,7 @@ enum vmcs_field { TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035, PID_POINTER_TABLE = 0x00002042, PID_POINTER_TABLE_HIGH = 0x00002043, - ZXPAUSE_VMEXIT_TSC = 0x00002200, + PAUSEOPT_TARGET_TSC = 0x00002200, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index ba209bdf57d9dc7c59b4cc8cb87432d0b674c708..eee62e3e84c10a014f0c13ee81018fa7433c0903 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -5,7 +5,8 @@ /* * Defines VMX CPU feature bits */ -#define NVMXINTS 6 /* N 32-bit words worth of info */ +#define NVMXINTS 5 /* N 32-bit words worth of info */ +#define NVMX_ZX_TERTIARY_INTS 1 /* Zhaoxin-specific tertiary control words */ /* * Note: If the comment begins with a quoted string, that string is used @@ -90,7 +91,6 @@ /* Tertiary Processor-Based VM-Execution Controls, word 3 */ #define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ -/* Zhaoxin Tertiary Processor-Based VM-Execution Controls, word 4 */ -#define VMX_FEATURE_GUEST_ZXPAUSE (4*32 + 0) /* zxpause instruction in guest mode */ - +/* Zhaoxin Tertiary Processor-Based VM-Execution Controls */ +#define VMX_ZX_TERTIARY_FEATURE_GUEST_PAUSEOPT (((NVMXINTS) * 32) + 0) /* pauseopt in guest mode */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1bf614a997e120bb364d8c695ab94a..0e10f6bcacae830fa45b4722c09eabc9bd1bc718 100644 --- 
a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_PAUSEOPT 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6db634c1241c6960f0e6aaa7cc8594f8393a756d..c75977d7d727c686afa12350d3fdb4f129a140d3 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -25,7 +25,7 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y += umwait.o -obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zxpause.o +obj-y += pauseopt.o obj-$(CONFIG_PROC_FS) += proc.o obj-y += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 3e0fbf510f1c6db7aee1e4fcacf6c3269174d415..c30e970664a6e5013915e3a819c33cbdddf637bc 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -17,12 +17,43 @@ enum vmx_feature_leafs { SECONDARY_CTLS, TERTIARY_CTLS_LOW, TERTIARY_CTLS_HIGH, - ZX_TERTIARY_CTLS, NR_VMX_FEATURE_WORDS, }; #define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f) +static void init_zhaoxin_ext_capabilities(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + struct extra_zx_cpuinfo_x86 *zx = &extra_zx_cpu_data(c->cpu_index); + u32 ext_vmcs_cap = 0; + u32 proc_based_ctls3_high = 0; + u32 ign, msr_high; + + zx->vmx_tertiary_capability[0] = 0; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN && + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) + return; + + if (rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &ext_vmcs_cap, &ign)) + return; + + if (!(ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3_EN)) + return; + + if (rdmsr_safe(MSR_ZX_VMX_PROCBASED_CTLS3, &ign, &msr_high)) + return; + + if (!(msr_high & 0x1)) /* CTLS3 MSR doesn't exist */ + proc_based_ctls3_high = 0x1; /* set PAUSEOPT(bit0) */ + else + proc_based_ctls3_high = msr_high; + + zx->vmx_tertiary_capability[0] = 
proc_based_ctls3_high; +#endif +} + static void init_vmx_capabilities(struct cpuinfo_x86 *c) { u32 supported, funcs, ept, vpid, ign, low, high; @@ -98,15 +129,7 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EPT_AD); if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID)) set_cpu_cap(c, X86_FEATURE_VPID); - /* - * Initialize Zhaoxin Tertiary Exec Control feature flags. - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR || - boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) { - rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &supported, &ign); - if (supported & MSR_ZX_VMCS_EXEC_CTL3) - c->vmx_capability[ZX_TERTIARY_CTLS] |= VMX_F(GUEST_ZXPAUSE); - } + init_zhaoxin_ext_capabilities(c); } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 1db560ed2ca35a0d3572f01c25ea4f9f96d015fd..7d68330f16dc07cae2aa7fb218f6d0dfba0ec66f 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -68,6 +68,10 @@ trap 'rm "$OUT"' EXIT echo "#include " echo "#endif" dump_array "x86_vmx_flags" "NVMXINTS*32" "VMX_FEATURE_" "" $3 + echo "" + + dump_array "x86_vmx_zx_tertiary_flags" "NVMX_ZX_TERTIARY_INTS*32" \ + "VMX_ZX_TERTIARY_FEATURE_" "(NVMXINTS*32)" $3 echo "#endif /* CONFIG_X86_VMX_FEATURE_NAMES */" ) > $OUT diff --git a/arch/x86/kernel/cpu/zxpause.c b/arch/x86/kernel/cpu/pauseopt.c similarity index 40% rename from arch/x86/kernel/cpu/zxpause.c rename to arch/x86/kernel/cpu/pauseopt.c index 7f55f5d9e8c0cbb70c30c260705040901eee2a53..58a490da7b3e5e17d890c70b3d38a7ad95158bb3 100644 --- a/arch/x86/kernel/cpu/zxpause.c +++ b/arch/x86/kernel/cpu/pauseopt.c @@ -6,41 +6,37 @@ #include #include -#define ZXPAUSE_C02_ENABLE 0 - -#define ZXPAUSE_CTRL_VAL(max_time, c02_disable) \ - (((max_time) & MSR_ZX_PAUSE_CONTROL_TIME_MASK) | \ - ((c02_disable) & MSR_ZX_PAUSE_CONTROL_C02_DISABLE)) +#define PAUSEOPT_CTRL_VAL(max_time) (((max_time) & 
MSR_PAUSEOPT_CONTROL_TIME_MASK)) /* - * Cache ZX_PAUSE_CONTROL MSR. This is a systemwide control. By default, - * zxpause max time is 100000 in TSC-quanta and C0.2 is enabled + * Cache PAUSEOPT_CONTROL MSR. This is a systemwide control. By default, + * pauseopt max time is 100000 in TSC-quanta and P0.1 is enabled. */ -static u32 zxpause_control_cached = ZXPAUSE_CTRL_VAL(100000, ZXPAUSE_C02_ENABLE); +static u32 pauseopt_control_cached = PAUSEOPT_CTRL_VAL(100000); /* - * Cache the original ZX_PAUSE_CONTROL MSR value which is configured by + * Cache the original PAUSEOPT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. */ -static u32 orig_zxpause_control_cached __ro_after_init; +static u32 orig_pauseopt_control_cached __ro_after_init; /* - * Serialize access to zxpause_control_cached and ZX_PAUSE_CONTROL MSR in + * Serialize access to pauseopt_control_cached and PAUSEOPT_CONTROL MSR in * the sysfs write functions. */ -static DEFINE_MUTEX(zxpause_lock); +static DEFINE_MUTEX(pauseopt_lock); -static void zxpause_update_control_msr(void *unused) +static void pauseopt_update_control_msr(void *unused) { lockdep_assert_irqs_disabled(); - wrmsr(MSR_ZX_PAUSE_CONTROL, READ_ONCE(zxpause_control_cached), 0); + wrmsr(MSR_PAUSEOPT_CONTROL, READ_ONCE(pauseopt_control_cached), 0); } /* * The CPU hotplug callback sets the control MSR to the global control * value. * - * Disable interrupts so the read of zxpause_control_cached and the WRMSR + * Disable interrupts so the read of pauseopt_control_cached and the WRMSR * are protected against a concurrent sysfs write. Otherwise the sysfs * write could update the cached value after it had been read on this CPU * and issue the IPI before the old value had been written. The IPI would @@ -51,10 +47,10 @@ static void zxpause_update_control_msr(void *unused) * value or the IPI is updating this CPU to the new control value after * interrupts have been reenabled. 
*/ -static int zxpause_cpu_online(unsigned int cpu) +static int pauseopt_cpu_online(unsigned int cpu) { local_irq_disable(); - zxpause_update_control_msr(NULL); + pauseopt_update_control_msr(NULL); local_irq_enable(); return 0; } @@ -63,21 +59,21 @@ static int zxpause_cpu_online(unsigned int cpu) * The CPU hotplug callback sets the control MSR to the original control * value. */ -static int zxpause_cpu_offline(unsigned int cpu) +static int pauseopt_cpu_offline(unsigned int cpu) { /* * This code is protected by the CPU hotplug already and - * orig_zxpause_control_cached is never changed after it caches - * the original control MSR value in zxpause_init(). So there + * orig_pauseopt_control_cached is never changed after it caches + * the original control MSR value in pauseopt_init(). So there * is no race condition here. */ - wrmsr(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached, 0); + wrmsr(MSR_PAUSEOPT_CONTROL, orig_pauseopt_control_cached, 0); return 0; } /* - * On resume, restore ZX_PAUSE_CONTROL MSR on the boot processor which + * On resume, restore PAUSEOPT_CONTROL MSR on the boot processor which * is the only active CPU at this time. The MSR is set up on the APs via the * CPU hotplug callback. * @@ -86,81 +82,51 @@ static int zxpause_cpu_offline(unsigned int cpu) * trust the firmware nor does it matter if the same value is written * again. */ -static void zxpause_syscore_resume(void) +static void pauseopt_syscore_resume(void) { - zxpause_update_control_msr(NULL); + pauseopt_update_control_msr(NULL); } -static struct syscore_ops zxpause_syscore_ops = { - .resume = zxpause_syscore_resume, +static struct syscore_ops pauseopt_syscore_ops = { + .resume = pauseopt_syscore_resume, }; /* sysfs interface */ -/* - * When bit 0 in ZX_PAUSE_CONTROL MSR is 1, C0.2 is disabled. - * Otherwise, C0.2 is enabled. 
- */ -static inline bool zxpause_ctrl_c02_enabled(u32 ctrl) -{ - return !(ctrl & MSR_ZX_PAUSE_CONTROL_C02_DISABLE); -} - -static inline u32 zxpause_ctrl_max_time(u32 ctrl) +static inline u32 pauseopt_ctrl_max_time(u32 ctrl) { - return ctrl & MSR_ZX_PAUSE_CONTROL_TIME_MASK; + return ctrl & MSR_PAUSEOPT_CONTROL_TIME_MASK; } -static inline void zxpause_update_control(u32 maxtime, bool c02_enable) +static inline void pauseopt_update_control(u32 maxtime) { - u32 ctrl = maxtime & MSR_ZX_PAUSE_CONTROL_TIME_MASK; - - if (!c02_enable) - ctrl |= MSR_ZX_PAUSE_CONTROL_C02_DISABLE; + u32 ctrl = maxtime & MSR_PAUSEOPT_CONTROL_TIME_MASK; - WRITE_ONCE(zxpause_control_cached, ctrl); + WRITE_ONCE(pauseopt_control_cached, ctrl); /* Propagate to all CPUs */ - on_each_cpu(zxpause_update_control_msr, NULL, 1); + on_each_cpu(pauseopt_update_control_msr, NULL, 1); } static ssize_t -enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) +enable_p01_show(struct device *dev, struct device_attribute *attr, char *buf) { - u32 ctrl = READ_ONCE(zxpause_control_cached); + u32 ret; - return sprintf(buf, "%d\n", zxpause_ctrl_c02_enabled(ctrl)); -} + if (boot_cpu_has(X86_FEATURE_PAUSEOPT)) + ret = 1; + else + ret = 0; -static ssize_t enable_c02_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - bool c02_enable; - u32 ctrl; - int ret; - - ret = kstrtobool(buf, &c02_enable); - if (ret) - return ret; - - mutex_lock(&zxpause_lock); - - ctrl = READ_ONCE(zxpause_control_cached); - if (c02_enable != zxpause_ctrl_c02_enabled(ctrl)) - zxpause_update_control(ctrl, c02_enable); - - mutex_unlock(&zxpause_lock); - - return count; + return sprintf(buf, "%d\n", ret); } -static DEVICE_ATTR_RW(enable_c02); +static DEVICE_ATTR_RO(enable_p01); static ssize_t max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) { - u32 ctrl = READ_ONCE(zxpause_control_cached); + u32 ctrl = READ_ONCE(pauseopt_control_cached); - return 
sprintf(buf, "%u\n", zxpause_ctrl_max_time(ctrl)); + return sprintf(buf, "%u\n", pauseopt_ctrl_max_time(ctrl)); } static ssize_t max_time_store(struct device *kobj, @@ -175,49 +141,49 @@ static ssize_t max_time_store(struct device *kobj, return ret; /* bits[1:0] must be zero */ - if (max_time & ~MSR_ZX_PAUSE_CONTROL_TIME_MASK) + if (max_time & ~MSR_PAUSEOPT_CONTROL_TIME_MASK) return -EINVAL; - mutex_lock(&zxpause_lock); + mutex_lock(&pauseopt_lock); - ctrl = READ_ONCE(zxpause_control_cached); - if (max_time != zxpause_ctrl_max_time(ctrl)) - zxpause_update_control(max_time, zxpause_ctrl_c02_enabled(ctrl)); + ctrl = READ_ONCE(pauseopt_control_cached); + if (max_time != pauseopt_ctrl_max_time(ctrl)) + pauseopt_update_control(max_time); - mutex_unlock(&zxpause_lock); + mutex_unlock(&pauseopt_lock); return count; } static DEVICE_ATTR_RW(max_time); -static struct attribute *zxpause_attrs[] = { - &dev_attr_enable_c02.attr, +static struct attribute *pauseopt_attrs[] = { + &dev_attr_enable_p01.attr, &dev_attr_max_time.attr, NULL }; -static struct attribute_group zxpause_attr_group = { - .attrs = zxpause_attrs, - .name = "zxpause_control", +static struct attribute_group pauseopt_attr_group = { + .attrs = pauseopt_attrs, + .name = "pauseopt_control", }; -static int __init zxpause_init(void) +static int __init pauseopt_init(void) { struct device *dev; int ret; - if (!boot_cpu_has(X86_FEATURE_ZXPAUSE)) + if (!boot_cpu_has(X86_FEATURE_PAUSEOPT)) return -ENODEV; /* * Cache the original control MSR value before the control MSR is - * changed. This is the only place where orig_zxpause_control_cached + * changed. This is the only place where orig_pauseopt_control_cached * is modified. 
*/ - rdmsrl(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached); + rdmsrl(MSR_PAUSEOPT_CONTROL, orig_pauseopt_control_cached); - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "zxpause:online", - zxpause_cpu_online, zxpause_cpu_offline); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pauseopt:online", + pauseopt_cpu_online, pauseopt_cpu_offline); if (ret < 0) { /* * On failure, the control MSR on all CPUs has the @@ -226,13 +192,17 @@ static int __init zxpause_init(void) return ret; } - register_syscore_ops(&zxpause_syscore_ops); + register_syscore_ops(&pauseopt_syscore_ops); /* - * Add zxpause control interface. Ignore failure, so at least the + * Add pauseopt control interface. Ignore failure, so at least the * default values are set up in case the machine manages to boot. */ dev = bus_get_dev_root(&cpu_subsys); - return sysfs_create_group(&dev->kobj, &zxpause_attr_group); + if (dev) { + ret = sysfs_create_group(&dev->kobj, &pauseopt_attr_group); + put_device(dev); + } + return ret; } -device_initcall(zxpause_init); +device_initcall(pauseopt_init); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 2941134c47da66ebc7fdd0eef96a89d6dea43140..f07aafc72a80cd929f99d1dace1dd1ece896e36d 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -5,12 +5,14 @@ #include #include #include +#include #include #include "cpu.h" #ifdef CONFIG_X86_VMX_FEATURE_NAMES extern const char * const x86_vmx_flags[NVMXINTS*32]; +extern const char * const x86_vmx_zx_tertiary_flags[NVMX_ZX_TERTIARY_INTS*32]; #endif /* @@ -123,12 +125,19 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_X86_VMX_FEATURE_NAMES if (cpu_has(c, X86_FEATURE_VMX) && c->vmx_capability[0]) { + struct extra_zx_cpuinfo_x86 *zx = &extra_zx_cpu_data(cpu); + seq_puts(m, "\nvmx flags\t:"); for (i = 0; i < 32*NVMXINTS; i++) { if (test_bit(i, (unsigned long *)c->vmx_capability) && x86_vmx_flags[i] != NULL) seq_printf(m, " %s", x86_vmx_flags[i]); } + for (i = 0; i < 
32*NVMX_ZX_TERTIARY_INTS; i++) { + if (test_bit(i, zx->vmx_tertiary_capability) && + x86_vmx_zx_tertiary_flags[i] != NULL) + seq_printf(m, " %s", x86_vmx_zx_tertiary_flags[i]); + } } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 93dc119c8e2e89aa17ac94926a0c010ffd4f67f3..149854a54e0bf7aba5aa19f814727c3d5d6bbb25 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -131,6 +131,9 @@ struct ist_info ist_info; struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +struct extra_zx_cpuinfo_x86 extra_zx_boot_cpu_data __read_mostly; +#endif #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) __visible unsigned long mmu_cr4_features __ro_after_init; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 11a7757552a94af161f85fa0b8750cbb2971c0c0..62d20275b63461c77f2ebd88ee50f62137a7881b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -104,6 +104,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +DEFINE_PER_CPU_READ_MOSTLY(struct extra_zx_cpuinfo_x86, extra_zx_cpu_info); /* CPUs which are the primary SMT threads */ struct cpumask __cpu_primary_thread_mask __read_mostly; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 06289c254a0ede8fb7cbba9952f45ec5638e4e08..99355d27415ee3b52ba0bb84edc1ae806a298310 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -84,8 +84,9 @@ static __init void x86_late_time_init(void) if (static_cpu_has(X86_FEATURE_WAITPKG)) use_tpause_delay(); - else if (static_cpu_has(X86_FEATURE_ZXPAUSE)) - use_zxpause_delay(); + + if (static_cpu_has(X86_FEATURE_PAUSEOPT)) + use_pauseopt_delay(); } /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 084f18086c0db05bd75b730d2f914a03353e9027..f39305cf95c719f5fe4241012109d99e49a120e6 100644 --- 
a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -817,9 +817,6 @@ void kvm_set_cpu_caps(void) F(PMM) | F(PMM_EN) ); - /* Zhaoxin 0xC0000006 leaf */ - kvm_cpu_cap_mask(CPUID_C000_0006_EAX, 0 /* bit0: zxpause */ | 0 /* bit1 HMAC */); - /* * Hide RDTSCP and RDPID if either feature is reported as supported but * probing MSR_TSC_AUX failed. This is purely a sanity check and @@ -834,6 +831,12 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); kvm_cpu_cap_clear(X86_FEATURE_RDPID); } + + /* + * Do not hide any features supported by this leaf, allow the guest to see + * the original information.Now leaf 0xC000_0006 EAX only supports PAUSEOPT. + */ + kvm_cpu_cap_init(CPUID_C000_0006_EAX, F(PAUSEOPT)); } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); @@ -1363,7 +1366,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) case 0xC0000006: cpuid_entry_override(entry, CPUID_C000_0006_EAX); break; - case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ case 0xC0000002: diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 65a4eae06780ec81c50cc0defa1c5e44ee20658d..e0ba2cc2e7946e3b2a324f5d17f01568a4efe68f 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -79,6 +79,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, + [CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX}, [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, @@ -97,7 +98,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, - [CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX}, }; /* diff --git a/arch/x86/kvm/vmx/capabilities.h 
b/arch/x86/kvm/vmx/capabilities.h index 631e65a212285073924d90df28ea1ee981efe747..e0939a4a2b73d0de8ac9fad0d51533021b05d1c8 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -139,6 +139,11 @@ static inline bool cpu_has_tertiary_exec_ctrls(void) CPU_BASED_ACTIVATE_TERTIARY_CONTROLS; } +static inline bool cpu_has_zx_tertiary_exec_ctrls(void) +{ + return !!vmcs_config.zx_cpu_based_3rd_exec_ctrl; +} + static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -256,10 +261,10 @@ static inline bool cpu_has_vmx_xsaves(void) SECONDARY_EXEC_ENABLE_XSAVES; } -static inline bool cpu_has_vmx_zxpause(void) +static inline bool cpu_has_vmx_pauseopt(void) { return vmcs_config.zx_cpu_based_3rd_exec_ctrl & - ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; + ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; } static inline bool cpu_has_vmx_waitpkg(void) diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 4eabed8e5813af72bac6e9d9377747cdf3a219fa..0e07394f02dd37b210f36ac60ecd6626117b0d26 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -50,9 +50,8 @@ struct vmcs_controls_shadow { u32 pin; u32 exec; u32 secondary_exec; - u32 zx_tertiary_exec; u64 tertiary_exec; - u64 zx_vmexit_tsc; + u32 zx_tertiary_exec; }; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f0ebb1ab1c1ef8a751ff657c86b9d795b7ab845d..8ce7739539ba8e42a93c0499f2b584b394218177 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -218,8 +218,6 @@ int __read_mostly pt_mode = PT_MODE_SYSTEM; module_param(pt_mode, int, S_IRUGO); #endif -static u32 zx_ext_vmcs_cap; - static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_MUTEX(vmx_l1d_flush_mutex); @@ -1987,6 +1985,24 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) } } +static int zx_vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = 
to_vmx(vcpu); + + if (!is_zhaoxin_cpu()) + return KVM_MSR_RET_UNHANDLED; + + switch (msr_info->index) { + case MSR_PAUSEOPT_CONTROL: + if (!msr_info->host_initiated && !vmx_guest_pauseopt_enabled(vmx)) + return 1; + msr_info->data = vmx->msr_pauseopt_control; + return 0; + default: + return KVM_MSR_RET_UNHANDLED; /* Non-zhaoxin MSRs */ + } +} + /* * Reads an msr value (of 'msr_info->index') into 'msr_info->data'. * Returns 0 on success, non-0 otherwise. @@ -1997,6 +2013,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; u32 index; + int ret = 0; + + ret = zx_vmx_get_msr(vcpu, msr_info); + switch (ret) { + case 0: + case 1: + return ret; + case KVM_MSR_RET_UNHANDLED: + ret = 0; /* reset to generic vmx msr process */ + break; + } switch (msr_info->index) { #ifdef CONFIG_X86_64 @@ -2020,11 +2047,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UMWAIT_CONTROL: if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) return 1; - msr_info->data = vmx->msr_ia32_umwait_control; - break; - case MSR_ZX_PAUSE_CONTROL: - if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx)) - return 1; + msr_info->data = vmx->msr_ia32_umwait_control; break; case MSR_IA32_SPEC_CTRL: @@ -2170,6 +2193,31 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated return debugctl; } +static int zx_vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 msr_index = msr_info->index; + u64 data = msr_info->data; + + if (!is_zhaoxin_cpu()) + return KVM_MSR_RET_UNHANDLED; + + switch (msr_index) { + case MSR_PAUSEOPT_CONTROL: + if (!msr_info->host_initiated && !vmx_guest_pauseopt_enabled(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_pauseopt_control = data; + return 0; + 
default: + return KVM_MSR_RET_UNHANDLED; /* Non-zhaoxin MSRs*/ + } +} + /* * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. @@ -2184,6 +2232,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 data = msr_info->data; u32 index; + ret = zx_vmx_set_msr(vcpu, msr_info); + switch (ret) { + case 0: + case 1: + return ret; + case KVM_MSR_RET_UNHANDLED: + ret = 0; /* reset to generic vmx msr process */ + break; + } + switch (msr_index) { case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_info); @@ -2284,15 +2342,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* The reserved bit 1 and non-32 bit [63:32] should be zero */ if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) return 1; - vmx->msr_ia32_umwait_control = data; - break; - case MSR_ZX_PAUSE_CONTROL: - if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx)) - return 1; - /* The reserved bit 1 and non-32 bit [63:32] should be zero */ - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) - return 1; vmx->msr_ia32_umwait_control = data; break; case MSR_IA32_SPEC_CTRL: @@ -2591,6 +2641,44 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) return ctl_opt & allowed; } +static int setup_zhaoxin_vmcs_controls(struct vmcs_config *vmcs_conf) +{ + u32 zx_ext_vmcs_cap, msr_high, ign; + u32 zx_ctl3 = 0; + int ret; + + if (!is_zhaoxin_cpu()) + return 0; + + /* + * Zhaoxin uses MSR_ZX_EXT_VMCS_CAPS to enumerate the 3rd CPU-based + * control, rather than a bit in the 2nd CPU-based control. 
+ */ + rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign); + if (!(zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3_EN)) + return 0; + + ret = rdmsr_safe(MSR_ZX_VMX_PROCBASED_CTLS3, &ign, &msr_high); + if (msr_high & 0x1) { + /* ZX CPU with ZX_VMX_PROCBASED_CTLS3 support */ + ret = adjust_vmx_controls(KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL, + KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL, + MSR_ZX_VMX_PROCBASED_CTLS3, &zx_ctl3); + if (ret) + return -EIO; + } else { + /* ZX CPU without ZX_VMX_PROCBASED_CTLS3 support: + * assume PAUSEOPT is supported and set that bit + */ + zx_ctl3 |= ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; + } + + /* Will be extended in the future for more 3rd controls */ + vmcs_conf->zx_cpu_based_3rd_exec_ctrl = zx_ctl3; + + return 0; +} + static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { @@ -2719,6 +2807,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } + if (setup_zhaoxin_vmcs_controls(vmcs_conf)) + return -EIO; + rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ @@ -2750,10 +2841,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->vmentry_ctrl = _vmentry_control; vmcs_conf->misc = misc_msr; - /* Setup Zhaoxin exec-cntl3 VMCS field. */ - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) - vmcs_conf->zx_cpu_based_3rd_exec_ctrl |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - #if IS_ENABLED(CONFIG_HYPERV) if (enlightened_vmcs) evmcs_sanitize_exec_ctrls(vmcs_conf); @@ -4547,24 +4634,22 @@ static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static u32 vmx_zx_tertiary_exec_control(struct vcpu_vmx *vmx) +/* + * We might need to modify the way the third level control corrections + * are handled here in the future by introducing a check using the + * CTLS3 MSR.
The current hardware does not include the design for CTLS3, + * but the designer is attempting to add this MSR implementation + * through ucode. + */ +static u32 zx_vmx_tertiary_exec_control(struct vcpu_vmx *vmx) { struct kvm_vcpu *vcpu = &vmx->vcpu; u32 exec_control = vmcs_config.zx_cpu_based_3rd_exec_ctrl; - /* - * Show errors if Qemu wants to enable guest_zxpause while - * vmx not support it. - */ - if (guest_cpuid_has(vcpu, X86_FEATURE_ZXPAUSE)) { - if (!cpu_has_vmx_zxpause()) - pr_err("VMX not support guest_zxpause!\n"); - else - exec_control |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; - } else - exec_control &= ~ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; + if (!guest_cpuid_has(vcpu, X86_FEATURE_PAUSEOPT)) + exec_control &= ~ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; - /* enable other features here */ + /* Adjust other features here */ return exec_control; } @@ -4754,6 +4839,25 @@ static int vmx_vcpu_precreate(struct kvm *kvm) #define VMX_XSS_EXIT_BITMAP 0 +static void zx_setup_3rd_ctrls(struct vcpu_vmx *vmx) +{ + if (cpu_has_zx_tertiary_exec_ctrls()) { + zx_tertiary_exec_controls_set(vmx, zx_vmx_tertiary_exec_control(vmx)); + /* + * Regardless of whether the guest has PAUSEOPT support or not, + * as long as there is a 3rd control, we need to initialize this + * field to 0 + */ + if (cpu_has_vmx_pauseopt()) + vmcs_write64(PAUSEOPT_TARGET_TSC, 0); + } +} + +static void zx_init_vmcs(struct vcpu_vmx *vmx) +{ + zx_setup_3rd_ctrls(vmx); +} + static void init_vmcs(struct vcpu_vmx *vmx) { struct kvm *kvm = vmx->vcpu.kvm; @@ -4775,11 +4879,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (cpu_has_secondary_exec_ctrls()) secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx)); - zx_vmexit_tsc_controls_set(vmx, 0); - } - if (cpu_has_tertiary_exec_ctrls()) tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx)); @@ -4874,6 +4973,7 @@ static void 
init_vmcs(struct vcpu_vmx *vmx) vmcs_write32(TPR_THRESHOLD, 0); } + zx_init_vmcs(vmx); vmx_setup_uret_msrs(vmx); } @@ -4915,6 +5015,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->spec_ctrl = 0; vmx->msr_ia32_umwait_control = 0; + vmx->msr_pauseopt_control = 0; + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); @@ -6320,15 +6423,20 @@ void dump_vmcs(struct kvm_vcpu *vcpu) else tertiary_exec_control = 0; - pr_err("*** Zhaoxin Specific Fields ***\n"); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { + pr_err("VMCS %p, last attempted VM-entry on CPU %d\n", + vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu); + + if (cpu_has_zx_tertiary_exec_ctrls()) { + /* + * Now zhaoxin only supports specific vmcs fields on 3rd exec control, + * may extend in the future. + */ + pr_err("*** Zhaoxin Specific Fields ***\n"); pr_err("Zhaoxin TertiaryExec Cntl = 0x%016x\n", - vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL)); - pr_err("ZXPAUSE Saved TSC = 0x%016llx\n", vmcs_read64(ZXPAUSE_VMEXIT_TSC)); + vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL)); + pr_err("PAUSEOPT Saved TSC = 0x%016llx\n", vmcs_read64(PAUSEOPT_TARGET_TSC)); } - pr_err("VMCS %p, last attempted VM-entry on CPU %d\n", - vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu); pr_err("*** Guest State ***\n"); pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), @@ -7349,6 +7457,52 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_state_exit_irqoff(); } +static void zx_vmx_vcpu_run_pre(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!is_zhaoxin_cpu()) + return; + + if (!cpu_has_vmx_pauseopt()) + return; + + if (!vmx->pauseopt_in_progress) + return; + + if (kvm_rip_read(vcpu) != vmx->pauseopt_rip) { + /* + * When PAUSEOPT execution is interrupted by a VM-Exit and the + * guest RIP advances before the next vmentry, clear the saved + * 
target TSC so that the optimized state is not re-entered. + */ + vmcs_write64(PAUSEOPT_TARGET_TSC, 0); + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; + } +} + +static void zx_vmx_vcpu_run_post(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 target; + + if (!is_zhaoxin_cpu()) + return; + + if (!cpu_has_vmx_pauseopt()) + return; + + target = vmcs_read64(PAUSEOPT_TARGET_TSC); + if (target) { + vmx->pauseopt_in_progress = true; + vmx->pauseopt_rip = kvm_rip_read(vcpu); + } else { + vmx->pauseopt_in_progress = false; + vmx->pauseopt_rip = 0; + } +} + static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7395,6 +7549,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + zx_vmx_vcpu_run_pre(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time @@ -7492,6 +7648,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->launched = 1; + zx_vmx_vcpu_run_post(vcpu); + vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); @@ -7849,10 +8007,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmcs_set_secondary_exec_control(vmx, vmx_secondary_exec_control(vmx)); - if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) { - zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx)); - zx_vmexit_tsc_controls_set(vmx, 0); - } + zx_setup_3rd_ctrls(vmx); if (guest_can_use(vcpu, X86_FEATURE_VMX)) vmx->msr_ia32_feature_control_valid_bits |= @@ -7957,6 +8112,12 @@ static u64 vmx_get_perf_capabilities(void) return perf_cap; } +static void zx_vmx_set_cpu_caps(void) +{ + if (cpu_has_vmx_pauseopt()) + kvm_cpu_cap_check_and_set(X86_FEATURE_PAUSEOPT); +} + static __init void vmx_set_cpu_caps(void) { kvm_set_cpu_caps(); @@ -8006,8 +8167,7 @@ static __init void vmx_set_cpu_caps(void) if 
(cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); - if (cpu_has_vmx_zxpause()) - kvm_cpu_cap_check_and_set(X86_FEATURE_ZXPAUSE); + zx_vmx_set_cpu_caps(); } static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -8580,12 +8740,6 @@ static __init int hardware_setup(void) unsigned long host_bndcfgs; struct desc_ptr dt; int r; - u32 ign; - - /* Caches Zhaoxin extend VMCS capabilities. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR || - boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) - rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign); store_idt(&dt); host_idt_base = dt.address; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 5d55e3e19017e865795917066dc7156401eb371e..b3d9d7f86e47cc467ae1a870198eddc51d304de3 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -280,6 +280,9 @@ struct vcpu_vmx { u64 spec_ctrl; u32 msr_ia32_umwait_control; + u32 msr_pauseopt_control; + bool pauseopt_in_progress; + unsigned long pauseopt_rip; /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a @@ -580,15 +583,7 @@ static inline u8 vmx_get_rvi(void) (TERTIARY_EXEC_IPI_VIRT) #define KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL 0 -#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL \ - (ZX_TERTIARY_EXEC_GUEST_ZXPAUSE) - -/* - * We shouldn't rw zxpause_vmexit_tsc vmcs field in this - * way, try to use another way in the future. 
- */ -#define KVM_REQUIRED_VMX_ZXPAUSE_VMEXIT_TSC 0 -#define KVM_OPTIONAL_VMX_ZXPAUSE_VMEXIT_TSC 1 +#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL (ZX_TERTIARY_EXEC_GUEST_PAUSEOPT) #define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ @@ -623,7 +618,6 @@ BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32) BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) BUILD_CONTROLS_SHADOW(zx_tertiary_exec, ZX_TERTIARY_VM_EXEC_CONTROL, 32) -BUILD_CONTROLS_SHADOW(zx_vmexit_tsc, ZXPAUSE_VMEXIT_TSC, 64) /* * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the @@ -726,10 +720,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } -static inline bool vmx_guest_zxpause_enabled(struct vcpu_vmx *vmx) +static inline bool vmx_guest_pauseopt_enabled(struct vcpu_vmx *vmx) { return zx_tertiary_exec_controls_get(vmx) & - ZX_TERTIARY_EXEC_GUEST_ZXPAUSE; + ZX_TERTIARY_EXEC_GUEST_PAUSEOPT; } static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) @@ -775,4 +769,12 @@ static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu) to_vmx(vcpu)->nested.enlightened_vmcs_enabled; } +static inline bool is_zhaoxin_cpu(void) +{ + /* Now zhaoxin owns 2 x86 vendor brands, Zhaoxin and Centaur */ + return (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR); +} + +#define KVM_MSR_RET_UNHANDLED 2 #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 60b4cd245aab0a73fba735e994bbd0e38bbc0b8b..07910f8c0d1d86c9069e472796abf7b14baab995 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1464,8 +1464,9 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, + 
MSR_PAUSEOPT_CONTROL, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, - MSR_ZX_PAUSE_CONTROL, }; static const u32 msrs_to_save_pmu[] = { @@ -7211,8 +7212,8 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG)) return; break; - case MSR_ZX_PAUSE_CONTROL: - if (!kvm_cpu_cap_has(X86_FEATURE_ZXPAUSE)) + case MSR_PAUSEOPT_CONTROL: + if (!kvm_cpu_cap_has(X86_FEATURE_PAUSEOPT)) return; break; case MSR_IA32_RTIT_CTL: diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 3946badbd78fd7058a871b3d6779766a8f0c606d..96bf5b3baacd399f69d662fabe140b7cf739bfc6 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -118,12 +118,12 @@ static void delay_halt_tpause(u64 start, u64 cycles) } /* - * On ZHAOXIN the ZXPAUSE instruction waits until any of: + * On ZHAOXIN the PAUSEOPT instruction waits until any of: * 1) the delta of TSC counter exceeds the value provided in EDX:EAX - * 2) global timeout in ZX_PAUSE_CONTROL is exceeded + * 2) global timeout in PAUSEOPT_CONTROL is exceeded * 3) an external interrupt occurs */ -static void delay_halt_zxpause(u64 unused, u64 cycles) +static void delay_halt_pauseopt(u64 unused, u64 cycles) { u64 until = cycles; u32 eax, edx; @@ -131,11 +131,7 @@ static void delay_halt_zxpause(u64 unused, u64 cycles) eax = lower_32_bits(until); edx = upper_32_bits(until); - /* - * Hard code the deeper (C0.1) sleep state because exit latency is - * small compared to the "microseconds" that usleep() will delay. 
- */ - __zxpause(ZXPAUSE_C01_STATE, edx, eax); + __pauseopt(PAUSEOPT_P01_STATE, edx, eax); } /* @@ -204,9 +200,9 @@ void __init use_tpause_delay(void) delay_fn = delay_halt; } -void __init use_zxpause_delay(void) +void __init use_pauseopt_delay(void) { - delay_halt_fn = delay_halt_zxpause; + delay_halt_fn = delay_halt_pauseopt; delay_fn = delay_halt; } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 59992ea1e1eb3026d51f1c09c183ff418a06190c..ddf5f7b6d1d440c3bf28a131c022f28ee38eccd4 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 23 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -444,8 +444,8 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 23 */ -#define X86_FEATURE_ZXPAUSE (23*32 + 0) /* ZHAOXIN ZXPAUSE */ +/* Zhaoxin/Centaur-defined CPU features, CPUID level 0xC0000006, word 21 */ +#define X86_FEATURE_PAUSEOPT (21*32 + 0) /* ZHAOXIN PAUSEOPT */ /* * BUG word(s) diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index be3fef5e80ba38f7b56a1364cb2d2c4a549d4486..fafe9be7a6f4ff6b7adc0ae3ea34d30b3d9ba79d 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -131,9 +131,6 @@ #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 -#define DISABLED_MASK21 0 -#define DISABLED_MASK22 0 -#define DISABLED_MASK23 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 24) +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git 
a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index db58bb727835813c571a348fc970a83fc6c35138..cf828674afe5078d5f17ccb630eff71ffdff8f1b 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -72,22 +72,19 @@ #define MSR_IA32_UMWAIT_CONTROL 0xe1 #define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) #define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) - -#define MSR_ZX_PAUSE_CONTROL 0x187f -#define MSR_ZX_PAUSE_CONTROL_C02_DISABLE BIT(0) -#define MSR_ZX_PAUSE_CONTROL_RESERVED BIT(1) - /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL 0x187f + /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ -#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) +#define MSR_PAUSEOPT_CONTROL_TIME_MASK (~0x03U) /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf @@ -765,13 +762,6 @@ #define MSR_TMTA_LRTI_READOUT 0x80868018 #define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a -/* - * Zhaoxin extend VMCS capabilities: - * bit 0: exec-cntl3 VMCS field. - */ -#define MSR_ZX_EXT_VMCS_CAPS 0x1675 -#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0) - /* Intel defined MSRs. 
*/ #define MSR_IA32_P5_MC_ADDR 0x00000000 #define MSR_IA32_P5_MC_TYPE 0x00000001 diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 6a3de575bec6a06ef41e3ac281cffbbd9248f1a5..7ba1726b71c7b8bfc95888dc78508998bba263fe 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -99,9 +99,6 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK21 0 -#define REQUIRED_MASK22 0 -#define REQUIRED_MASK23 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 24) +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1bf614a997e120bb364d8c695ab94a..0e10f6bcacae830fa45b4722c09eabc9bd1bc718 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_PAUSEOPT 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75