diff --git a/Documentation/virt/kvm/arm/pvsched.rst b/Documentation/virt/kvm/arm/pvsched.rst
index 8f7112a8a9cd91a5cd69dc0ff6d47c946f0960b4..6ba221e25089ed8b1de0098472a9c5cdbae059f7 100644
--- a/Documentation/virt/kvm/arm/pvsched.rst
+++ b/Documentation/virt/kvm/arm/pvsched.rst
@@ -11,6 +11,7 @@ Some SMCCC compatible hypercalls are defined:
 * PV_SCHED_FEATURES:    0xC5000090
 * PV_SCHED_IPA_INIT:    0xC5000091
 * PV_SCHED_IPA_RELEASE: 0xC5000092
+* PV_SCHED_KICK_CPU:    0xC5000093
 
 The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1
 ARCH_FEATURES mechanism before calling it.
@@ -38,6 +39,13 @@ PV_SCHED_IPA_RELEASE
                            this vCPU's PV data structure is released.
     ============= ======== ==========
 
+PV_SCHED_KICK_CPU
+    ============= ======== ==========
+    Function ID:  (uint32) 0xC5000093
+    Return value: (int64)  NOT_SUPPORTED (-1), or SUCCESS (0) if the vCPU
+                           was kicked by the hypervisor.
+    ============= ======== ==========
+
 PV sched state
 --------------
 
@@ -56,3 +64,11 @@ The structure pointed to by the PV_SCHED_IPA hypercall is as follows:
 The preempted field will be updated to 0 by the hypervisor prior to scheduling
 a vCPU. When the vCPU is scheduled out, the preempted field will be updated
 to 1 by the hypervisor.
+
+A vCPU of a paravirtualized guest that is busy-waiting in guest kernel mode
+for an event (e.g. a spinlock to become available) can execute a WFI
+instruction once it has busy-waited for more than a threshold interval.
+Executing WFI causes the hypervisor to put the vCPU to sleep until an
+appropriate event occurs. Another vCPU of the same guest can wake the
+sleeping vCPU by issuing the PV_SCHED_KICK_CPU hypercall, passing the CPU
+id of the vCPU to be woken up in reg1.
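For reference, the probe-then-kick sequence the documentation above implies
looks roughly like the following from the guest side. This is a hedged sketch
of the documented ABI, not code from this patch; pv_sched_kick_supported()
and pv_sched_kick() are hypothetical helper names.

    #include <linux/arm-smccc.h>

    /* Ask PV_SCHED_FEATURES whether the KICK_CPU call is implemented. */
    static bool pv_sched_kick_supported(void)
    {
            struct arm_smccc_res res;

            arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_FEATURES,
                                 ARM_SMCCC_HV_PV_SCHED_KICK_CPU, &res);
            return res.a0 == SMCCC_RET_SUCCESS;
    }

    /* Wake the vCPU whose CPU id is 'cpu'; the id travels in reg1. */
    static long pv_sched_kick(int cpu)
    {
            struct arm_smccc_res res;

            arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res);
            return res.a0;  /* SUCCESS (0) or NOT_SUPPORTED (-1) */
    }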
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9da9d58f1c024086f5b26c02526d0404585f4a51..df9568ba01319f3e1ca52f42e1082fc3f04f2ac7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1683,6 +1683,19 @@ config PARAVIRT_SCHED
 
           If you are unsure how to answer this question, answer Y.
 
+config PARAVIRT_SPINLOCKS
+        bool "Paravirtualization layer for spinlocks"
+        depends on PARAVIRT && SMP
+        help
+          Paravirtualized spinlocks allow a pvops backend to replace the
+          spinlock implementation with something virtualization-friendly
+          (for example, blocking the virtual CPU rather than spinning).
+
+          It has a minimal impact on native kernels and gives a measurable
+          performance benefit in KVM guests whose vCPUs are overcommitted.
+
+          If you are unsure how to answer this question, answer Y.
+
 config PARAVIRT_TIME_ACCOUNTING
         bool "Paravirtual steal time accounting"
         select PARAVIRT
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index d7c511a2f9287f875cd3c852e842a603e107d9d3..c753e51914d311d40a718286ec1d9a883157763d 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -483,6 +483,7 @@ CONFIG_ARCH_LLC_128_LINE_SIZE=y
 CONFIG_CC_HAVE_SHADOW_CALL_STACK=y
 CONFIG_PARAVIRT=y
 CONFIG_PARAVIRT_SCHED=y
+# CONFIG_PARAVIRT_SPINLOCKS is not set
 CONFIG_PARAVIRT_TIME_ACCOUNTING=y
 CONFIG_ARCH_SUPPORTS_KEXEC=y
 CONFIG_ARCH_SUPPORTS_KEXEC_FILE=y
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ac4e59256f8ed5c6294208bb6f80cfc2293b2db4..981ed0e612ef5dc472ee454f49c6e30874dff919 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -625,6 +625,7 @@ struct kvm_vcpu_arch {
 #ifdef CONFIG_PARAVIRT_SCHED
         /* Guest PV sched state */
         struct {
+                bool pv_unhalted;
                 gpa_t base;
         } pvsched;
 #endif
@@ -1112,6 +1113,7 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
 #ifdef CONFIG_PARAVIRT_SCHED
 long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);
 void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted);
+long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu);
 
 static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
 {
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 5ccead71bf8786fd9301c6a911c719fe5076b3d5..19464514e71aefe9b2a7f03cf748b32ac90ebf07 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -32,10 +32,47 @@ static inline bool pv_vcpu_is_preempted(int cpu)
 }
 #endif /* CONFIG_PARAVIRT_SCHED */
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+void __init pv_qspinlock_init(void);
+bool pv_is_native_spin_unlock(void);
+
+void dummy_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+DECLARE_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath,
+                    dummy_queued_spin_lock_slowpath);
+static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+        return static_call(pv_qspinlock_queued_spin_lock_slowpath)(lock, val);
+}
+
+void dummy_queued_spin_unlock(struct qspinlock *lock);
+DECLARE_STATIC_CALL(pv_qspinlock_queued_spin_unlock, dummy_queued_spin_unlock);
+static inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+        return static_call(pv_qspinlock_queued_spin_unlock)(lock);
+}
+
+void dummy_wait(u8 *ptr, u8 val);
+DECLARE_STATIC_CALL(pv_qspinlock_wait, dummy_wait);
+static inline void pv_wait(u8 *ptr, u8 val)
+{
+        return static_call(pv_qspinlock_wait)(ptr, val);
+}
+
+void dummy_kick(int cpu);
+DECLARE_STATIC_CALL(pv_qspinlock_kick, dummy_kick);
+static inline void pv_kick(int cpu)
+{
+        return static_call(pv_qspinlock_kick)(cpu);
+}
+#else
+#define pv_qspinlock_init() do {} while (0)
+#endif /* CONFIG_SMP && CONFIG_PARAVIRT_SPINLOCKS */
+
 #else
 
 #define pv_time_init() do {} while (0)
 #define pv_sched_init() do {} while (0)
+#define pv_qspinlock_init() do {} while (0)
 
 #endif // CONFIG_PARAVIRT
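The paravirt.h hunk above follows the kernel's static_call pattern: declare
the call against a dummy prototype in the header, define it with a native
default, and re-patch it once at boot. A minimal self-contained sketch of
that pattern (all names here are made up for illustration):

    #include <linux/static_call.h>

    static void native_op(int cpu) { /* bare-metal behaviour */ }
    static void kvm_op(int cpu)    { /* virtualization-friendly behaviour */ }

    /* Call sites start out branching directly to native_op()... */
    DEFINE_STATIC_CALL(demo_pv_op, native_op);

    static void caller(int cpu)
    {
            static_call(demo_pv_op)(cpu);   /* direct branch, no indirection */
    }

    static void __init enable_pv(void)
    {
            /* ...and are re-patched once we know we run under KVM. */
            static_call_update(demo_pv_op, kvm_op);
    }

This is why the four pv_qspinlock_* hooks cost essentially nothing on bare
metal: until static_call_update() runs, every call site is a direct branch
to the native implementation.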
diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h
index 8cc7d00b8c67b4ac865cf60f116876de9b8adfb7..51b5efe337111e7a903c8438a983370ffe822677 100644
--- a/arch/arm64/include/asm/qspinlock.h
+++ b/arch/arm64/include/asm/qspinlock.h
@@ -33,6 +33,41 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif
 
+#if !defined(CONFIG_NUMA_AWARE_SPINLOCKS) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#include
+#include
+#include
+#include
+
+#define _Q_PENDING_LOOPS (1 << 9)
+
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+
+#define queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+        smp_store_release(&lock->locked, 0);
+}
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+        pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+        pv_queued_spin_unlock(lock);
+}
+#endif /* !CONFIG_NUMA_AWARE_SPINLOCKS && CONFIG_PARAVIRT_SPINLOCKS */
+
 #include <asm-generic/qspinlock.h>
 
 #endif /* _ASM_ARM64_QSPINLOCK_H */
diff --git a/arch/arm64/include/asm/qspinlock_paravirt.h b/arch/arm64/include/asm/qspinlock_paravirt.h
new file mode 100644
index 0000000000000000000000000000000000000000..eba4be28fbb9d4e286269b7f19f97fe9f3f91f17
--- /dev/null
+++ b/arch/arm64/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye
+ */
+
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+#endif
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 378aea19e840896b1e186a5fcc984536e5a02c27..06219e9ebaa9c6e28bffd66305218f22f46d6f5b 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -9,6 +9,9 @@
 #include
 #include
 
+/* How long a lock should spin before we consider blocking */
+#define SPIN_THRESHOLD (1 << 15)
+
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock() smp_mb()
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4ce58887302a13f7a0995980623b64eb23b2047b..a2dccf3d56186b32dfab9bc2ecc4c2a8d499c75c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_ACPI)                      += acpi.o
 obj-$(CONFIG_ACPI_NUMA)                 += acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)                  += paravirt.o paravirt-spinlocks.o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)        += paravirt.o paravirt-spinlocks.o
 obj-$(CONFIG_RANDOMIZE_BASE)            += kaslr.o pi/
 obj-$(CONFIG_HIBERNATION)               += hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)                  += elfcore.o
diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c
index f402e7e6c30186a3c93f7d6d707b219b3792b85d..827956d5ae7bce733484a195388b0cfaaeae3816 100644
--- a/arch/arm64/kernel/paravirt-spinlocks.c
+++ b/arch/arm64/kernel/paravirt-spinlocks.c
@@ -15,4 +15,9 @@ __visible bool __native_vcpu_is_preempted(int cpu)
 }
 
 DEFINE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted);
+
+bool pv_is_native_spin_unlock(void)
+{
+        return false;
+}
 #endif /* CONFIG_PARAVIRT_SCHED */
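SPIN_THRESHOLD bounds the busy-wait phase before a waiter blocks. The generic
PV qspinlock slow path consumes it together with the pv_wait()/pv_kick()
hooks wired up in this series along these lines (a heavily condensed sketch,
not the real kernel/locking/qspinlock_paravirt.h code):

    static void pv_spin_then_block(u8 *lock_byte)
    {
            int loop;

            for (;;) {
                    /* Phase 1: plain busy-wait, bounded by SPIN_THRESHOLD. */
                    for (loop = SPIN_THRESHOLD; loop; loop--) {
                            if (!READ_ONCE(*lock_byte))
                                    return;         /* lock was released */
                            cpu_relax();
                    }
                    /*
                     * Phase 2: give the CPU back. pv_wait() halts this vCPU
                     * in the hypervisor until the unlocker pv_kick()s it, so
                     * an overcommitted host can run a useful vCPU instead of
                     * one that is merely spinning.
                     */
                    pv_wait(lock_byte, 1);
            }
    }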
"trace-paravirt.h" + struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -278,7 +282,81 @@ int __init pv_sched_init(void) static_call_update(pv_vcpu_preempted, kvm_vcpu_is_preempted); pr_info("using PV sched preempted\n"); + pv_qspinlock_init(); + return 0; } early_initcall(pv_sched_init); #endif /* CONFIG_PARAVIRT_SCHED */ + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +static bool arm_pvspin = false; + +/* Kick a cpu by its cpuid. Used to wake up a halted vcpu */ +static void kvm_kick_cpu(int cpu) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res); + + trace_kvm_kick_cpu("kvm kick cpu", smp_processor_id(), cpu); +} + +static void kvm_wait(u8 *ptr, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + local_irq_save(flags); + + if (READ_ONCE(*ptr) != val) + goto out; + + dsb(sy); + wfi(); + trace_kvm_wait("kvm wait wfi", smp_processor_id()); + +out: + local_irq_restore(flags); +} + +DEFINE_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath, + native_queued_spin_lock_slowpath); +EXPORT_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath); +DEFINE_STATIC_CALL(pv_qspinlock_queued_spin_unlock, + native_queued_spin_unlock); +EXPORT_STATIC_CALL(pv_qspinlock_queued_spin_unlock); +DEFINE_STATIC_CALL(pv_qspinlock_wait, kvm_wait); +DEFINE_STATIC_CALL(pv_qspinlock_kick, kvm_kick_cpu); + +void __init pv_qspinlock_init(void) +{ + /* Don't use the PV qspinlock code if there is only 1 vCPU. */ + if (num_possible_cpus() == 1) + arm_pvspin = false; + + if (!arm_pvspin) { + pr_info("PV qspinlocks disabled\n"); + return; + } + pr_info("PV qspinlocks enabled\n"); + + __pv_init_lock_hash(); + + static_call_update(pv_qspinlock_queued_spin_lock_slowpath, + __pv_queued_spin_lock_slowpath); + static_call_update(pv_qspinlock_queued_spin_unlock, + __pv_queued_spin_unlock); + static_call_update(pv_qspinlock_wait, kvm_wait); + static_call_update(pv_qspinlock_kick, kvm_kick_cpu); +} + +static __init int arm_parse_pvspin(char *arg) +{ + arm_pvspin = true; + return 0; +} +early_param("arm_pvspin", arm_parse_pvspin); +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/arm64/kernel/trace-paravirt.h b/arch/arm64/kernel/trace-paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..2d76272f39ae477bd863f9868adc27f2f0f268cf --- /dev/null +++ b/arch/arm64/kernel/trace-paravirt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM paravirt + +#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PARAVIRT_H + +#include + +TRACE_EVENT(kvm_kick_cpu, + TP_PROTO(const char *name, int cpu, int target), + TP_ARGS(name, cpu, target), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + __field(int, target) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + __entry->target = target; + ), + + TP_printk("PV qspinlock: %s, cpu %d kick target cpu %d", + __get_str(name), + __entry->cpu, + __entry->target + ) +); + +TRACE_EVENT(kvm_wait, + TP_PROTO(const char *name, int cpu), + TP_ARGS(name, cpu), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + ), + + TP_printk("PV qspinlock: %s, cpu %d wait kvm access wfi", + __get_str(name), + __entry->cpu + ) +); + +#endif /* _TRACE_PARAVIRT_H */ + +/* This part must be outside protection */ +#undef 
diff --git a/arch/arm64/kernel/trace-paravirt.h b/arch/arm64/kernel/trace-paravirt.h
new file mode 100644
index 0000000000000000000000000000000000000000..2d76272f39ae477bd863f9868adc27f2f0f268cf
--- /dev/null
+++ b/arch/arm64/kernel/trace-paravirt.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM paravirt
+
+#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PARAVIRT_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(kvm_kick_cpu,
+        TP_PROTO(const char *name, int cpu, int target),
+        TP_ARGS(name, cpu, target),
+
+        TP_STRUCT__entry(
+                __string(name, name)
+                __field(int, cpu)
+                __field(int, target)
+        ),
+
+        TP_fast_assign(
+                __assign_str(name, name);
+                __entry->cpu = cpu;
+                __entry->target = target;
+        ),
+
+        TP_printk("PV qspinlock: %s, cpu %d kick target cpu %d",
+                __get_str(name),
+                __entry->cpu,
+                __entry->target
+        )
+);
+
+TRACE_EVENT(kvm_wait,
+        TP_PROTO(const char *name, int cpu),
+        TP_ARGS(name, cpu),
+
+        TP_STRUCT__entry(
+                __string(name, name)
+                __field(int, cpu)
+        ),
+
+        TP_fast_assign(
+                __assign_str(name, name);
+                __entry->cpu = cpu;
+        ),
+
+        TP_printk("PV qspinlock: %s, cpu %d wait kvm access wfi",
+                __get_str(name),
+                __entry->cpu
+        )
+);
+
+#endif /* _TRACE_PARAVIRT_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../../arch/arm64/kernel/
+#define TRACE_INCLUDE_FILE trace-paravirt
+
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 5ba336d1efad598d6c5e9e0ecc24668ecf66c2bc..f6b1a6688e7e8aba2c9717bcf1a65054ebfc663a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -725,7 +725,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
         bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
-        return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
+        bool pv_unhalted = false;
+
+#ifdef CONFIG_PARAVIRT_SCHED
+        pv_unhalted = v->arch.pvsched.pv_unhalted;
+#endif
+        return ((irq_lines || kvm_vgic_vcpu_pending_irq(v) || pv_unhalted)
                 && !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
 }
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 90959b8b6228d0c282810bc714baf71a48e79883..96042ed4181647293a0f0c11f51d4e45c87e9720 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -122,6 +122,9 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
         } else {
                 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
                 vcpu->stat.wfi_exit_stat++;
+#ifdef CONFIG_PARAVIRT_SCHED
+                vcpu->arch.pvsched.pv_unhalted = false;
+#endif
         }
 
         if (esr & ESR_ELx_WFx_ISS_WFxT) {
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 4c9fc5df5142b1a8b531a57fe059609701dea393..0b2809439fce12f723131350f9e686536b04f54c 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -362,6 +362,9 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
                 vcpu->arch.pvsched.base = INVALID_GPA;
                 val[0] = SMCCC_RET_SUCCESS;
                 break;
+        case ARM_SMCCC_HV_PV_SCHED_KICK_CPU:
+                val[0] = kvm_pvsched_kick_vcpu(vcpu);
+                break;
 #endif /* CONFIG_PARAVIRT_SCHED */
         case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
                 val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
diff --git a/arch/arm64/kvm/pvsched.c b/arch/arm64/kvm/pvsched.c
index ee7fed4f539ed3c2da289b18867ad120fe6bb5c2..5ff680f7fbc59cf587f7b6f038b86b1d1848438e 100644
--- a/arch/arm64/kvm/pvsched.c
+++ b/arch/arm64/kvm/pvsched.c
@@ -12,6 +12,8 @@
 
 #include
 
+#include "trace.h"
+
 void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
 {
         struct kvm *kvm = vcpu->kvm;
@@ -35,6 +37,31 @@ void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
         pagefault_enable();
 }
 
+long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu)
+{
+        unsigned int vcpu_idx;
+        long val = SMCCC_RET_NOT_SUPPORTED;
+        struct kvm *kvm = vcpu->kvm;
+        struct kvm_vcpu *target = NULL;
+
+        vcpu_idx = smccc_get_arg1(vcpu);
+        target = kvm_get_vcpu(kvm, vcpu_idx);
+        if (!target)
+                goto out;
+
+        target->arch.pvsched.pv_unhalted = true;
+        kvm_make_request(KVM_REQ_IRQ_PENDING, target);
+        kvm_vcpu_kick(target);
+        if (READ_ONCE(target->ready))
+                kvm_vcpu_yield_to(target);
+
+        val = SMCCC_RET_SUCCESS;
+        trace_kvm_pvsched_kick_vcpu(vcpu->vcpu_id, target->vcpu_id);
+
+out:
+        return val;
+}
+
 long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
 {
         u32 feature = smccc_get_arg1(vcpu);
@@ -44,6 +71,7 @@ long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
         case ARM_SMCCC_HV_PV_SCHED_FEATURES:
         case ARM_SMCCC_HV_PV_SCHED_IPA_INIT:
         case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE:
+        case ARM_SMCCC_HV_PV_SCHED_KICK_CPU:
                 val = SMCCC_RET_SUCCESS;
                 break;
         }
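On the host side, the order of operations in kvm_pvsched_kick_vcpu() above
is significant: the pv_unhalted flag must be published before the kick,
otherwise the target vCPU could be woken, re-evaluate
kvm_arch_vcpu_runnable(), still observe the flag as false, and go back to
sleep. A condensed view of just that ordering (kick_order_sketch() is a
hypothetical helper, not patch code):

    static void kick_order_sketch(struct kvm_vcpu *target)
    {
            /* 1. Publish the wake-up reason... */
            target->arch.pvsched.pv_unhalted = true;

            /*
             * 2. ...then force the target out of its blocked state. When it
             * re-evaluates kvm_arch_vcpu_runnable(), the flag from step 1
             * makes it runnable; the flag is cleared again on the target's
             * next WFI trap (see the handle_exit.c hunk above).
             */
            kvm_make_request(KVM_REQ_IRQ_PENDING, target);
            kvm_vcpu_kick(target);
    }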
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 8ad53104934dcdfdc892852ce4f67cb1d0138f52..680c7e39af4a9a6b3754a21ed85688f6de992c4a 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -390,6 +390,24 @@ TRACE_EVENT(kvm_forward_sysreg_trap,
                       sys_reg_Op2(__entry->sysreg))
 );
 
+TRACE_EVENT(kvm_pvsched_kick_vcpu,
+        TP_PROTO(int vcpu_id, int target_vcpu_id),
+        TP_ARGS(vcpu_id, target_vcpu_id),
+
+        TP_STRUCT__entry(
+                __field(int, vcpu_id)
+                __field(int, target_vcpu_id)
+        ),
+
+        TP_fast_assign(
+                __entry->vcpu_id = vcpu_id;
+                __entry->target_vcpu_id = target_vcpu_id;
+        ),
+
+        TP_printk("PV qspinlock: vcpu %d kick target vcpu %d",
+                  __entry->vcpu_id, __entry->target_vcpu_id)
+);
+
 #endif /* _TRACE_ARM_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 9aba481603524d991ca2bdd61fe03aa8dda22d06..6a87a28a4d7abd007cf152b93494c497b73b2ab6 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -572,5 +572,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
                            0x92)
 #endif /* CONFIG_PARAVIRT_SCHED */
 
+#define ARM_SMCCC_HV_PV_SCHED_KICK_CPU                          \
+        ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                 \
+                           ARM_SMCCC_SMC_64,                    \
+                           ARM_SMCCC_OWNER_STANDARD_HYP,        \
+                           0x93)
+
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
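As a worked check of the new constant, ARM_SMCCC_CALL_VAL() assembles the
function ID from four fields, which reproduces the 0xC5000093 advertised in
pvsched.rst at the top of this patch (the static_assert is illustrative, not
part of the patch):

    /*
     * fast call           (1) -> bit 31     = 0x80000000
     * SMC64               (1) -> bit 30     = 0x40000000
     * owner: standard hyp (5) -> bits 29:24 = 0x05000000
     * function number          -> bits 15:0 = 0x00000093
     *                                         ----------
     *                                         0xC5000093
     */
    static_assert(ARM_SMCCC_HV_PV_SCHED_KICK_CPU == 0xC5000093);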