diff --git a/C b/C new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Documentation/virt/kvm/arm/pvsched.rst b/Documentation/virt/kvm/arm/pvsched.rst new file mode 100644 index 0000000000000000000000000000000000000000..6ba221e25089ed8b1de0098472a9c5cdbae059f7 --- /dev/null +++ b/Documentation/virt/kvm/arm/pvsched.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Paravirtualized sched support for arm64 +======================================= + +KVM/arm64 provides some hypervisor service calls to support a paravirtualized +sched. + +Some SMCCC compatible hypercalls are defined: + +* PV_SCHED_FEATURES: 0xC5000090 +* PV_SCHED_IPA_INIT: 0xC5000091 +* PV_SCHED_IPA_RELEASE: 0xC5000092 +* PV_SCHED_KICK_CPU: 0xC5000093 + +The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1 +ARCH_FEATURES mechanism before calling it. + +PV_SCHED_FEATURES + ============= ======== ========== + Function ID: (uint32) 0xC5000090 + PV_call_id: (uint32) The function to query for support. + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant + PV-sched feature is supported by the hypervisor. + ============= ======== ========== + +PV_SCHED_IPA_INIT + ============= ======== ========== + Function ID: (uint32) 0xC5000091 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of + this vCPU's PV data structure is shared to the + hypervisor. + ============= ======== ========== + +PV_SCHED_IPA_RELEASE + ============= ======== ========== + Function ID: (uint32) 0xC5000092 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of + this vCPU's PV data structure is released. + ============= ======== ========== + +PV_SCHED_KICK_CPU + ============= ======== ========== + Function ID: (uint32) 0xC5000093 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the vCPU is + kicked by the hypervisor. + ============= ======== ========== + +PV sched state +-------------- + +The structure pointed to by the PV_SCHED_IPA hypercall is as follows: + ++-----------+-------------+-------------+-----------------------------------+ +| Field | Byte Length | Byte Offset | Description | ++===========+=============+=============+===================================+ +| preempted | 4 | 0 | Indicates that the vCPU that owns | +| | | | this struct is running or not. | +| | | | Non-zero values mean the vCPU has | +| | | | been preempted. Zero means the | +| | | | vCPU is not preempted. | ++-----------+-------------+-------------+-----------------------------------+ + +The preempted field will be updated to 0 by the hypervisor prior to scheduling +a vCPU. When the vCPU is scheduled out, the preempted field will be updated +to 1 by the hypervisor. + +A vCPU of a paravirtualized guest that is busywaiting in guest kernel mode for +an event to occur (ex: a spinlock to become available) can execute WFI +instruction once it has busy-waited for more than a threshold time-interval. +Execution of WFI instruction would cause the hypervisor to put the vCPU to sleep +until occurrence of an appropriate event. Another vCPU of the same guest can +wakeup the sleeping vCPU by issuing PV_SCHED_KICK_CPU hypercall, specifying CPU +id (reg1) of the vCPU to be woken up. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9568049f36e35518d2dfeb77eeefb02339063e7b..b518783557ea9d245e705cb85e25b38294d464ed 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1548,6 +1548,26 @@ config PARAVIRT under a hypervisor, potentially improving performance significantly over full virtualization. +config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP + help + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM kernels. + + If you are unsure how to answer this question, answer Y. + +config PARAVIRT_SCHED + bool "Paravirtualization layer for sched" + depends on PARAVIRT + help + + If you are unsure how to answer this question, answer Y. + config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 5c8ee5a541d2047c10c8440e3691d6b1b9f97c25..d16ee8095366326ae55d0939d31424dca21f6528 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += early_ioremap.h generic-y += mcs_spinlock.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += parport.h generic-y += user.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index af06ccb7ee343304535c30b8855d6de8d8b1f4fc..37e0e434c505330d5318248a437485edd7379a7f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -589,6 +589,14 @@ struct kvm_vcpu_arch { gpa_t base; } steal; +#ifdef CONFIG_PARAVIRT_SCHED + /* Guest PV sched state */ + struct { + bool pv_unhalted; + gpa_t base; + } pvsched; +#endif + /* Per-vcpu CCSIDR override or NULL */ u32 *ccsidr; }; @@ -905,6 +913,21 @@ struct kvm_vcpu_stat { u64 mmio_exit_kernel; u64 signal_exits; u64 exits; + u64 fp_asimd_exit_stat; + u64 irq_exit_stat; + u64 sys64_exit_stat; + u64 mabt_exit_stat; + u64 fail_entry_exit_stat; + u64 internal_error_exit_stat; + u64 unknown_ec_exit_stat; + u64 cp15_32_exit_stat; + u64 cp15_64_exit_stat; + u64 cp14_mr_exit_stat; + u64 cp14_ls_exit_stat; + u64 cp14_64_exit_stat; + u64 smc_exit_stat; + u64 sve_exit_stat; + u64 debug_exit_stat; }; unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); @@ -1038,6 +1061,22 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) return (vcpu_arch->steal.base != INVALID_GPA); } +#ifdef CONFIG_PARAVIRT_SCHED +long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu); +void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted); +long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu); + +static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->pvsched.base = INVALID_GPA; +} + +static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->pvsched.base != INVALID_GPA); +} +#endif + void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 9aa193e0e8f28d9309bc18013230e714152f4f93..1ce7187eed485749edb85ab65c35b9eef42e0188 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -20,9 +20,60 @@ static inline u64 paravirt_steal_clock(int cpu) int __init pv_time_init(void); +#ifdef CONFIG_PARAVIRT_SCHED +int __init pv_sched_init(void); +#endif + +__visible bool __native_vcpu_is_preempted(int cpu); +DECLARE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted); + +static inline bool pv_vcpu_is_preempted(int cpu) +{ + return static_call(pv_vcpu_preempted)(cpu); +} + +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +void __init pv_qspinlock_init(void); +bool pv_is_native_spin_unlock(void); + +void dummy_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +DECLARE_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath, + dummy_queued_spin_lock_slowpath); +static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + return static_call(pv_qspinlock_queued_spin_lock_slowpath)(lock, val); +} + +void dummy_queued_spin_unlock(struct qspinlock *lock); +DECLARE_STATIC_CALL(pv_qspinlock_queued_spin_unlock, dummy_queued_spin_unlock); +static inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + return static_call(pv_qspinlock_queued_spin_unlock)(lock); +} + +void dummy_wait(u8 *ptr, u8 val); +DECLARE_STATIC_CALL(pv_qspinlock_wait, dummy_wait); +static inline void pv_wait(u8 *ptr, u8 val) +{ + return static_call(pv_qspinlock_wait)(ptr, val); +} + +void dummy_kick(int cpu); +DECLARE_STATIC_CALL(pv_qspinlock_kick, dummy_kick); +static inline void pv_kick(int cpu) +{ + return static_call(pv_qspinlock_kick)(cpu); +} +#else + +#define pv_qspinlock_init() do {} while (0) + +#endif /* SMP && PARAVIRT_SPINLOCKS */ + #else #define pv_time_init() do {} while (0) +#define pv_sched_init() do {} while (0) #endif // CONFIG_PARAVIRT diff --git a/arch/arm64/include/asm/pvsched-abi.h b/arch/arm64/include/asm/pvsched-abi.h new file mode 100644 index 0000000000000000000000000000000000000000..80e50e7a1a3179eb4104262cd0cf18dd1cd603ec --- /dev/null +++ b/arch/arm64/include/asm/pvsched-abi.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifndef __ASM_PVSCHED_ABI_H +#define __ASM_PVSCHED_ABI_H + +struct pvsched_vcpu_state { + __le32 preempted; + /* Structure must be 64 byte aligned, pad to that size */ + u8 padding[60]; +} __packed; + +#endif diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h new file mode 100644 index 0000000000000000000000000000000000000000..ce3816b60f25edb58b11efcc01deb62b707ee4a0 --- /dev/null +++ b/arch/arm64/include/asm/qspinlock.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2020 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifndef _ASM_ARM64_QSPINLOCK_H +#define _ASM_ARM64_QSPINLOCK_H + +#include +#include +#include +#include + +#define _Q_PENDING_LOOPS (1 << 9) + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release(&lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} + +#endif + +#include + +#endif /* _ASM_ARM64_QSPINLOCK_H */ diff --git a/arch/arm64/include/asm/qspinlock_paravirt.h b/arch/arm64/include/asm/qspinlock_paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..eba4be28fbb9d4e286269b7f19f97fe9f3f91f17 --- /dev/null +++ b/arch/arm64/include/asm/qspinlock_paravirt.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +#endif diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0525c0b089edf7b1551bc1e4a47f167c969727e9..4040a27ecf86a6c4dc80d16f3bfb470584295b8c 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -7,6 +7,10 @@ #include #include +#include + +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() @@ -19,9 +23,18 @@ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net */ #define vcpu_is_preempted vcpu_is_preempted +#ifdef CONFIG_PARAVIRT +static inline bool vcpu_is_preempted(int cpu) +{ + return pv_vcpu_is_preempted(cpu); +} + +#else + static inline bool vcpu_is_preempted(int cpu) { return false; } +#endif /* CONFIG_PARAVIRT */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 31859e7741a5bb5eed868a9b4fff843348523a1c..4db14d6677bd9a6b300ad95cfc503551481f79f1 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -60,7 +60,8 @@ obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o -obj-$(CONFIG_PARAVIRT) += paravirt.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt-spinlocks.o +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt.o paravirt-spinlocks.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/ obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-$(CONFIG_ELF_CORE) += elfcore.o diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c new file mode 100644 index 0000000000000000000000000000000000000000..2e4abc83e31868f17b79a3d2361abbacfc30c220 --- /dev/null +++ b/arch/arm64/kernel/paravirt-spinlocks.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#include +#include +#include + +__visible bool __native_vcpu_is_preempted(int cpu) +{ + return false; +} + +DEFINE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted); + +bool pv_is_native_spin_unlock(void) +{ + return false; +} diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index aa718d6a9274ab06e78b6dc28f08de97a517a3ec..126dff610e075cc23f16c7d704093f2cff3c2c35 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -22,8 +22,13 @@ #include #include +#include +#include #include +#define CREATE_TRACE_POINTS +#include "trace-paravirt.h" + struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -174,3 +179,187 @@ int __init pv_time_init(void) return 0; } + +#ifdef CONFIG_PARAVIRT_SCHED +DEFINE_PER_CPU(struct pvsched_vcpu_state, pvsched_vcpu_region) __aligned(64); +EXPORT_PER_CPU_SYMBOL(pvsched_vcpu_region); + +static bool kvm_vcpu_is_preempted(int cpu) +{ + struct pvsched_vcpu_state *reg; + u32 preempted; + + reg = &per_cpu(pvsched_vcpu_region, cpu); + if (!reg) { + pr_warn_once("PV sched enabled but not configured for cpu %d\n", + cpu); + return false; + } + + preempted = le32_to_cpu(READ_ONCE(reg->preempted)); + + return !!preempted; +} + +static int pvsched_vcpu_state_dying_cpu(unsigned int cpu) +{ + struct pvsched_vcpu_state *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&pvsched_vcpu_region); + if (!reg) + return -EFAULT; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE, &res); + memset(reg, 0, sizeof(*reg)); + + return 0; +} + +static int init_pvsched_vcpu_state(unsigned int cpu) +{ + struct pvsched_vcpu_state *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&pvsched_vcpu_region); + if (!reg) + return -EFAULT; + + /* Pass the memory address to host via hypercall */ + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_INIT, + virt_to_phys(reg), &res); + + return 0; +} + +static int kvm_arm_init_pvsched(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVM_PVSCHED_STARTING, + "hypervisor/arm/pvsched:starting", + init_pvsched_vcpu_state, + pvsched_vcpu_state_dying_cpu); + + if (ret < 0) { + pr_warn("PV sched init failed\n"); + return ret; + } + + return 0; +} + +static bool has_kvm_pvsched(void) +{ + struct arm_smccc_res res; + + /* To detect the presence of PV sched support we require SMCCC 1.1+ */ + if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_HV_PV_SCHED_FEATURES, &res); + + return (res.a0 == SMCCC_RET_SUCCESS); +} + +int __init pv_sched_init(void) +{ + int ret; + + if (is_hyp_mode_available()) + return 0; + + if (!has_kvm_pvsched()) { + pr_warn("PV sched is not available\n"); + return 0; + } + + ret = kvm_arm_init_pvsched(); + if (ret) + return ret; + + static_call_update(pv_vcpu_preempted, kvm_vcpu_is_preempted); + pr_info("using PV sched preempted\n"); + + pv_qspinlock_init(); + + return 0; +} + +early_initcall(pv_sched_init); +#endif /* CONFIG_PARAVIRT_SCHED */ + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +static bool arm_pvspin; + +/* Kick a cpu by its cpuid. Used to wake up a halted vcpu */ +static void kvm_kick_cpu(int cpu) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res); + + trace_kvm_kick_cpu("kvm kick cpu", smp_processor_id(), cpu); +} + +static void kvm_wait(u8 *ptr, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + local_irq_save(flags); + + if (READ_ONCE(*ptr) != val) + goto out; + + dsb(sy); + wfi(); + trace_kvm_wait("kvm wait wfi", smp_processor_id()); + +out: + local_irq_restore(flags); +} + +DEFINE_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath, + native_queued_spin_lock_slowpath); +DEFINE_STATIC_CALL(pv_qspinlock_queued_spin_unlock, native_queued_spin_unlock); +DEFINE_STATIC_CALL(pv_qspinlock_wait, kvm_wait); +DEFINE_STATIC_CALL(pv_qspinlock_kick, kvm_kick_cpu); + +EXPORT_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath); +EXPORT_STATIC_CALL(pv_qspinlock_queued_spin_unlock); +EXPORT_STATIC_CALL(pv_qspinlock_wait); +EXPORT_STATIC_CALL(pv_qspinlock_kick); + +void __init pv_qspinlock_init(void) +{ + /* Don't use the PV qspinlock code if there is only 1 vCPU. */ + if (num_possible_cpus() == 1) + arm_pvspin = false; + + if (!arm_pvspin) { + pr_info("PV qspinlocks disabled\n"); + return; + } + pr_info("PV qspinlocks enabled\n"); + + __pv_init_lock_hash(); + + static_call_update(pv_qspinlock_queued_spin_lock_slowpath, + __pv_queued_spin_lock_slowpath); + static_call_update(pv_qspinlock_queued_spin_unlock, + __pv_queued_spin_unlock); + static_call_update(pv_qspinlock_wait, kvm_wait); + static_call_update(pv_qspinlock_kick, kvm_kick_cpu); +} + +static __init int arm_parse_pvspin(char *arg) +{ + arm_pvspin = true; + return 0; +} +early_param("arm_pvspin", arm_parse_pvspin); +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/arm64/kernel/trace-paravirt.h b/arch/arm64/kernel/trace-paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..2d76272f39ae477bd863f9868adc27f2f0f268cf --- /dev/null +++ b/arch/arm64/kernel/trace-paravirt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM paravirt + +#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PARAVIRT_H + +#include + +TRACE_EVENT(kvm_kick_cpu, + TP_PROTO(const char *name, int cpu, int target), + TP_ARGS(name, cpu, target), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + __field(int, target) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + __entry->target = target; + ), + + TP_printk("PV qspinlock: %s, cpu %d kick target cpu %d", + __get_str(name), + __entry->cpu, + __entry->target + ) +); + +TRACE_EVENT(kvm_wait, + TP_PROTO(const char *name, int cpu), + TP_ARGS(name, cpu), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + ), + + TP_printk("PV qspinlock: %s, cpu %d wait kvm access wfi", + __get_str(name), + __entry->cpu + ) +); + +#endif /* _TRACE_PARAVIRT_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../../arch/arm64/kernel/ +#define TRACE_INCLUDE_FILE trace-paravirt + +#include diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index c0c050e53157d9908c91fd781aa1b5d3271e4092..3e16e6c364c63b47ed74dca9184e0dee6a70d6f3 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,7 +10,7 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ -kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ +kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4866b3f7b4ea3847d885e00cfac47a4d7abf9da3..c128a97fcc88fb0f1d5f7eda19d60f62e93a77c3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -386,6 +386,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_arm_pvtime_vcpu_init(&vcpu->arch); +#ifdef CONFIG_PARAVIRT_SCHED + kvm_arm_pvsched_vcpu_init(&vcpu->arch); +#endif + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; err = kvm_vgic_vcpu_init(vcpu); @@ -461,10 +465,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) vcpu_set_on_unsupported_cpu(vcpu); + +#ifdef CONFIG_PARAVIRT_SCHED + if (kvm_arm_is_pvsched_enabled(&vcpu->arch)) + kvm_update_pvsched_preempted(vcpu, 0); +#endif + +#ifdef CONFIG_KVM_HISI_VIRT + kvm_hisi_dvmbm_load(vcpu); +#endif } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -480,6 +494,15 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu_clear_on_unsupported_cpu(vcpu); vcpu->cpu = -1; + +#ifdef CONFIG_PARAVIRT_SCHED + if (kvm_arm_is_pvsched_enabled(&vcpu->arch)) + kvm_update_pvsched_preempted(vcpu, 1); +#endif + +#ifdef CONFIG_KVM_HISI_VIRT + kvm_hisi_dvmbm_put(vcpu); +#endif } static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) @@ -557,8 +580,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); +#ifdef CONFIG_PARAVIRT_SCHED + bool pv_unhalted = v->arch.pvsched.pv_unhalted; + + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v) || pv_unhalted) + && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); +#else return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); +#endif } bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) @@ -1332,6 +1362,10 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, spin_unlock(&vcpu->arch.mp_state_lock); +#ifdef CONFIG_PARAVIRT_SCHED + kvm_arm_pvsched_vcpu_init(&vcpu->arch); +#endif + return 0; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 617ae6dea5d5bcf01c8c72cec5f16c684151905c..65d780887ab2af4eaa3e6b8172dd12345fdf8e36 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -73,6 +73,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_hvc_get_imm(vcpu)) { vcpu_set_reg(vcpu, 0, ~0UL); + vcpu->stat.smc_exit_stat++; return 1; } @@ -121,6 +122,9 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; +#ifdef CONFIG_PARAVIRT_SCHED + vcpu->arch.pvsched.pv_unhalted = false; +#endif } if (esr & ESR_ELx_WFx_ISS_WFxT) { @@ -173,6 +177,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) run->debug.arch.hsr_high = upper_32_bits(esr); run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID; + vcpu->stat.debug_exit_stat++; + switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; @@ -193,6 +199,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) esr, esr_get_class_string(esr)); kvm_inject_undefined(vcpu); + vcpu->stat.unknown_ec_exit_stat++; return 1; } @@ -203,6 +210,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) static int handle_sve(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); + vcpu->stat.sve_exit_stat++; return 1; } @@ -335,6 +343,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) switch (exception_index) { case ARM_EXCEPTION_IRQ: + vcpu->stat.irq_exit_stat++; return 1; case ARM_EXCEPTION_EL1_SERROR: return 1; @@ -346,6 +355,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) * is pre-emptied by kvm_reboot()'s shutdown call. */ run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->stat.fail_entry_exit_stat++; return 0; case ARM_EXCEPTION_IL: /* @@ -353,11 +363,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) * have been corrupted somehow. Give up. */ run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->stat.fail_entry_exit_stat++; return -EINVAL; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->stat.internal_error_exit_stat++; return 0; } } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 9cfe6bd1dbe459cb3588bccd94359369a546947e..61907949e00ffd676b8c40d9cf8c4860ce02851e 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -294,8 +294,10 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Only handle traps the vCPU can support here: */ switch (esr_ec) { case ESR_ELx_EC_FP_ASIMD: + vcpu->stat.fp_asimd_exit_stat++; break; case ESR_ELx_EC_SVE: + vcpu->stat.sve_exit_stat++; if (!sve_guest) return false; break; diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 7fb4df0456dea53f9cdeccdd28c2ff47a8ce6ff3..2e7a1d06e6ffb34480a693cea23dbc79caa6772c 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -332,6 +332,11 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) &smccc_feat->std_hyp_bmap)) val[0] = SMCCC_RET_SUCCESS; break; +#ifdef CONFIG_PARAVIRT_SCHED + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val[0] = SMCCC_RET_SUCCESS; + break; +#endif } break; case ARM_SMCCC_HV_PV_TIME_FEATURES: @@ -360,6 +365,25 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) case ARM_SMCCC_TRNG_RND32: case ARM_SMCCC_TRNG_RND64: return kvm_trng_call(vcpu); +#ifdef CONFIG_PARAVIRT_SCHED + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val[0] = kvm_hypercall_pvsched_features(vcpu); + break; + case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: + gpa = smccc_get_arg1(vcpu); + if (gpa != INVALID_GPA) { + vcpu->arch.pvsched.base = gpa; + val[0] = SMCCC_RET_SUCCESS; + } + break; + case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: + vcpu->arch.pvsched.base = INVALID_GPA; + val[0] = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_HV_PV_SCHED_KICK_CPU: + val[0] = kvm_pvsched_kick_vcpu(vcpu); + break; +#endif default: return kvm_psci_call(vcpu); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 482280fe22d7c59d2539b9f0e758cfc95afad0ff..121a3d90240daaa504ead609bc71b75f858eb907 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1416,6 +1416,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); + vcpu->stat.mabt_exit_stat++; if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); diff --git a/arch/arm64/kvm/pvsched.c b/arch/arm64/kvm/pvsched.c new file mode 100644 index 0000000000000000000000000000000000000000..eae6ad7b6c5dc615d06cfe5ec2b6627f694f800c --- /dev/null +++ b/arch/arm64/kvm/pvsched.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifdef CONFIG_PARAVIRT_SCHED +#include +#include + +#include + +#include + +#include "trace.h" + +void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted) +{ + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.pvsched.base; + u64 offset = offsetof(struct pvsched_vcpu_state, preempted); + int idx; + + if (base == INVALID_GPA) + return; + + /* + * This function is called from atomic context, so we need to + * disable page faults. + */ + pagefault_disable(); + + idx = srcu_read_lock(&kvm->srcu); + kvm_put_guest(kvm, base + offset, cpu_to_le32(preempted)); + srcu_read_unlock(&kvm->srcu, idx); + + pagefault_enable(); +} + +long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu) +{ + unsigned int vcpu_idx; + long val = SMCCC_RET_NOT_SUPPORTED; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *target = NULL; + + vcpu_idx = smccc_get_arg1(vcpu); + target = kvm_get_vcpu(kvm, vcpu_idx); + if (!target) + goto out; + + target->arch.pvsched.pv_unhalted = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, target); + kvm_vcpu_kick(target); + if (READ_ONCE(target->ready)) + kvm_vcpu_yield_to(target); + + val = SMCCC_RET_SUCCESS; + trace_kvm_pvsched_kick_vcpu(vcpu->vcpu_id, target->vcpu_id); + +out: + return val; +} + +long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: + case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: + case ARM_SMCCC_HV_PV_SCHED_KICK_CPU: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +} +#endif /* CONFIG_PARAVIRT_SCHED */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0afd6136e2759cbe0773fc5a036577e7e9e09db2..8a92e848a1e6b5cf143841f83a5345d07512e506 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2774,6 +2774,8 @@ static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); + vcpu->stat.cp14_ls_exit_stat++; + return 1; } @@ -3050,6 +3052,8 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, int kvm_handle_cp15_64(struct kvm_vcpu *vcpu) { + vcpu->stat.cp15_64_exit_stat++; + return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs)); } @@ -3059,6 +3063,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu)); + vcpu->stat.cp15_32_exit_stat++; + /* * Certain AArch32 ID registers are handled by rerouting to the AArch64 * system register table. Registers in the ID range where CRm=0 are @@ -3073,6 +3079,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) { + vcpu->stat.cp14_64_exit_stat++; + return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs)); } @@ -3082,6 +3090,8 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu)); + vcpu->stat.cp14_mr_exit_stat++; + return kvm_handle_cp_32(vcpu, ¶ms, cp14_regs, ARRAY_SIZE(cp14_regs)); } @@ -3178,6 +3188,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) int Rt = kvm_vcpu_sys_get_rt(vcpu); trace_kvm_handle_sys_reg(esr); + vcpu->stat.sys64_exit_stat++; if (__check_nv_sr_forward(vcpu)) return 1; diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 8ad53104934dcdfdc892852ce4f67cb1d0138f52..680c7e39af4a9a6b3754a21ed85688f6de992c4a 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -390,6 +390,24 @@ TRACE_EVENT(kvm_forward_sysreg_trap, sys_reg_Op2(__entry->sysreg)) ); +TRACE_EVENT(kvm_pvsched_kick_vcpu, + TP_PROTO(int vcpu_id, int target_vcpu_id), + TP_ARGS(vcpu_id, target_vcpu_id), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, target_vcpu_id) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->target_vcpu_id = target_vcpu_id; + ), + + TP_printk("PV qspinlock: vcpu %d kick target vcpu %d", + __entry->vcpu_id, __entry->target_vcpu_id) +); + #endif /* _TRACE_ARM_ARM64_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 083f8565371616269fa0050036ec8a3631c44357..02b857f44ae8efc592b46f02bc1844186ec7e969 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -577,5 +577,30 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, method; \ }) +/* Paravirtualised sched calls */ +#define ARM_SMCCC_HV_PV_SCHED_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x90) + +#define ARM_SMCCC_HV_PV_SCHED_IPA_INIT \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x91) + +#define ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x92) + +#define ARM_SMCCC_HV_PV_SCHED_KICK_CPU \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x93) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 624d4a38c358a08f2ca417523058bc1c6a319a8d..f94a1b8e34e0278f09e176935fe9522c13c04795 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -190,6 +190,7 @@ enum cpuhp_state { CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, CPUHP_AP_ARM_XEN_RUNSTATE_STARTING, + CPUHP_AP_ARM_KVM_PVSCHED_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING,