diff --git a/Documentation/virt/kvm/arm/pvsched.rst b/Documentation/virt/kvm/arm/pvsched.rst new file mode 100644 index 0000000000000000000000000000000000000000..6ba221e25089ed8b1de0098472a9c5cdbae059f7 --- /dev/null +++ b/Documentation/virt/kvm/arm/pvsched.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Paravirtualized sched support for arm64 +======================================= + +KVM/arm64 provides some hypervisor service calls to support a paravirtualized +sched. + +Some SMCCC compatible hypercalls are defined: + +* PV_SCHED_FEATURES: 0xC5000090 +* PV_SCHED_IPA_INIT: 0xC5000091 +* PV_SCHED_IPA_RELEASE: 0xC5000092 +* PV_SCHED_KICK_CPU: 0xC5000093 + +The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1 +ARCH_FEATURES mechanism before calling it. + +PV_SCHED_FEATURES + ============= ======== ========== + Function ID: (uint32) 0xC5000090 + PV_call_id: (uint32) The function to query for support. + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant + PV-sched feature is supported by the hypervisor. + ============= ======== ========== + +PV_SCHED_IPA_INIT + ============= ======== ========== + Function ID: (uint32) 0xC5000091 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of + this vCPU's PV data structure is shared to the + hypervisor. + ============= ======== ========== + +PV_SCHED_IPA_RELEASE + ============= ======== ========== + Function ID: (uint32) 0xC5000092 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of + this vCPU's PV data structure is released. + ============= ======== ========== + +PV_SCHED_KICK_CPU + ============= ======== ========== + Function ID: (uint32) 0xC5000093 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the vCPU is + kicked by the hypervisor. + ============= ======== ========== + +PV sched state +-------------- + +The structure pointed to by the PV_SCHED_IPA hypercall is as follows: + ++-----------+-------------+-------------+-----------------------------------+ +| Field | Byte Length | Byte Offset | Description | ++===========+=============+=============+===================================+ +| preempted | 4 | 0 | Indicates that the vCPU that owns | +| | | | this struct is running or not. | +| | | | Non-zero values mean the vCPU has | +| | | | been preempted. Zero means the | +| | | | vCPU is not preempted. | ++-----------+-------------+-------------+-----------------------------------+ + +The preempted field will be updated to 0 by the hypervisor prior to scheduling +a vCPU. When the vCPU is scheduled out, the preempted field will be updated +to 1 by the hypervisor. + +A vCPU of a paravirtualized guest that is busywaiting in guest kernel mode for +an event to occur (ex: a spinlock to become available) can execute WFI +instruction once it has busy-waited for more than a threshold time-interval. +Execution of WFI instruction would cause the hypervisor to put the vCPU to sleep +until occurrence of an appropriate event. Another vCPU of the same guest can +wakeup the sleeping vCPU by issuing PV_SCHED_KICK_CPU hypercall, specifying CPU +id (reg1) of the vCPU to be woken up. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e3821be64a22fed210fae39b68b6e4c720918621..d26f391decd035620703180af3676aa485f8cc3d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1550,6 +1550,30 @@ config PARAVIRT under a hypervisor, potentially improving performance significantly over full virtualization. +config PARAVIRT_SCHED + bool "Paravirtualization layer for sched" + depends on PARAVIRT + help + This supports the vCPU preemption check to enhance lock performance on + overcommitted hosts (more runnable vCPUs than physical CPUs in the + system) as doing busy waits for preempted vCPUs will hurt system + performance far worse than early yielding. + + If you are unsure how to answer this question, answer Y. + +config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP + help + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM kernels. + + If you are unsure how to answer this question, answer Y. + config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 5c8ee5a541d2047c10c8440e3691d6b1b9f97c25..d16ee8095366326ae55d0939d31424dca21f6528 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += early_ioremap.h generic-y += mcs_spinlock.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += parport.h generic-y += user.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bd7552746f9e510876ad37f459446df7891f5481..dc2f06c4fcdc45c0cd1fc2e7642f435f4ef22f9f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -589,6 +589,12 @@ struct kvm_vcpu_arch { gpa_t base; } steal; + /* Guest PV sched state */ + struct { + bool pv_unhalted; + gpa_t base; + } pvsched; + /* Per-vcpu CCSIDR override or NULL */ u32 *ccsidr; }; @@ -1038,6 +1044,38 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) return (vcpu_arch->steal.base != INVALID_GPA); } +#ifdef CONFIG_PARAVIRT_SCHED +long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu); +void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted); +long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu); + +static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->pvsched.base = INVALID_GPA; +} + +static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->pvsched.base != INVALID_GPA); +} +#else +static inline long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) +{ + return 0; +} +static inline void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, + u32 preempted) {} +static inline long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu) +{ + return 0; +} +static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) {} +static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return false; +} +#endif + void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 9aa193e0e8f28d9309bc18013230e714152f4f93..a29eeffa49aa31976a4533ff0929548c46ca0755 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -20,9 +20,58 @@ static inline u64 paravirt_steal_clock(int cpu) int __init pv_time_init(void); +int __init pv_sched_init(void); + +__visible bool __native_vcpu_is_preempted(int cpu); +DECLARE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted); + +static inline bool pv_vcpu_is_preempted(int cpu) +{ + return static_call(pv_vcpu_preempted)(cpu); +} + +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +void __init pv_qspinlock_init(void); +bool pv_is_native_spin_unlock(void); + +void dummy_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +DECLARE_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath, + dummy_queued_spin_lock_slowpath); +static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + return static_call(pv_qspinlock_queued_spin_lock_slowpath)(lock, val); +} + +void dummy_queued_spin_unlock(struct qspinlock *lock); +DECLARE_STATIC_CALL(pv_qspinlock_queued_spin_unlock, dummy_queued_spin_unlock); +static inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + return static_call(pv_qspinlock_queued_spin_unlock)(lock); +} + +void dummy_wait(u8 *ptr, u8 val); +DECLARE_STATIC_CALL(pv_qspinlock_wait, dummy_wait); +static inline void pv_wait(u8 *ptr, u8 val) +{ + return static_call(pv_qspinlock_wait)(ptr, val); +} + +void dummy_kick(int cpu); +DECLARE_STATIC_CALL(pv_qspinlock_kick, dummy_kick); +static inline void pv_kick(int cpu) +{ + return static_call(pv_qspinlock_kick)(cpu); +} +#else + +#define pv_qspinlock_init() do {} while (0) + +#endif /* SMP && PARAVIRT_SPINLOCKS */ + #else #define pv_time_init() do {} while (0) +#define pv_sched_init() do {} while (0) #endif // CONFIG_PARAVIRT diff --git a/arch/arm64/include/asm/pvsched-abi.h b/arch/arm64/include/asm/pvsched-abi.h new file mode 100644 index 0000000000000000000000000000000000000000..80e50e7a1a3179eb4104262cd0cf18dd1cd603ec --- /dev/null +++ b/arch/arm64/include/asm/pvsched-abi.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifndef __ASM_PVSCHED_ABI_H +#define __ASM_PVSCHED_ABI_H + +struct pvsched_vcpu_state { + __le32 preempted; + /* Structure must be 64 byte aligned, pad to that size */ + u8 padding[60]; +} __packed; + +#endif diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h new file mode 100644 index 0000000000000000000000000000000000000000..ce3816b60f25edb58b11efcc01deb62b707ee4a0 --- /dev/null +++ b/arch/arm64/include/asm/qspinlock.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2020 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifndef _ASM_ARM64_QSPINLOCK_H +#define _ASM_ARM64_QSPINLOCK_H + +#include +#include +#include +#include + +#define _Q_PENDING_LOOPS (1 << 9) + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release(&lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} + +#endif + +#include + +#endif /* _ASM_ARM64_QSPINLOCK_H */ diff --git a/arch/arm64/include/asm/qspinlock_paravirt.h b/arch/arm64/include/asm/qspinlock_paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..eba4be28fbb9d4e286269b7f19f97fe9f3f91f17 --- /dev/null +++ b/arch/arm64/include/asm/qspinlock_paravirt.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +#endif diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0525c0b089edf7b1551bc1e4a47f167c969727e9..06219e9ebaa9c6e28bffd66305218f22f46d6f5b 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -7,6 +7,10 @@ #include #include +#include + +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() @@ -19,9 +23,16 @@ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net */ #define vcpu_is_preempted vcpu_is_preempted +#if defined(CONFIG_PARAVIRT) && defined(CONFIG_PARAVIRT_SCHED) +static inline bool vcpu_is_preempted(int cpu) +{ + return pv_vcpu_is_preempted(cpu); +} +#else static inline bool vcpu_is_preempted(int cpu) { return false; } +#endif /* CONFIG_PARAVIRT && CONFIG_PARAVIRT_SCHED */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c3b93910fa4d57b3b01853169f248184b980286a..2afd76900f459aabb36b11c504caa430c20ac0d3 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -60,7 +60,8 @@ obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o -obj-$(CONFIG_PARAVIRT) += paravirt.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt-spinlocks.o +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt.o paravirt-spinlocks.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/ obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-$(CONFIG_ELF_CORE) += elfcore.o diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c new file mode 100644 index 0000000000000000000000000000000000000000..2e4abc83e31868f17b79a3d2361abbacfc30c220 --- /dev/null +++ b/arch/arm64/kernel/paravirt-spinlocks.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#include +#include +#include + +__visible bool __native_vcpu_is_preempted(int cpu) +{ + return false; +} + +DEFINE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted); + +bool pv_is_native_spin_unlock(void) +{ + return false; +} diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index aa718d6a9274ab06e78b6dc28f08de97a517a3ec..4dedec239a5951418a40c143d650f92458ffedcb 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -22,8 +22,13 @@ #include #include +#include +#include #include +#define CREATE_TRACE_POINTS +#include "trace-paravirt.h" + struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -174,3 +179,185 @@ int __init pv_time_init(void) return 0; } + +DEFINE_PER_CPU(struct pvsched_vcpu_state, pvsched_vcpu_region) __aligned(64); +EXPORT_PER_CPU_SYMBOL(pvsched_vcpu_region); + +static bool kvm_vcpu_is_preempted(int cpu) +{ + struct pvsched_vcpu_state *reg; + u32 preempted; + + reg = &per_cpu(pvsched_vcpu_region, cpu); + if (!reg) { + pr_warn_once("PV sched enabled but not configured for cpu %d\n", + cpu); + return false; + } + + preempted = le32_to_cpu(READ_ONCE(reg->preempted)); + + return !!preempted; +} + +static int pvsched_vcpu_state_dying_cpu(unsigned int cpu) +{ + struct pvsched_vcpu_state *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&pvsched_vcpu_region); + if (!reg) + return -EFAULT; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE, &res); + memset(reg, 0, sizeof(*reg)); + + return 0; +} + +static int init_pvsched_vcpu_state(unsigned int cpu) +{ + struct pvsched_vcpu_state *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&pvsched_vcpu_region); + if (!reg) + return -EFAULT; + + /* Pass the memory address to host via hypercall */ + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_INIT, + virt_to_phys(reg), &res); + + return 0; +} + +static int kvm_arm_init_pvsched(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVM_PVSCHED_STARTING, + "hypervisor/arm/pvsched:starting", + init_pvsched_vcpu_state, + pvsched_vcpu_state_dying_cpu); + + if (ret < 0) { + pr_warn("PV sched init failed\n"); + return ret; + } + + return 0; +} + +static bool has_kvm_pvsched(void) +{ + struct arm_smccc_res res; + + /* To detect the presence of PV sched support we require SMCCC 1.1+ */ + if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_HV_PV_SCHED_FEATURES, &res); + + return (res.a0 == SMCCC_RET_SUCCESS); +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +static bool arm_pvspin; + +/* Kick a cpu by its cpuid. Used to wake up a halted vcpu */ +static void kvm_kick_cpu(int cpu) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res); + + trace_kvm_kick_cpu("kvm kick cpu", smp_processor_id(), cpu); +} + +static void kvm_wait(u8 *ptr, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + local_irq_save(flags); + + if (READ_ONCE(*ptr) != val) + goto out; + + dsb(sy); + wfi(); + trace_kvm_wait("kvm wait wfi", smp_processor_id()); + +out: + local_irq_restore(flags); +} + +DEFINE_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath, + native_queued_spin_lock_slowpath); +DEFINE_STATIC_CALL(pv_qspinlock_queued_spin_unlock, native_queued_spin_unlock); +DEFINE_STATIC_CALL(pv_qspinlock_wait, kvm_wait); +DEFINE_STATIC_CALL(pv_qspinlock_kick, kvm_kick_cpu); + +EXPORT_STATIC_CALL(pv_qspinlock_queued_spin_lock_slowpath); +EXPORT_STATIC_CALL(pv_qspinlock_queued_spin_unlock); +EXPORT_STATIC_CALL(pv_qspinlock_wait); +EXPORT_STATIC_CALL(pv_qspinlock_kick); + +void __init pv_qspinlock_init(void) +{ + /* Don't use the PV qspinlock code if there is only 1 vCPU. */ + if (num_possible_cpus() == 1) + arm_pvspin = false; + + if (!arm_pvspin) { + pr_info("PV qspinlocks disabled\n"); + return; + } + pr_info("PV qspinlocks enabled\n"); + + __pv_init_lock_hash(); + + static_call_update(pv_qspinlock_queued_spin_lock_slowpath, + __pv_queued_spin_lock_slowpath); + static_call_update(pv_qspinlock_queued_spin_unlock, + __pv_queued_spin_unlock); + static_call_update(pv_qspinlock_wait, kvm_wait); + static_call_update(pv_qspinlock_kick, kvm_kick_cpu); +} + +static __init int arm_parse_pvspin(char *arg) +{ + arm_pvspin = true; + return 0; +} +early_param("arm_pvspin", arm_parse_pvspin); +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +int __init pv_sched_init(void) +{ + int ret; + + if (is_hyp_mode_available()) + return 0; + + if (!has_kvm_pvsched()) { + pr_warn("PV sched is not available\n"); + return 0; + } + + ret = kvm_arm_init_pvsched(); + if (ret) + return ret; + + static_call_update(pv_vcpu_preempted, kvm_vcpu_is_preempted); + pr_info("using PV sched preempted\n"); + + pv_qspinlock_init(); + + return 0; +} +early_initcall(pv_sched_init); + diff --git a/arch/arm64/kernel/trace-paravirt.h b/arch/arm64/kernel/trace-paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..2d76272f39ae477bd863f9868adc27f2f0f268cf --- /dev/null +++ b/arch/arm64/kernel/trace-paravirt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM paravirt + +#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PARAVIRT_H + +#include + +TRACE_EVENT(kvm_kick_cpu, + TP_PROTO(const char *name, int cpu, int target), + TP_ARGS(name, cpu, target), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + __field(int, target) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + __entry->target = target; + ), + + TP_printk("PV qspinlock: %s, cpu %d kick target cpu %d", + __get_str(name), + __entry->cpu, + __entry->target + ) +); + +TRACE_EVENT(kvm_wait, + TP_PROTO(const char *name, int cpu), + TP_ARGS(name, cpu), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + ), + + TP_printk("PV qspinlock: %s, cpu %d wait kvm access wfi", + __get_str(name), + __entry->cpu + ) +); + +#endif /* _TRACE_PARAVIRT_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../../arch/arm64/kernel/ +#define TRACE_INCLUDE_FILE trace-paravirt + +#include diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index c0c050e53157d9908c91fd781aa1b5d3271e4092..3e16e6c364c63b47ed74dca9184e0dee6a70d6f3 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,7 +10,7 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ -kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ +kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 537a9d3ecc68454fb2742cc59e81c8c9e282c703..6ba9d90ab645f22ded70cefdc0dcb5e8f6c28102 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -394,6 +394,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_arm_pvtime_vcpu_init(&vcpu->arch); + kvm_arm_pvsched_vcpu_init(&vcpu->arch); + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; err = kvm_vgic_vcpu_init(vcpu); @@ -469,10 +471,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) vcpu_set_on_unsupported_cpu(vcpu); + + if (kvm_arm_is_pvsched_enabled(&vcpu->arch)) + kvm_update_pvsched_preempted(vcpu, 0); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -488,6 +494,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu_clear_on_unsupported_cpu(vcpu); vcpu->cpu = -1; + + if (kvm_arm_is_pvsched_enabled(&vcpu->arch)) + kvm_update_pvsched_preempted(vcpu, 1); } static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) @@ -565,7 +574,9 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); - return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) + bool pv_unhalted = v->arch.pvsched.pv_unhalted; + + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v) || pv_unhalted) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } @@ -1342,6 +1353,8 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, spin_unlock(&vcpu->arch.mp_state_lock); + kvm_arm_pvsched_vcpu_init(&vcpu->arch); + return 0; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 617ae6dea5d5bcf01c8c72cec5f16c684151905c..b9a44c3bebb703e57528378218038a69a1882a8d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -121,6 +121,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; + vcpu->arch.pvsched.pv_unhalted = false; } if (esr & ESR_ELx_WFx_ISS_WFxT) { diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 7fb4df0456dea53f9cdeccdd28c2ff47a8ce6ff3..243c91bb54e0b08a17f253aed028207d04665dc9 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -332,6 +332,9 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) &smccc_feat->std_hyp_bmap)) val[0] = SMCCC_RET_SUCCESS; break; + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val[0] = SMCCC_RET_SUCCESS; + break; } break; case ARM_SMCCC_HV_PV_TIME_FEATURES: @@ -342,6 +345,23 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) if (gpa != INVALID_GPA) val[0] = gpa; break; + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val[0] = kvm_hypercall_pvsched_features(vcpu); + break; + case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: + gpa = smccc_get_arg1(vcpu); + if (gpa != INVALID_GPA) { + vcpu->arch.pvsched.base = gpa; + val[0] = SMCCC_RET_SUCCESS; + } + break; + case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: + vcpu->arch.pvsched.base = INVALID_GPA; + val[0] = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_HV_PV_SCHED_KICK_CPU: + val[0] = kvm_pvsched_kick_vcpu(vcpu); + break; case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0; val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1; diff --git a/arch/arm64/kvm/pvsched.c b/arch/arm64/kvm/pvsched.c new file mode 100644 index 0000000000000000000000000000000000000000..5ff680f7fbc59cf587f7b6f038b86b1d1848438e --- /dev/null +++ b/arch/arm64/kvm/pvsched.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye + */ + +#ifdef CONFIG_PARAVIRT_SCHED +#include +#include + +#include + +#include + +#include "trace.h" + +void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted) +{ + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.pvsched.base; + u64 offset = offsetof(struct pvsched_vcpu_state, preempted); + int idx; + + if (base == INVALID_GPA) + return; + + /* + * This function is called from atomic context, so we need to + * disable page faults. + */ + pagefault_disable(); + + idx = srcu_read_lock(&kvm->srcu); + kvm_put_guest(kvm, base + offset, cpu_to_le32(preempted)); + srcu_read_unlock(&kvm->srcu, idx); + + pagefault_enable(); +} + +long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu) +{ + unsigned int vcpu_idx; + long val = SMCCC_RET_NOT_SUPPORTED; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *target = NULL; + + vcpu_idx = smccc_get_arg1(vcpu); + target = kvm_get_vcpu(kvm, vcpu_idx); + if (!target) + goto out; + + target->arch.pvsched.pv_unhalted = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, target); + kvm_vcpu_kick(target); + if (READ_ONCE(target->ready)) + kvm_vcpu_yield_to(target); + + val = SMCCC_RET_SUCCESS; + trace_kvm_pvsched_kick_vcpu(vcpu->vcpu_id, target->vcpu_id); + +out: + return val; +} + +long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: + case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: + case ARM_SMCCC_HV_PV_SCHED_KICK_CPU: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +} +#endif /* CONFIG_PARAVIRT_SCHED */ + diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 8ad53104934dcdfdc892852ce4f67cb1d0138f52..680c7e39af4a9a6b3754a21ed85688f6de992c4a 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -390,6 +390,24 @@ TRACE_EVENT(kvm_forward_sysreg_trap, sys_reg_Op2(__entry->sysreg)) ); +TRACE_EVENT(kvm_pvsched_kick_vcpu, + TP_PROTO(int vcpu_id, int target_vcpu_id), + TP_ARGS(vcpu_id, target_vcpu_id), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, target_vcpu_id) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->target_vcpu_id = target_vcpu_id; + ), + + TP_printk("PV qspinlock: vcpu %d kick target vcpu %d", + __entry->vcpu_id, __entry->target_vcpu_id) +); + #endif /* _TRACE_ARM_ARM64_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 083f8565371616269fa0050036ec8a3631c44357..02b857f44ae8efc592b46f02bc1844186ec7e969 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -577,5 +577,30 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, method; \ }) +/* Paravirtualised sched calls */ +#define ARM_SMCCC_HV_PV_SCHED_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x90) + +#define ARM_SMCCC_HV_PV_SCHED_IPA_INIT \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x91) + +#define ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x92) + +#define ARM_SMCCC_HV_PV_SCHED_KICK_CPU \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x93) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 624d4a38c358a08f2ca417523058bc1c6a319a8d..f94a1b8e34e0278f09e176935fe9522c13c04795 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -190,6 +190,7 @@ enum cpuhp_state { CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, CPUHP_AP_ARM_XEN_RUNSTATE_STARTING, + CPUHP_AP_ARM_KVM_PVSCHED_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING,