From 266a36a33889db8c37568b69a4c73fd946e1ec67 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:01 +0100 Subject: [PATCH 01/10] KVM: arm64: vgic: Make kvm_vgic_inject_irq() take a vcpu pointer ANBZ: #27447 commit 9a0a75d3ccee20149587ab740a2dee31ba401ada upstream Passing a vcpu_id to kvm_vgic_inject_irq() is silly for two reasons: - we often confuse vcpu_id and vcpu_idx - we eventually have to convert it back to a vcpu - we can't count Instead, pass a vcpu pointer, which is unambiguous. A NULL vcpu is also allowed for interrupts that are not private to a vcpu (such as SPIs). Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-2-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: I22d9e29a3b10a77caac2bf6047a7529e404d4797 --- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/arm.c | 23 ++++++++--------------- arch/arm64/kvm/pmu-emul.c | 2 +- arch/arm64/kvm/vgic/vgic-irqfd.c | 2 +- arch/arm64/kvm/vgic/vgic.c | 12 +++++------- include/kvm/arm_vgic.h | 4 ++-- 6 files changed, 18 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 89489ad31c06..8f0a7a3cc9f8 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -452,7 +452,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, timer_ctx->irq.level); if (!userspace_irqchip(vcpu->kvm)) { - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, timer_irq(timer_ctx), timer_ctx->irq.level, timer_ctx); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 087e15406822..54857d9a41c5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1146,27 +1146,23 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status) { u32 irq = irq_level->irq; - unsigned int irq_type, vcpu_idx, irq_num; - int nrcpus = atomic_read(&kvm->online_vcpus); + unsigned int irq_type, vcpu_id, irq_num; struct kvm_vcpu *vcpu = NULL; bool level = irq_level->level; irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; - vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; - vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); + vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + vcpu_id += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; - trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + trace_kvm_irq_line(irq_type, vcpu_id, irq_num, irq_level->level); switch (irq_type) { case KVM_ARM_IRQ_TYPE_CPU: if (irqchip_in_kernel(kvm)) return -ENXIO; - if (vcpu_idx >= nrcpus) - return -EINVAL; - - vcpu = kvm_get_vcpu(kvm, vcpu_idx); + vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); if (!vcpu) return -EINVAL; @@ -1178,17 +1174,14 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (!irqchip_in_kernel(kvm)) return -ENXIO; - if (vcpu_idx >= nrcpus) - return -EINVAL; - - vcpu = kvm_get_vcpu(kvm, vcpu_idx); + vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); if (!vcpu) return -EINVAL; if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); + return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: if (!irqchip_in_kernel(kvm)) return -ENXIO; @@ 
-1196,7 +1189,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); + return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL); } return -EINVAL; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 05e20825a59a..49f53ab3d848 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -342,7 +342,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) pmu->irq_level = overflow; if (likely(irqchip_in_kernel(vcpu->kvm))) { - int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, pmu->irq_num, overflow, pmu); WARN_ON(ret); } diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 475059bacedf..8c711deb25aa 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -23,7 +23,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); + return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL); } /** diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 8be4c1ebdec2..db2a95762b1b 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -422,7 +422,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, /** * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs + * @vcpu: The CPU for PPIs or NULL for global interrupts * @intid: The INTID to inject a new state to. * @level: Edge-triggered: true: to trigger the interrupt * false: to ignore the call @@ -436,24 +436,22 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level, void *owner) +int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + unsigned int intid, bool level, void *owner) { - struct kvm_vcpu *vcpu; struct vgic_irq *irq; unsigned long flags; int ret; - trace_vgic_update_irq_pending(cpuid, intid, level); - ret = vgic_lazy_init(kvm); if (ret) return ret; - vcpu = kvm_get_vcpu(kvm, cpuid); if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) return -EINVAL; + trace_vgic_update_irq_pending(vcpu ? 
vcpu->vcpu_idx : 0, intid, level); + irq = vgic_get_irq(kvm, vcpu, intid); if (!irq) return -EINVAL; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7adbddf0483b..694d48469fb7 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -376,8 +376,8 @@ int kvm_vgic_hyp_init(void); void kvm_vgic_hyp_uninit(void); void kvm_vgic_init_cpu_hardware(void); -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level, void *owner); +int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + unsigned int intid, bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); -- Gitee From 0cfa46b7937a1dc1a5ff8daffca5cb62bfb19cf6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:02 +0100 Subject: [PATCH 02/10] KVM: arm64: vgic-its: Treat the collection target address as a vcpu_id ANBZ: #27447 commit d455d366c451e68781122c693e2e357c673ee807 upstream Since our emulated ITS advertises GITS_TYPER.PTA=0, the target address associated to a collection is a PE number and not an address. So far, so good. However, the PE number is what userspace has provided given us (aka the vcpu_id), and not the internal vcpu index. Make sure we consistently retrieve the vcpu by ID rather than by index, adding a helper that deals with most of the cases. We also get rid of the pointless (and bogus) comparisons to online_vcpus, which don't really make sense. Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-3-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: Ie3dbcf08603547652ec110e6028374bcf8cee413 --- arch/arm64/kvm/vgic/vgic-its.c | 49 +++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 608c39859f4e..dc43b51cc373 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -378,6 +378,12 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) return ret; } +static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm, + struct its_collection *col) +{ + return kvm_get_vcpu_by_id(kvm, col->target_addr); +} + /* * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI * is targeting) to the VGIC's view, which deals with target VCPUs. 
@@ -391,7 +397,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) if (!its_is_collection_mapped(ite->collection)) return; - vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + vcpu = collection_to_vcpu(kvm, ite->collection); update_affinity(ite->irq, vcpu); } @@ -686,7 +692,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, if (!ite || !its_is_collection_mapped(ite->collection)) return E_ITS_INT_UNMAPPED_INTERRUPT; - vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + vcpu = collection_to_vcpu(kvm, ite->collection); if (!vcpu) return E_ITS_INT_UNMAPPED_INTERRUPT; @@ -899,7 +905,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, return E_ITS_MOVI_UNMAPPED_COLLECTION; ite->collection = collection; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); vgic_its_invalidate_cache(kvm); @@ -1133,7 +1139,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, } if (its_is_collection_mapped(collection)) - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); irq = vgic_add_lpi(kvm, lpi_nr, vcpu); if (IS_ERR(irq)) { @@ -1256,21 +1262,22 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { u16 coll_id; - u32 target_addr; struct its_collection *collection; bool valid; valid = its_cmd_get_validbit(its_cmd); coll_id = its_cmd_get_collection(its_cmd); - target_addr = its_cmd_get_target_addr(its_cmd); - - if (target_addr >= atomic_read(&kvm->online_vcpus)) - return E_ITS_MAPC_PROCNUM_OOR; if (!valid) { vgic_its_free_collection(its, coll_id); vgic_its_invalidate_cache(kvm); } else { + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); + if (!vcpu) + return E_ITS_MAPC_PROCNUM_OOR; + collection = find_collection(its, coll_id); if (!collection) { @@ -1284,9 +1291,9 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, coll_id); if (ret) return ret; - collection->target_addr = target_addr; + collection->target_addr = vcpu->vcpu_id; } else { - collection->target_addr = target_addr; + collection->target_addr = vcpu->vcpu_id; update_affinity_collection(kvm, its, collection); } } @@ -1396,7 +1403,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, if (!its_is_collection_mapped(collection)) return E_ITS_INVALL_UNMAPPED_COLLECTION; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); vgic_its_invall(vcpu); return 0; @@ -1413,23 +1420,21 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { - u32 target1_addr = its_cmd_get_target_addr(its_cmd); - u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); struct kvm_vcpu *vcpu1, *vcpu2; struct vgic_irq *irq; u32 *intids; int irq_count, i; - if (target1_addr >= atomic_read(&kvm->online_vcpus) || - target2_addr >= atomic_read(&kvm->online_vcpus)) + /* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */ + vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); + vcpu2 = kvm_get_vcpu_by_id(kvm, its_cmd_mask_field(its_cmd, 3, 16, 32)); + + if (!vcpu1 || !vcpu2) return E_ITS_MOVALL_PROCNUM_OOR; - if (target1_addr == target2_addr) + if (vcpu1 == vcpu2) return 0; - vcpu1 = kvm_get_vcpu(kvm, target1_addr); - vcpu2 = kvm_get_vcpu(kvm, target2_addr); - irq_count = 
vgic_copy_lpi_list(kvm, vcpu1, &intids); if (irq_count < 0) return irq_count; @@ -2274,7 +2279,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, return PTR_ERR(ite); if (its_is_collection_mapped(collection)) - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = kvm_get_vcpu_by_id(kvm, collection->target_addr); irq = vgic_add_lpi(kvm, lpi_id, vcpu); if (IS_ERR(irq)) { @@ -2589,7 +2594,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) coll_id = val & KVM_ITS_CTE_ICID_MASK; if (target_addr != COLLECTION_NOT_MAPPED && - target_addr >= atomic_read(&kvm->online_vcpus)) + !kvm_get_vcpu_by_id(kvm, target_addr)) return -EINVAL; collection = find_collection(its, coll_id); -- Gitee From 3b69e66177f3cf0bb4856e5ac0913e3ba980d43b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:03 +0100 Subject: [PATCH 03/10] KVM: arm64: vgic-v3: Refactor GICv3 SGI generation ANBZ: #27447 commit f3f60a56539108dacfd87b6514398b3970daec54 upstream As we're about to change the way SGIs are sent, start by splitting out some of the basic functionnality: instead of intermingling the broadcast and non-broadcast cases with the actual SGI generation, perform the following cleanups: - move the SGI queuing into its own helper - split the broadcast code from the affinity-driven code - replace the mask/shift combinations with FIELD_GET() - fix the confusion between vcpu_id and vcpu when handling the broadcast case The result is much more readable, and paves the way for further optimisations. Tested-by: Joey Gouly Tested-by: Shameer Kolothum Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-4-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: Icf297301654d5b769ddd24b0f31c9d2a870d888f --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 110 ++++++++++++++++------------- 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 7c0b23415ad9..31e0b5aa2678 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1075,6 +1075,38 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) +static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi); + unsigned long flags; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? 
Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); +} + /** * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs * @vcpu: The VCPU requesting a SGI @@ -1093,19 +1125,30 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) { struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *c_vcpu; - u16 target_cpus; + unsigned long target_cpus; u64 mpidr; - int sgi; - int vcpu_id = vcpu->vcpu_id; - bool broadcast; - unsigned long c, flags; - - sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); - target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; + u32 sgi; + unsigned long c; + + sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg); + + /* Broadcast */ + if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) { + kvm_for_each_vcpu(c, c_vcpu, kvm) { + /* Don't signal the calling VCPU */ + if (c_vcpu == vcpu) + continue; + + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); + } + + return; + } + mpidr = SGI_AFFINITY_LEVEL(reg, 3); mpidr |= SGI_AFFINITY_LEVEL(reg, 2); mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg); /* * We iterate over all VCPUs to find the MPIDRs matching the request. @@ -1114,54 +1157,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) * VCPUs when most of the times we just signal a single VCPU. */ kvm_for_each_vcpu(c, c_vcpu, kvm) { - struct vgic_irq *irq; + int level0; /* Exit early if we have dealt with all requested CPUs */ - if (!broadcast && target_cpus == 0) + if (target_cpus == 0) break; - - /* Don't signal the calling VCPU */ - if (broadcast && c == vcpu_id) + level0 = match_mpidr(mpidr, target_cpus, c_vcpu); + if (level0 == -1) continue; - if (!broadcast) { - int level0; - - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) - continue; - - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); - } + /* remove this matching VCPU from the mask */ + target_cpus &= ~BIT(level0); - irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - - /* - * An access targeting Group0 SGIs can only generate - * those, while an access targeting Group1 SGIs can - * generate interrupts of either group. - */ - if (!irq->group || allow_group1) { - if (!irq->hw) { - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - } else { - /* HW SGI? Ask the GIC to inject it */ - int err; - err = irq_set_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - true); - WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } - } else { - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } - - vgic_put_irq(vcpu->kvm, irq); + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); } } -- Gitee From c6f20ec415195578969136f0656124c7253a3a41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:05 +0100 Subject: [PATCH 04/10] KVM: arm64: vgic: Use vcpu_idx for the debug information ANBZ: #27447 commit ac0fe56d46c0d534dddcf97d132c222722b9b7a5 upstream When dumping the debug information, use vcpu_idx instead of vcpu_id, as this is independent of any userspace influence. 
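To make the distinction concrete, here is a stand-alone toy example (purely illustrative: the structure and the id values are invented, none of this is KVM code) showing that the userspace-chosen vcpu_id and the kernel's vcpu_idx need not line up:

    /*
     * Toy illustration only. The "id" is whatever userspace passed to
     * KVM_CREATE_VCPU, the "idx" is simply the creation order.
     */
    #include <stdio.h>

    struct toy_vcpu {
        int vcpu_idx;   /* kernel-chosen: position in creation order */
        int vcpu_id;    /* userspace-chosen identifier */
    };

    int main(void)
    {
        /* userspace created its vcpus with sparse, out-of-order ids */
        struct toy_vcpu vcpus[] = {
            { .vcpu_idx = 0, .vcpu_id = 0 },
            { .vcpu_idx = 1, .vcpu_id = 256 },
            { .vcpu_idx = 2, .vcpu_id = 1 },
        };
        unsigned int i;

        for (i = 0; i < sizeof(vcpus) / sizeof(vcpus[0]); i++)
            printf("idx %d <-> id %d%s\n", vcpus[i].vcpu_idx,
                   vcpus[i].vcpu_id,
                   vcpus[i].vcpu_idx != vcpus[i].vcpu_id ?
                   "  (indexing by id would pick the wrong vcpu)" : "");
        return 0;
    }

Anything keyed on the id therefore has to be resolved through kvm_get_vcpu_by_id(), never through a bare array index.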
Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-6-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: I8b99df9404f2c77381d1a1a2d5619834b01163c5 --- arch/arm64/kvm/vgic/vgic-debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 07aa0437125a..85606a531dc3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -166,7 +166,7 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq, if (vcpu) { hdr = "VCPU"; - id = vcpu->vcpu_id; + id = vcpu->vcpu_idx; } seq_printf(s, "\n"); @@ -212,7 +212,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, " %2d " "\n", type, irq->intid, - (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_idx : -1, pending, irq->line_level, irq->active, @@ -224,7 +224,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, irq->mpidr, irq->source, irq->priority, - (irq->vcpu) ? irq->vcpu->vcpu_id : -1); + (irq->vcpu) ? irq->vcpu->vcpu_idx : -1); } static int vgic_debug_show(struct seq_file *s, void *v) -- Gitee From a1e18b83930031477eab11cd15d3f2aeef4eadc5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:06 +0100 Subject: [PATCH 05/10] KVM: arm64: Use vcpu_idx for invalidation tracking ANBZ: #27447 commit 5f4bd815ec718992c09de1f39479ae90dcbb7df3 upstream While vcpu_id isn't necessarily a bad choice as an identifier for the currently running vcpu, it is provided by userspace, and there is close to no guarantee that it would be unique. Switch it to vcpu_idx instead, for which we have much stronger guarantees. Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-7-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: I10acd79e1f80e63e6136cd9e01356d7cb4286601 --- arch/arm64/kvm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 54857d9a41c5..2609073f2d4d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -442,9 +442,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. */ - if (*last_ran != vcpu->vcpu_id) { + if (*last_ran != vcpu->vcpu_idx) { kvm_call_hyp(__kvm_flush_cpu_context, mmu); - *last_ran = vcpu->vcpu_id; + *last_ran = vcpu->vcpu_idx; } vcpu->cpu = cpu; -- Gitee From 2a712c5fc03f515f2736b96f8d504013c1a332ab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:07 +0100 Subject: [PATCH 06/10] KVM: arm64: Simplify kvm_vcpu_get_mpidr_aff() ANBZ: #27447 commit 0a2acd38d23b8865b3a5a8c851001350df76fc09 upstream By definition, MPIDR_EL1 cannot be modified by the guest. This means it is pointless to check whether this is loaded on the CPU. Simplify the kvm_vcpu_get_mpidr_aff() helper to directly access the in-memory value. 
Reviewed-by: Joey Gouly
Reviewed-by: Zenghui Yu
Tested-by: Joey Gouly
Tested-by: Shameer Kolothum
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20230927090911.3355209-8-maz@kernel.org
Signed-off-by: Oliver Upton
Signed-off-by: Jun He
Change-Id: I136cc975504fead3d12cc6c3c3f059377ad4b300
---
 arch/arm64/include/asm/kvm_emulate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3d6725ff0bf6..b66ef77cf49e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -465,7 +465,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
-	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+	return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
-- 
Gitee

From 17e66bad97ffbc057dd78e2a173c0d9dfc249b6d Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 27 Sep 2023 10:09:08 +0100
Subject: [PATCH 07/10] KVM: arm64: Build MPIDR to vcpu index cache at runtime

ANBZ: #27447

commit 5544750efd51143fee9e9184d69518c0c05426a1 upstream

The MPIDR_EL1 register contains a unique value that identifies the CPU.
The only problem with it is that it is stupidly large (32 bits, once the
useless stuff is removed).

Trying to obtain a vcpu from an MPIDR value is a fairly common, yet
costly operation: we iterate over all the vcpus until we find the
correct one. While this is cheap for small VMs, it is pretty expensive
on large ones, especially if you are trying to get to the one that's at
the end of the list...

In order to help with this, it is important to realise that the MPIDR
values are actually structured, and that implementations tend to use a
small number of significant bits in the 32bit space.

We can use this fact to our advantage by computing a small hash table
that uses the "compression" of the significant MPIDR bits as an index,
giving us the vcpu index as a result.

Given that the MPIDR values can be supplied by userspace, and that an
evil VMM could decide to make *all* bits significant, resulting in a
4G-entry table, we only use this method if the resulting table fits in
a single page. Otherwise, we fall back to the good old iterative method.

Nothing uses that table just yet, but keep your eyes peeled.
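To make the compression concrete, here is a stand-alone user-space model of the idea (purely illustrative: the helper and the sample two-cluster topology below are invented for the example, this is not the kernel implementation):

    #include <stdio.h>
    #include <stdint.h>

    /* pack the bits selected by @mask into a contiguous index (PEXT-like) */
    static unsigned int mpidr_index(uint64_t mask, uint64_t aff)
    {
        unsigned int bit, bit_idx = 0, index = 0;

        for (bit = 0; bit < 64; bit++) {
            if (!(mask & (1ULL << bit)))
                continue;
            index |= ((aff >> bit) & 1) << bit_idx;
            bit_idx++;
        }
        return index;
    }

    int main(void)
    {
        /* 2 clusters (Aff1 = 0/1) of 4 CPUs (Aff0 = 0..3) */
        uint64_t mpidr[8], aff_set = 0, aff_clr = ~0ULL, mask;
        int c;

        for (c = 0; c < 8; c++) {
            mpidr[c] = (uint64_t)(c / 4) << 8 | (c % 4);
            aff_set |= mpidr[c];
            aff_clr &= mpidr[c];
        }

        mask = aff_set ^ aff_clr;   /* only the bits that actually vary */
        printf("mask 0x%llx -> %u-entry table\n", (unsigned long long)mask,
               1U << __builtin_popcountll(mask));

        for (c = 0; c < 8; c++)
            printf("mpidr 0x%03llx -> index %u\n",
                   (unsigned long long)mpidr[c], mpidr_index(mask, mpidr[c]));
        return 0;
    }

With that topology only three affinity bits vary, so the table has eight entries; the single-page limit only becomes a concern if userspace scatters the significant bits around.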
Reviewed-by: Joey Gouly Reviewed-by: Zenghui Yu Tested-by: Joey Gouly Tested-by: Shameer Kolothum Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-9-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: I5443fb99c82e8434ac3e593dc9f10ba1a79118c6 --- arch/arm64/include/asm/kvm_host.h | 28 ++++++++++++++++ arch/arm64/kvm/arm.c | 54 +++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2a0b36e4879d..f0fb2540cc6d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -201,6 +201,31 @@ struct kvm_protected_vm { struct kvm_hyp_memcache teardown_mc; }; +struct kvm_mpidr_data { + u64 mpidr_mask; + DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx); +}; + +static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) +{ + unsigned long mask = data->mpidr_mask; + u64 aff = mpidr & MPIDR_HWID_BITMASK; + int nbits, bit, bit_idx = 0; + u16 index = 0; + + /* + * If this looks like RISC-V's BEXT or x86's PEXT + * instructions, it isn't by accident. + */ + nbits = fls(mask); + for_each_set_bit(bit, &mask, nbits) { + index |= (aff & BIT(bit)) >> (bit - bit_idx); + bit_idx++; + } + + return index; +} + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -247,6 +272,9 @@ struct kvm_arch { /* VM-wide vCPU feature set */ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES); + /* MPIDR to vcpu index mapping, optional */ + struct kvm_mpidr_data *mpidr_data; + /* * VM-wide PMU filter, implemented as a bitmap and big enough for * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 2609073f2d4d..bdc7e3f5444d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -206,6 +206,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (is_protected_kvm_enabled()) pkvm_destroy_hyp_vm(kvm); + kfree(kvm->arch.mpidr_data); kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); @@ -595,6 +596,57 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) return vcpu_get_flag(vcpu, VCPU_INITIALIZED); } +static void kvm_init_mpidr_data(struct kvm *kvm) +{ + struct kvm_mpidr_data *data = NULL; + unsigned long c, mask, nr_entries; + u64 aff_set = 0, aff_clr = ~0UL; + struct kvm_vcpu *vcpu; + + mutex_lock(&kvm->arch.config_lock); + + if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1) + goto out; + + kvm_for_each_vcpu(c, vcpu, kvm) { + u64 aff = kvm_vcpu_get_mpidr_aff(vcpu); + aff_set |= aff; + aff_clr &= aff; + } + + /* + * A significant bit can be either 0 or 1, and will only appear in + * aff_set. Use aff_clr to weed out the useless stuff. + */ + mask = aff_set ^ aff_clr; + nr_entries = BIT_ULL(hweight_long(mask)); + + /* + * Don't let userspace fool us. If we need more than a single page + * to describe the compressed MPIDR array, just fall back to the + * iterative method. Single vcpu VMs do not need this either. 
+ */ + if (struct_size(data, cmpidr_to_idx, nr_entries) <= PAGE_SIZE) + data = kzalloc(struct_size(data, cmpidr_to_idx, nr_entries), + GFP_KERNEL_ACCOUNT); + + if (!data) + goto out; + + data->mpidr_mask = mask; + + kvm_for_each_vcpu(c, vcpu, kvm) { + u64 aff = kvm_vcpu_get_mpidr_aff(vcpu); + u16 index = kvm_mpidr_index(data, aff); + + data->cmpidr_to_idx[index] = c; + } + + kvm->arch.mpidr_data = data; +out: + mutex_unlock(&kvm->arch.config_lock); +} + /* * Handle both the initialisation that is being done when the vcpu is * run for the first time, as well as the updates that must be @@ -618,6 +670,8 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (likely(vcpu_has_run_once(vcpu))) return 0; + kvm_init_mpidr_data(kvm); + kvm_arm_vcpu_init_debug(vcpu); if (likely(irqchip_in_kernel(kvm))) { -- Gitee From e1efd7ef0527d07104c6bcdd88ac23b064b38b98 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:09 +0100 Subject: [PATCH 08/10] KVM: arm64: Fast-track kvm_mpidr_to_vcpu() when mpidr_data is available ANBZ: #27447 commit 54a8006d0b49044d0cb682119686a45de906fe3c upstream If our fancy little table is present when calling kvm_mpidr_to_vcpu(), use it to recover the corresponding vcpu. Reviewed-by: Joey Gouly Reviewed-by: Zenghui Yu Tested-by: Joey Gouly Tested-by: Shameer Kolothum Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-10-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: I1bdc4775a008b42b228f65831428a0be11e72779 --- arch/arm64/kvm/arm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bdc7e3f5444d..8737145b40a0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2505,6 +2505,18 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) unsigned long i; mpidr &= MPIDR_HWID_BITMASK; + + if (kvm->arch.mpidr_data) { + u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr); + + vcpu = kvm_get_vcpu(kvm, + kvm->arch.mpidr_data->cmpidr_to_idx[idx]); + if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu)) + vcpu = NULL; + + return vcpu; + } + kvm_for_each_vcpu(i, vcpu, kvm) { if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) return vcpu; -- Gitee From 41ba667a2aaa5ae7d71fee264c8d9652f991d7c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:10 +0100 Subject: [PATCH 09/10] KVM: arm64: vgic-v3: Optimize affinity-based SGI injection ANBZ: #27447 commit b5daffb120bb60f974ae1a5589160b05c98e00e5 upstream Our affinity-based SGI injection code is a bit daft. We iterate over all the CPUs trying to match the set of affinities that the guest is trying to reach, leading to some very bad behaviours if the selected targets are at a high vcpu index. Instead, we can now use the fact that we have an optimised MPIDR to vcpu mapping, and only look at the relevant values. This results in a much faster injection for large VMs, and in a near constant time, irrespective of the position in the vcpu index space. As a bonus, this is mostly deleting a lot of hard-to-read code. Nobody will complain about that. 
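To make the decode step concrete, here is a stand-alone sketch of the fields the SGI path has to parse before it can do those lookups (purely illustrative: the register value is invented, the RS field is ignored, and the layout follows the GICv3 architecture rather than any KVM internals):

    #include <stdio.h>
    #include <stdint.h>

    #define SGI_TARGET_LIST(x) ((x) & 0xffffULL)          /* [15:0]  Aff0 bitmap */
    #define SGI_AFF1(x)        (((x) >> 16) & 0xffULL)    /* [23:16] */
    #define SGI_INTID(x)       (((x) >> 24) & 0xfULL)     /* [27:24] SGI number */
    #define SGI_AFF2(x)        (((x) >> 32) & 0xffULL)    /* [39:32] */
    #define SGI_IRM(x)         (((x) >> 40) & 0x1ULL)     /* [40]    broadcast */
    #define SGI_AFF3(x)        (((x) >> 48) & 0xffULL)    /* [55:48] */

    int main(void)
    {
        /* SGI 6 to Aff3.Aff2.Aff1 = 0.0.1, Aff0 targets {0, 2, 3} */
        uint64_t reg = 6ULL << 24 | 1ULL << 16 | 0x000d;
        unsigned int aff0;

        if (SGI_IRM(reg)) {
            printf("SGI %u: broadcast to everyone but self\n",
                   (unsigned int)SGI_INTID(reg));
            return 0;
        }

        for (aff0 = 0; aff0 < 16; aff0++) {
            if (!(SGI_TARGET_LIST(reg) & (1ULL << aff0)))
                continue;
            printf("SGI %u -> affinity %u.%u.%u.%u\n",
                   (unsigned int)SGI_INTID(reg), (unsigned int)SGI_AFF3(reg),
                   (unsigned int)SGI_AFF2(reg), (unsigned int)SGI_AFF1(reg),
                   aff0);
        }
        return 0;
    }

In the rewritten path, each set Aff0 bit turns into a single kvm_mpidr_to_vcpu() lookup instead of a walk over every vcpu in the VM.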
Suggested-by: Xu Zhao Tested-by: Joey Gouly Tested-by: Shameer Kolothum Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-11-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: I720a20f0f6499e4a48775cf4bf73373ada53f8b0 --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 64 +++++------------------------- 1 file changed, 11 insertions(+), 53 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 31e0b5aa2678..cea9745fa2c5 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1036,35 +1036,6 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) return 0; } -/* - * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI - * generation register ICC_SGI1R_EL1) with a given VCPU. - * If the VCPU's MPIDR matches, return the level0 affinity, otherwise - * return -1. - */ -static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) -{ - unsigned long affinity; - int level0; - - /* - * Split the current VCPU's MPIDR into affinity level 0 and the - * rest as this is what we have to compare against. - */ - affinity = kvm_vcpu_get_mpidr_aff(vcpu); - level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); - affinity &= ~MPIDR_LEVEL_MASK; - - /* bail out if the upper three levels don't match */ - if (sgi_aff != affinity) - return -1; - - /* Is this VCPU's bit set in the mask ? */ - if (!(sgi_cpu_mask & BIT(level0))) - return -1; - - return level0; -} /* * The ICC_SGI* registers encode the affinity differently from the MPIDR, @@ -1117,9 +1088,11 @@ static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) * This will trap in sys_regs.c and call this function. * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the * target processors as well as a bitmask of 16 Aff0 CPUs. - * If the interrupt routing mode bit is not set, we iterate over all VCPUs to - * check for matching ones. If this bit is set, we signal all, but not the - * calling VCPU. + * + * If the interrupt routing mode bit is not set, we iterate over the Aff0 + * bits and signal the VCPUs matching the provided Aff{3,2,1}. + * + * If this bit is set, we signal all, but not the calling VCPU. */ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) { @@ -1127,7 +1100,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) struct kvm_vcpu *c_vcpu; unsigned long target_cpus; u64 mpidr; - u32 sgi; + u32 sgi, aff0; unsigned long c; sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg); @@ -1145,31 +1118,16 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) return; } + /* We iterate over affinities to find the corresponding vcpus */ mpidr = SGI_AFFINITY_LEVEL(reg, 3); mpidr |= SGI_AFFINITY_LEVEL(reg, 2); mpidr |= SGI_AFFINITY_LEVEL(reg, 1); target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg); - /* - * We iterate over all VCPUs to find the MPIDRs matching the request. - * If we have handled one CPU, we clear its bit to detect early - * if we are already finished. This avoids iterating through all - * VCPUs when most of the times we just signal a single VCPU. 
- */ - kvm_for_each_vcpu(c, c_vcpu, kvm) { - int level0; - - /* Exit early if we have dealt with all requested CPUs */ - if (target_cpus == 0) - break; - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) - continue; - - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); - - vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); + for_each_set_bit(aff0, &target_cpus, hweight_long(ICC_SGI1R_TARGET_LIST_MASK)) { + c_vcpu = kvm_mpidr_to_vcpu(kvm, mpidr | aff0); + if (c_vcpu) + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); } } -- Gitee From 81806a620f6037bfa729c34327b25f5e402946d2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:11 +0100 Subject: [PATCH 10/10] KVM: arm64: Clarify the ordering requirements for vcpu/RD creation ANBZ: #27447 commit f9940416f193d148dd7ad7498ce4f40c83d12e7a upstream It goes without saying, but it is probably better to spell it out: If userspace tries to restore and VM, but creates vcpus and/or RDs in a different order, the vcpu/RD mapping will be different. Yes, our API is an ugly piece of crap and I can't believe that we missed this. If we want to relax the above, we'll need to define a new userspace API that allows the mapping to be specified, rather than relying on the kernel to perform the mapping on its own. Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-12-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Jun He Change-Id: If722e23403cdd8ec86046ec65aaf5faafb181eea --- Documentation/virt/kvm/devices/arm-vgic-v3.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst index 51e5e5762571..5817edb4e046 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst @@ -59,6 +59,13 @@ Groups: It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. + Note that to obtain reproducible results (the same VCPU being associated + with the same redistributor across a save/restore operation), VCPU creation + order, redistributor region creation order as well as the respective + interleaves of VCPU and region creation MUST be preserved. Any change in + either ordering may result in a different vcpu_id/redistributor association, + resulting in a VM that will fail to run at restore time. + Errors: ======= ============================================================= -- Gitee