diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f171ab3d0d37cc91987c5e9929036f633d4b2d12..aa69338f662803eff464c375cdabe42395fb9f8f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -412,6 +412,7 @@ struct kvm_vcpu_arch {
 	/* Guest PV sched state */
 	struct {
 		bool pv_unhalted;
+		bool preempted;
 		gpa_t base;
 	} pvsched;
 
@@ -645,12 +646,14 @@ long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);
 void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted);
 long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu);
 
+extern bool pv_preempted_enable;
 static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
 {
 	vcpu_arch->pvsched.base = GPA_INVALID;
+	vcpu_arch->pvsched.preempted = false;
 }
 
-static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch)
+static inline bool kvm_arm_is_pvsched_valid(struct kvm_vcpu_arch *vcpu_arch)
 {
 	return (vcpu_arch->pvsched.base != GPA_INVALID);
 }
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 240edaa9eb50b955e9c322c0f5035d0dc049dd41..d6962ea83e18e343afa202ab6996fe8657f80730 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -84,6 +84,15 @@
 unsigned int twedel = 0;
 module_param(twedel, uint, S_IRUGO | S_IWUSR);
 #endif
+
+static const struct kernel_param_ops pv_preempted_enable_ops = {
+	.set = param_set_bool,
+	.get = param_get_bool,
+};
+
+bool pv_preempted_enable = true;
+MODULE_PARM_DESC(pv_preempted_enable, "Enable updating of the PV preempted state (bool)");
+module_param_cb(pv_preempted_enable, &pv_preempted_enable_ops, &pv_preempted_enable, S_IRUGO | S_IWUSR);
 
 static int vcpu_req_reload_wfi_traps(const char *val, const struct kernel_param *kp);
 
 static const struct kernel_param_ops force_wfi_trap_ops = {
@@ -575,8 +584,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (vcpu_has_ptrauth(vcpu))
 		vcpu_ptrauth_disable(vcpu);
 
-	if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
-		kvm_update_pvsched_preempted(vcpu, 0);
+	/*
+	 * When pv_preempted_enable is changed from enabled to disabled, the
+	 * preempted state will not be updated in kvm_arch_vcpu_put/load. So
+	 * we must update the preempted state to 0 for every vCPU in case
+	 * some vCPUs' preempted state would otherwise stay 1 forever.
+	 */
+	if (kvm_arm_is_pvsched_valid(&vcpu->arch)) {
+		if (pv_preempted_enable)
+			kvm_update_pvsched_preempted(vcpu, 0);
+		else {
+			if (vcpu->arch.pvsched.preempted)
+				kvm_update_pvsched_preempted(vcpu, 0);
+		}
+	}
 
 #ifdef CONFIG_KVM_HISI_VIRT
 	kvm_hisi_dvmbm_load(vcpu);
@@ -600,7 +621,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
 	vcpu->cpu = -1;
 
-	if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
+	if (kvm_arm_is_pvsched_valid(&vcpu->arch) && pv_preempted_enable)
 		kvm_update_pvsched_preempted(vcpu, 1);
 
 #ifdef CONFIG_KVM_HISI_VIRT
diff --git a/arch/arm64/kvm/pvsched.c b/arch/arm64/kvm/pvsched.c
index dc1768815467b41289a7ad5ff1231163f90896f4..9693415226d138ebd9efbf8c5f833aaad27a1675 100644
--- a/arch/arm64/kvm/pvsched.c
+++ b/arch/arm64/kvm/pvsched.c
@@ -34,6 +34,8 @@ void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
 
 	srcu_read_unlock(&kvm->srcu, idx);
 	pagefault_enable();
+
+	vcpu->arch.pvsched.preempted = !!preempted;
 }
 
 long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 4805a12e06169fd04cbb574ddc167097fcf42fff..201015e0ebd1cc47c7d263dc4d6acac8213de644 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2746,6 +2746,97 @@ static int vgic_its_has_attr(struct kvm_device *dev,
 	return -ENXIO;
 }
 
+/* We use guest RAM just at offset 0x0 of the LPI pending table. */
+#define PVSCHED_GPA_OFFSET 0x0
+static void pvsched_save_all_gpa(struct kvm *kvm)
+{
+	int i, ret;
+	struct kvm_vcpu *vcpu;
+	gpa_t pendbase, ptr, val;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+		/*
+		 * Quote the GIC spec - "Changing GICR_PENDBASER with
+		 * GICR_CTLR.EnableLPIs == 1 is UNPREDICTABLE." We're pretty
+		 * sure 'pendbaser' is valid with that.
+		 */
+		if (!vgic_cpu->lpis_enabled)
+			continue;
+
+		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+		ptr = pendbase + PVSCHED_GPA_OFFSET;
+
+		ret = kvm_read_guest_lock(kvm, ptr, &val, sizeof(gpa_t));
+		if (ret) {
+			kvm_err("%s: read from addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret);
+			continue;
+		}
+		/*
+		 * The GIC spec states that "Behavior is UNPREDICTABLE if the
+		 * LPI Pending tables contain non-zero values". Feel free to
+		 * go ahead and corrupt the insane guest.
+		 */
+		if (val != 0) {
+			kvm_err("%s: read[%p] != 0 from addr[%p]", __func__, (void *)val, (void *)ptr);
+		}
+
+		kvm_debug("%s: origin addr[%p]", __func__, (void *)val);
+
+		/*
+		 * We save the 'pvsched.base' as a generic state, regardless of
+		 * whether it is valid or not.
+		 */
+		ret = kvm_write_guest_lock(kvm, ptr, &vcpu->arch.pvsched.base, sizeof(vcpu->arch.pvsched.base));
+		kvm_debug("%s: save pvsched.base[%p] into addr[%p], ret = %d\n", __func__, (void *)vcpu->arch.pvsched.base, (void *)ptr, ret);
+	}
+}
+
+static void pvsched_restore_all_gpa(struct kvm *kvm)
+{
+	int i, ret;
+	struct kvm_vcpu *vcpu;
+	gpa_t pendbase, ptr, val;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+		if (!vgic_cpu->lpis_enabled)
+			continue;
+
+		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+		ptr = pendbase + PVSCHED_GPA_OFFSET;
+
+		ret = kvm_read_guest_lock(kvm, ptr, &val, sizeof(gpa_t));
+		if (ret) {
+			kvm_err("%s: read from addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret);
+			continue;
+		}
+
+		kvm_debug("%s: restore [%p] into pvsched.base[%p] from addr[%p]", __func__, (void *)val, (void *)vcpu->arch.pvsched.base, (void *)ptr);
+
+		if (val == 0)
+			continue;
+
+		/* Here vcpu->arch.pvsched.pv_unhalted means the pvsched info
+		 * saved in the pending tables is valid.
+		 */
+		if (vcpu->arch.pvsched.pv_unhalted) {
+			vcpu->arch.pvsched.base = val;
+			kvm_debug("%s: migrate from new version, pending table's pvsched info is valid. Restore it.\n", __func__);
+		}
+
+		val = 0;
+		ret = kvm_write_guest_lock(kvm, ptr, &val, sizeof(vcpu->arch.pvsched.base));
+		if (ret)
+			kvm_err("%s: restore 0 into addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret);
+	}
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/* Now vcpu->arch.pvsched.pv_unhalted means the original pvsched.pv_unhalted again */
+		vcpu->arch.pvsched.pv_unhalted = false;
+	}
+}
+
 static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 {
 	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
@@ -2769,9 +2860,11 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 		break;
 	case KVM_DEV_ARM_ITS_SAVE_TABLES:
 		ret = abi->save_tables(its);
+		pvsched_save_all_gpa(kvm);
 		break;
 	case KVM_DEV_ARM_ITS_RESTORE_TABLES:
 		ret = abi->restore_tables(its);
+		pvsched_restore_all_gpa(kvm);
 		break;
 	}
 
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index b6bdf22abab11bb43f6d8a7cd4a08369db17ac70..526fc05f887847aa652304f74c2d0a1af6597e91 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -552,6 +552,25 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
 			 pendbaser) != old_pendbaser);
 }
 
+static unsigned long vgic_mmio_uaccess_read_waker(struct kvm_vcpu *vcpu,
+						  gpa_t addr, unsigned int len)
+{
+	return GICR_WAKER_PVSCHED_VALID;
+}
+
+/* If set, the pvsched info stored in the pending table is valid.
+ * When migrating from an old version the kernel didn't save pvsched info,
+ * so we just reuse vcpu->arch.pvsched.pv_unhalted and clear it in pvsched_restore_all_gpa().
+ */
+static int vgic_mmio_uaccess_write_waker(struct kvm_vcpu *vcpu,
+					 gpa_t addr, unsigned int len,
+					 unsigned long val)
+{
+	vcpu->arch.pvsched.pv_unhalted = !!(val & GICR_WAKER_PVSCHED_VALID);
+
+	return 0;
+}
+
 /*
  * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
  * redistributors, while SPIs are covered by registers in the distributor
@@ -648,8 +667,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
 		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
 		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
-		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_WAKER,
+		vgic_mmio_read_raz, vgic_mmio_write_wi,
+		vgic_mmio_uaccess_read_waker, vgic_mmio_uaccess_write_waker, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
 		vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8,
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 88b02e3b81da7e4510a7418444666f578a9a41d3..3847c2de7694b57b466e54d4088e7e42dca58277 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -148,5 +148,6 @@
 #define GICR_WAKER_ProcessorSleep	(1U << 1)
 #define GICR_WAKER_ChildrenAsleep	(1U << 2)
+#define GICR_WAKER_PVSCHED_VALID	(1U << 31)
 
 #define GIC_BASER_CACHE_nCnB		0ULL
 #define GIC_BASER_CACHE_SameAsInner	0ULL