diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7587dd0cdbce4669c8afb1750f628101d7b01746..fdabb70649148d8be4e91c78cd7543649aff1501 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1171,6 +1171,7 @@ struct kvm_x86_ops {
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	bool (*get_if_flag)(struct kvm_vcpu *vcpu);

 	void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
 	void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 09ec1cda2d687c517a3bfe5de1f94899f1b5f8e7..f7737e1df6606f4a38dbd91c78880d6ab65c81ae 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1684,7 +1684,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 {
 	bool longmode;

-	longmode = is_64_bit_mode(vcpu);
+	longmode = is_64_bit_hypercall(vcpu);
 	if (longmode)
 		kvm_rax_write(vcpu, result);
 	else {
@@ -1760,7 +1760,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	}

 #ifdef CONFIG_X86_64
-	if (is_64_bit_mode(vcpu)) {
+	if (is_64_bit_hypercall(vcpu)) {
 		param = kvm_rcx_read(vcpu);
 		ingpa = kvm_rdx_read(vcpu);
 		outgpa = kvm_r8_read(vcpu);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5c7c70674233c02538c5fa959ddc433d94255326..36cd903c3aa7214fc574ae83c34763db834a66f6 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -94,7 +94,7 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
 	return true;
 }

-static int sev_asid_new(struct kvm_sev_info *sev)
+static int sev_asid_new(bool es_active)
 {
 	int pos, min_asid, max_asid;
 	bool retry = true;
@@ -105,8 +105,8 @@ static int sev_asid_new(struct kvm_sev_info *sev)
 	 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
 	 * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
 	 */
-	min_asid = sev->es_active ? 0 : min_sev_asid - 1;
-	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+	min_asid = es_active ? 0 : min_sev_asid - 1;
+	max_asid = es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
 	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
 	if (pos >= max_asid) {
@@ -194,6 +194,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	bool es_active = argp->id == KVM_SEV_ES_INIT;
 	int asid, ret;

 	if (kvm->created_vcpus)
@@ -203,7 +204,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (unlikely(sev->active))
 		return ret;

-	asid = sev_asid_new(sev);
+	asid = sev_asid_new(es_active);
 	if (asid < 0)
 		return ret;

@@ -212,6 +213,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 		goto e_free;

 	sev->active = true;
+	sev->es_active = es_active;
 	sev->asid = asid;
 	INIT_LIST_HEAD(&sev->regions_list);

@@ -222,16 +224,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	return ret;
 }

-static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
-{
-	if (!sev_es)
-		return -ENOTTY;
-
-	to_kvm_svm(kvm)->sev_info.es_active = true;
-
-	return sev_guest_init(kvm, argp);
-}
-
 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 {
 	struct sev_data_activate *data;
@@ -602,51 +594,67 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 	return 0;
 }

-static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				    int *error)
 {
-	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_launch_update_vmsa *vmsa;
-	int i, ret;
-
-	if (!sev_es_guest(kvm))
-		return -ENOTTY;
+	struct vcpu_svm *svm = to_svm(vcpu);
+	int ret;

 	vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
 	if (!vmsa)
 		return -ENOMEM;

-	for (i = 0; i < kvm->created_vcpus; i++) {
-		struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+	/* Perform some pre-encryption checks against the VMSA */
+	ret = sev_es_sync_vmsa(svm);
+	if (ret)
+		goto e_free;

-		/* Perform some pre-encryption checks against the VMSA */
-		ret = sev_es_sync_vmsa(svm);
-		if (ret)
-			goto e_free;
+	/*
+	 * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
+	 * the VMSA memory content (i.e it will write the same memory region
+	 * with the guest's key), so invalidate it first.
+	 */
+	clflush_cache_range(svm->vmsa, PAGE_SIZE);

-		/*
-		 * The LAUNCH_UPDATE_VMSA command will perform in-place
-		 * encryption of the VMSA memory content (i.e it will write
-		 * the same memory region with the guest's key), so invalidate
-		 * it first.
-		 */
-		clflush_cache_range(svm->vmsa, PAGE_SIZE);
+	vmsa->handle = to_kvm_svm(kvm)->sev_info.handle;
+	vmsa->address = __sme_pa(svm->vmsa);
+	vmsa->len = PAGE_SIZE;
+	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa, error);

-		vmsa->handle = sev->handle;
-		vmsa->address = __sme_pa(svm->vmsa);
-		vmsa->len = PAGE_SIZE;
-		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
-				    &argp->error);
-		if (ret)
-			goto e_free;
+	if (ret)
+		goto e_free;

-		svm->vcpu.arch.guest_state_protected = true;
-	}
+	vcpu->arch.guest_state_protected = true;

 e_free:
 	kfree(vmsa);
 	return ret;
 }

+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+	struct kvm_vcpu *vcpu;
+	int i, ret;
+
+	if (!sev_es_guest(kvm))
+		return -ENOTTY;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		ret = mutex_lock_killable(&vcpu->mutex);
+		if (ret)
+			return ret;
+
+		ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
+
+		mutex_unlock(&vcpu->mutex);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	void __user *measure = (void __user *)(uintptr_t)argp->data;
@@ -2125,12 +2133,15 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 	mutex_lock(&kvm->lock);

 	switch (sev_cmd.id) {
+	case KVM_SEV_ES_INIT:
+		if (!sev_es) {
+			r = -ENOTTY;
+			goto out;
+		}
+		fallthrough;
 	case KVM_SEV_INIT:
 		r = sev_guest_init(kvm, &sev_cmd);
 		break;
-	case KVM_SEV_ES_INIT:
-		r = sev_es_guest_init(kvm, &sev_cmd);
-		break;
 	case KVM_SEV_LAUNCH_START:
 		r = sev_launch_start(kvm, &sev_cmd);
 		break;
@@ -2507,51 +2518,39 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
  * Pages used by hardware to hold guest encrypted state must be flushed before
  * returning them to the system.
  */
-static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
-				   unsigned long len)
+static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
 {
+	int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+
 	/*
-	 * If hardware enforced cache coherency for encrypted mappings of the
-	 * same physical page is supported, nothing to do.
+	 * Note! The address must be a kernel address, as regular page walk
+	 * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
+	 * address is non-deterministic and unsafe. This function deliberately
+	 * takes a pointer to deter passing in a user address.
 	 */
-	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
-		return;
+	unsigned long addr = (unsigned long)va;

 	/*
-	 * If the VM Page Flush MSR is supported, use it to flush the page
-	 * (using the page virtual address and the guest ASID).
+	 * If CPU enforced cache coherency for encrypted mappings of the
+	 * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
+	 * flush is still needed in order to work properly with DMA devices.
 	 */
-	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
-		struct kvm_sev_info *sev;
-		unsigned long va_start;
-		u64 start, stop;
-
-		/* Align start and stop to page boundaries. */
-		va_start = (unsigned long)va;
-		start = (u64)va_start & PAGE_MASK;
-		stop = PAGE_ALIGN((u64)va_start + len);
-
-		if (start < stop) {
-			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
-
-			while (start < stop) {
-				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
-				       start | sev->asid);
-
-				start += PAGE_SIZE;
-			}
-
-			return;
-		}
-
-		WARN(1, "Address overflow, using WBINVD\n");
+	if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
+		clflush_cache_range(va, PAGE_SIZE);
+		return;
 	}

 	/*
-	 * Hardware should always have one of the above features,
-	 * but if not, use WBINVD and issue a warning.
+	 * VM Page Flush takes a host virtual address and a guest ASID. Fall
+	 * back to WBINVD if this faults so as not to make any problems worse
+	 * by leaving stale encrypted data in the cache.
 	 */
-	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
+	if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+		goto do_wbinvd;
+
+	return;
+
+do_wbinvd:
 	wbinvd_on_all_cpus();
 }
@@ -2565,7 +2564,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
 	svm = to_svm(vcpu);

 	if (vcpu->arch.guest_state_protected)
-		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
+		sev_flush_encrypted_page(vcpu, svm->vmsa);
+
 	__free_page(virt_to_page(svm->vmsa));

 	if (svm->ghcb_sa_free)
@@ -2783,7 +2783,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	return -EINVAL;
 }

-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
 	if (!svm->ghcb)
 		return;
@@ -2819,9 +2819,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 	int asid = sev_get_asid(svm->vcpu.kvm);

-	/* Perform any SEV-ES pre-run actions */
-	pre_sev_es_run(svm);
-
 	/* Assign the asid allocated with this SEV guest */
 	svm->vmcb->control.asid = asid;

@@ -3249,5 +3246,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
 	 * non-zero value.
 	 */
+	if (!svm->ghcb)
+		return;
+
 	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ded9b9d3e1ceef5a237a156d77d4dbad08495d75..c9be33312e3b8be660e68aae1142507d65d148b5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1521,6 +1521,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }

+static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
+{
+	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+
+	return sev_es_guest(vcpu->kvm)
+		? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
+		: kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -2750,7 +2759,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	if (!sev_es_guest(svm->vcpu.kvm) || !err)
+	if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
 		return kvm_complete_insn_gp(&svm->vcpu, err);

 	ghcb_set_sw_exit_info_1(svm->ghcb, 1);
@@ -3480,14 +3489,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 	if (!gif_set(svm))
 		return true;

-	if (sev_es_guest(svm->vcpu.kvm)) {
-		/*
-		 * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
-		 * bit to determine the state of the IF flag.
-		 */
-		if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
-			return true;
-	} else if (is_guest_mode(vcpu)) {
+	if (is_guest_mode(vcpu)) {
 		/* As long as interrupts are being delivered... */
 		if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
 		    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3498,7 +3500,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 		if (nested_exit_on_intr(svm))
 			return false;
 	} else {
-		if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+		if (!svm_get_if_flag(vcpu))
 			return true;
 	}

@@ -3604,6 +3606,8 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)

 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
+	if (sev_es_guest(vcpu->kvm))
+		sev_es_unmap_ghcb(to_svm(vcpu));
 }

 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
@@ -4508,6 +4512,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+	.get_if_flag = svm_get_if_flag,

 	.tlb_flush_all = svm_flush_tlb,
 	.tlb_flush_current = svm_flush_tlb,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 030f98bda58baf9034dc8f3b2fc94c6c264b4dcf..c0436dc2a4c128751c613058192e5e0eacbcc244 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -234,7 +234,7 @@ static inline bool sev_es_guest(struct kvm *kvm)
 #ifdef CONFIG_KVM_AMD_SEV
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

-	return sev_guest(kvm) && sev->es_active;
+	return sev->es_active && !WARN_ON_ONCE(!sev->active);
 #else
 	return false;
 #endif
@@ -592,6 +592,7 @@ void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
 void sev_es_vcpu_put(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);

 /* vmenter.S */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 80fc17602bf50be258807c6a35fcc3d718eb2c9f..6ccf34771e2cd406bb6c6a5f647303d11691c0a2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1557,6 +1557,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 		vmx->emulation_required = emulation_required(vcpu);
 }

+static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
+{
+	return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -8435,6 +8440,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.get_if_flag = vmx_get_if_flag,

 	.tlb_flush_all = vmx_flush_tlb_all,
 	.tlb_flush_current = vmx_flush_tlb_current,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e76aa18c1b938f5db3aa48c670d1012a7647738..5f2677378d5e9f29d56d3826064d773cd62aa280 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8451,7 +8451,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
 {
 	u64 ret = vcpu->run->hypercall.ret;

-	if (!is_64_bit_mode(vcpu))
+	if (!is_64_bit_hypercall(vcpu))
 		ret = (u32)ret;
 	kvm_rax_write(vcpu, ret);
 	++vcpu->stat.hypercalls;
@@ -8474,7 +8474,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)

 	trace_kvm_hypercall(nr, a0, a1, a2, a3);

-	op_64_bit = is_64_bit_mode(vcpu);
+	op_64_bit = is_64_bit_hypercall(vcpu);
 	if (!op_64_bit) {
 		nr &= 0xFFFFFFFF;
 		a0 &= 0xFFFFFFFF;
@@ -8583,14 +8583,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;

-	/*
-	 * if_flag is obsolete and useless, so do not bother
-	 * setting it for SEV-ES guests.  Userspace can just
-	 * use kvm_run->ready_for_interrupt_injection.
-	 */
-	kvm_run->if_flag = !vcpu->arch.guest_state_protected
-		&& (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
-
+	kvm_run->if_flag = kvm_x86_ops.get_if_flag(vcpu);
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
 	kvm_run->ready_for_interrupt_injection =
@@ -11222,6 +11215,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)

 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
+	if (vcpu->arch.guest_state_protected)
+		return true;
+
 	return vcpu->arch.preempted_in_kernel;
 }
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 545fadc31e34b2faf3acf39dae9902ee12adf60b..1ecbfe7bc66e27b0fac1e345ee4dfe98b5473e41 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -95,12 +95,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
 {
 	int cs_db, cs_l;

+	WARN_ON_ONCE(vcpu->arch.guest_state_protected);
+
 	if (!is_long_mode(vcpu))
 		return false;
 	kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 	return cs_l;
 }

+static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * If running with protected guest state, the CS register is not
+	 * accessible. The hypercall register values will have had to been
+	 * provided in 64-bit mode, so assume the guest is in 64-bit.
+	 */
+	return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
+}
+
 static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
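
For reference, a minimal userspace sketch of how the unified KVM_SEV_INIT/KVM_SEV_ES_INIT path above is reached through KVM_MEMORY_ENCRYPT_OP. This is not part of the patch; the vm_fd/sev_fd plumbing and error handling are illustrative assumptions, only the ioctl, struct kvm_sev_cmd, and command IDs come from the KVM uAPI.

/*
 * Illustrative only: issue KVM_SEV_ES_INIT on a freshly created VM.  With the
 * change above, svm_mem_enc_op() rejects the command when sev_es is disabled
 * and otherwise falls through to sev_guest_init(), which now also sets
 * sev->es_active.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int sev_es_init(int vm_fd)
{
	int sev_fd = open("/dev/sev", O_RDWR);	/* SEV firmware device */
	struct kvm_sev_cmd cmd = {
		.id	= KVM_SEV_ES_INIT,
		.sev_fd	= sev_fd,
	};
	int ret;

	if (sev_fd < 0)
		return -1;

	ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
	if (ret)
		fprintf(stderr, "KVM_SEV_ES_INIT failed: ret=%d fw_error=%u\n",
			ret, cmd.error);
	return ret;
}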