diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 1a6664ddc02a786940f8b787ad83142bd7b6a6f7..a5ef721a901197802c1ded227abe63448bdd9027 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -243,6 +243,7 @@ struct kvm_vcpu_arch { u64 perf_ctrl[4]; u64 perf_cntr[4]; + int blocking; }; static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg) @@ -319,8 +320,6 @@ static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} extern int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/emulate.c b/arch/loongarch/kvm/emulate.c index 01bafcb3abb35b5b4adb4741b3def75e581b2e82..6a68dd57ecd128c2e330225fc9aab8fb9d4b6daf 100644 --- a/arch/loongarch/kvm/emulate.c +++ b/arch/loongarch/kvm/emulate.c @@ -24,20 +24,9 @@ int _kvm_emu_idle(struct kvm_vcpu *vcpu) { ++vcpu->stat.idle_exits; trace_kvm_exit(vcpu, KVM_TRACE_EXIT_IDLE); - if (!vcpu->arch.irq_pending) { - kvm_save_timer(vcpu); - kvm_vcpu_block(vcpu); - - /* - * We we are runnable, then definitely go off to user space to - * check if any I/O interrupts are pending. 
- */ - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { - kvm_clear_request(KVM_REQ_UNHALT, vcpu); - vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - } - } + kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); return EMULATE_DONE; } diff --git a/arch/loongarch/kvm/kvmcpu.h b/arch/loongarch/kvm/kvmcpu.h index 78f83a1a8400f19b596e53bd540dee95387ea92f..7bcaaa254d166ff1e46ae89a3a872c315f55ab02 100644 --- a/arch/loongarch/kvm/kvmcpu.h +++ b/arch/loongarch/kvm/kvmcpu.h @@ -99,7 +99,6 @@ void kvm_restore_lasx_upper(struct kvm_vcpu *cpu); void kvm_lose_hw_perf(struct kvm_vcpu *vcpu); void kvm_restore_hw_perf(struct kvm_vcpu *vcpu); -void kvm_acquire_timer(struct kvm_vcpu *vcpu); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_restore_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/loongarch.c b/arch/loongarch/kvm/loongarch.c index 01e28c05ac1122779dc6e4d8e57a4f7a9237ce16..7a4c7fcf5bef35a113bd38fab508fb64573ff190 100644 --- a/arch/loongarch/kvm/loongarch.c +++ b/arch/loongarch/kvm/loongarch.c @@ -246,7 +246,6 @@ int kvm_arch_hardware_enable(void) */ gcfg |= KVM_GCFG_GCI_SECURE; gcfg |= KVM_GCFG_MATC_ROOT; - gcfg |= KVM_GCFG_TIT; kvm_write_csr_gcfg(gcfg); kvm_flush_tlb_all(); @@ -457,9 +456,6 @@ static int _kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) local_irq_disable(); - if (ret == RESUME_GUEST) - kvm_acquire_timer(vcpu); - if (!(ret & RESUME_HOST)) { _kvm_deliver_intr(vcpu); /* Only check for signals if not already exiting to userspace */ @@ -652,7 +648,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) smp_store_mb(vcpu->mode, IN_GUEST_MODE); cpu = smp_processor_id(); - kvm_acquire_timer(vcpu); /* Check if we have any exceptions/interrupts pending */ _kvm_deliver_intr(vcpu); @@ -698,23 +693,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } -/** - * kvm_migrate_count() - Migrate timer. - * @vcpu: Virtual CPU. 
- * - * Migrate hrtimer to the current CPU by cancelling and restarting it - * if it was running prior to being cancelled. - * - * Must be called when the VCPU is migrated to a different CPU to ensure that - * timer expiry during guest execution interrupts the guest and causes the - * interrupt to be delivered in a timely manner. - */ -static void kvm_migrate_count(struct kvm_vcpu *vcpu) -{ - if (hrtimer_cancel(&vcpu->arch.swtimer)) - hrtimer_restart(&vcpu->arch.swtimer); -} - static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_context *context; @@ -822,22 +800,22 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) local_irq_save(flags); vcpu->cpu = cpu; - if (vcpu->arch.last_sched_cpu != cpu) { - kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", - vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); - /* - * Migrate the timer interrupt to the current CPU so that it - * always interrupts the guest and synchronously triggers a - * guest timer interrupt. - */ - kvm_migrate_count(vcpu); - } /* restore guest state to registers */ _kvm_vcpu_load(vcpu, cpu); local_irq_restore(flags); } +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + vcpu->arch.blocking = 1; +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + vcpu->arch.blocking = 0; +} + static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu) { struct loongarch_csrs *csr = vcpu->arch.csr; @@ -1712,9 +1690,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return _kvm_pending_timer(vcpu) || + int ret; + + /* protect from TOD sync and vcpu_load/put */ + preempt_disable(); + ret = _kvm_pending_timer(vcpu) || kvm_read_hw_gcsr(KVM_CSR_ESTAT) & (1 << (KVM_INT_TIMER - KVM_INT_START)); + preempt_enable(); + return ret; } int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index cdd16862fb4c98ae3d92a67367d221ced66c7af8..7b39290e5020ca577ba8aa2815618a0dd6594284 
100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -27,6 +27,18 @@ #define KVM_MMU_CACHE_MIN_PAGES 2 #endif +static inline int kvm_pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } +static inline pte_t kvm_pte_mksmall(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_HUGE; + return pte; +} + +static inline void kvm_set_pte(pte_t *ptep, pte_t val) +{ + WRITE_ONCE(*ptep, val); +} + static int kvm_tlb_flush_gpa(struct kvm_vcpu *vcpu, unsigned long gpa) { preempt_disable(); @@ -893,7 +905,7 @@ static int kvm_set_pmd_huge(struct kvm_vcpu *vcpu, struct kvm_mmu_memory_cache pmd_clear(pmd); } - kvm_tlb_flush_gpa(vcpu, addr & PMD_MASK); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); set_pmd(pmd, *new_pmd); return 0; } @@ -950,14 +962,16 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, unsigned long *gpap) } static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, - unsigned long hva, - unsigned long map_size) + unsigned long hva, bool write) { gpa_t gpa_start; hva_t uaddr_start, uaddr_end; + unsigned long map_size; size_t size; - if (memslot->arch.flags & KVM_MEMSLOT_DISABLE_THP) + map_size = PMD_SIZE; + /* Disable dirty logging on HugePages */ + if ((memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) && write) return false; size = memslot->npages * PAGE_SIZE; @@ -1012,9 +1026,6 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, * @vcpu: VCPU pointer. * @gpa: Guest physical address of fault. * @write: Whether the fault was due to a write. - * @out_entry: New PTE for @gpa (written on success unless NULL). - * @out_buddy: New PTE for @gpa's buddy (written on success unless - * NULL). * * Perform fast path GPA fault handling, doing all that can be done without * calling into KVM. This handles marking old pages young (for idle page @@ -1026,8 +1037,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, * read-only page, in which case KVM must be consulted. 
*/ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, - bool write, - pte_t *out_entry, pte_t *out_buddy) + bool write) { struct kvm *kvm = vcpu->kvm; gfn_t gfn = gpa >> PAGE_SHIFT; @@ -1035,6 +1045,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ bool pfn_valid = false; int ret = 0; + struct kvm_memory_slot *slot; spin_lock(&kvm->mmu_lock); @@ -1058,6 +1069,18 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, goto out; } + if (kvm_pte_huge(*ptep)) { + /* + * Do not set write permission when dirty logging is + * enabled for HugePages + */ + slot = gfn_to_memslot(kvm, gfn); + if (slot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EFAULT; + goto out; + } + } + /* Track dirtying of writeable pages */ set_pte(ptep, pte_mkdirty(*ptep)); pfn = pte_pfn(*ptep); @@ -1072,11 +1095,6 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, kvm_set_pfn_dirty(pfn); } - if (out_entry) - *out_entry = *ptep; - if (out_buddy) - *out_buddy = *ptep_buddy(ptep); - out: spin_unlock(&kvm->mmu_lock); if (pfn_valid) @@ -1084,14 +1102,35 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, return ret; } +/* + * Split huge page + */ +static pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, pte_t *ptep, gfn_t gfn, + struct vm_area_struct *vma, unsigned long hva) +{ + int i; + pte_t val, *child; + struct kvm_mmu_memory_cache *memcache; + + memcache = &vcpu->arch.mmu_page_cache; + child = kvm_mmu_memory_cache_alloc(memcache); + val = kvm_pte_mksmall(*ptep); + for (i = 0; i < PTRS_PER_PTE; i++) { + kvm_set_pte(child + i, val); + pte_val(val) += PAGE_SIZE; + } + + /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */ + pte_val(val) = (unsigned long)child; + kvm_set_pte(ptep, val); + return child + (gfn & (PTRS_PER_PTE - 1)); +} + /** * kvm_map_page() - Map a guest physical page. * @vcpu: VCPU pointer. 
* @gpa: Guest physical address of fault. * @write: Whether the fault was due to a write. - * @out_entry: New PTE for @gpa (written on success unless NULL). - * @out_buddy: New PTE for @gpa's buddy (written on success unless - * NULL). * * Handle GPA faults by creating a new GPA mapping (or updating an existing * one). @@ -1109,8 +1148,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, * as an MMIO access. */ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, - bool write, - pte_t *out_entry, pte_t *out_buddy) + bool write) { struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -1134,8 +1172,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, /* Try the fast path to handle old / clean pages */ srcu_idx = srcu_read_lock(&kvm->srcu); if ((exccode != KVM_EXCCODE_TLBRI) && (exccode != KVM_EXCCODE_TLBXI)) { - err = kvm_map_page_fast(vcpu, gpa, write, out_entry, - out_buddy); + err = kvm_map_page_fast(vcpu, gpa, write); if (!err) goto out; } @@ -1156,8 +1193,9 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, } vma_pagesize = vma_kernel_pagesize(vma); + if ((vma_pagesize == PMD_SIZE) && + !fault_supports_huge_mapping(memslot, hva, write)) { - if (fault_supports_huge_mapping(memslot, hva, vma_pagesize)) { force_pte = true; vma_pagesize = PAGE_SIZE; ++vcpu->stat.huge_dec_exits; @@ -1227,7 +1265,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, * aligned and that the block is contained within the memslot. 
*/ ++vcpu->stat.huge_thp_exits; - if (fault_supports_huge_mapping(memslot, hva, PMD_SIZE) && + if (fault_supports_huge_mapping(memslot, hva, write) && transparent_hugepage_adjust(&pfn, &gpa)) { ++vcpu->stat.huge_adjust_exits; vma_pagesize = PMD_SIZE; @@ -1272,13 +1310,12 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, /* Ensure page tables are allocated */ ptep = kvm_pte_for_gpa(kvm, memcache, vma, hva, gpa); + if (ptep && kvm_pte_huge(*ptep) && write) + ptep = kvm_split_huge(vcpu, ptep, gfn, vma, hva); + set_pte(ptep, new_pte); err = 0; - if (out_entry) - *out_entry = new_pte; - if (out_buddy) - *out_buddy = *ptep_buddy(&new_pte); } spin_unlock(&kvm->mmu_lock); @@ -1294,7 +1331,7 @@ int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, { int ret; - ret = kvm_map_page(vcpu, badv, write, NULL, NULL); + ret = kvm_map_page(vcpu, badv, write); if (ret) return ret; diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 04e156e82965efe56038ed9e3aff464df9269cd7..4e16f3bc53351e772804ab8a4572256033ea7898 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -84,6 +84,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) struct loongarch_csrs *csr = vcpu->arch.csr; ktime_t expire, now; unsigned long cfg, delta, period; + unsigned long ticks, estat; /* * Set guest stable timer cfg csr @@ -97,24 +98,53 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) return; } + /* + * Freeze the soft-timer and sync the guest stable timer with it. We do + * this with interrupts disabled to avoid latency. 
+ */ + hrtimer_cancel(&vcpu->arch.swtimer); + + /* + * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 + * If oneshot timer is fired, CSR TVAL will be -1, there are two + * conditions: + * a) timer is fired during exiting to host + * b) timer is fired and vm is handling timer irq, host should not + * inject timer irq to avoid spurious timer interrupt + */ + ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL); + estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); + if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) { + /* + * Writing 0 to LOONGARCH_CSR_TVAL will inject timer irq + * and set CSR TVAL with -1 + * + * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear + * timer interrupt, and set CSR TVAL keeps unchanged with -1, + * it avoids spurious timer interrupt + */ + kvm_write_gcsr_timertick(0); + if ((estat & CPU_TIMER) == 0) + kvm_gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + /* * set remainder tick value if not expired */ now = ktime_get(); expire = vcpu->arch.expire; + delta = 0; if (ktime_before(now, expire)) delta = ktime_to_tick(vcpu, ktime_sub(expire, now)); - else { - if (cfg & CSR_TCFG_PERIOD) { - period = cfg & CSR_TCFG_VAL; - delta = ktime_to_tick(vcpu, ktime_sub(now, expire)); - delta = period - (delta % period); - } else - delta = 0; + else if (cfg & CSR_TCFG_PERIOD) { + period = cfg & CSR_TCFG_VAL; + delta = ktime_to_tick(vcpu, ktime_sub(now, expire)); + delta = period - (delta % period); + /* * inject timer here though sw timer should inject timer - * interrupt async already, since sw timer may be cancelled - * during injecting intr async in function kvm_acquire_timer + * interrupt async already */ _kvm_queue_irq(vcpu, LARCH_INT_TIMER); } @@ -122,31 +152,6 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) kvm_write_gcsr_timertick(delta); } -/* - * - * Restore hard timer state and enable guest to access timer registers - * without trap - * - * it is called with irq disabled - */ -void kvm_acquire_timer(struct kvm_vcpu 
*vcpu) -{ - unsigned long cfg; - - cfg = kvm_read_csr_gcfg(); - if (!(cfg & CSR_GCFG_TIT)) - return; - - /* enable guest access to hard timer */ - kvm_write_csr_gcfg(cfg & ~CSR_GCFG_TIT); - - /* - * Freeze the soft-timer and sync the guest stable timer with it. We do - * this with interrupts disabled to avoid latency. - */ - hrtimer_cancel(&vcpu->arch.swtimer); -} - /* * Save guest timer state and switch to software emulation of guest * timer. The hard timer must already be in use, so preemption should be @@ -154,28 +159,40 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) { - unsigned long ticks, delta; + unsigned long ticks, delta, cfg; ktime_t expire; struct loongarch_csrs *csr = vcpu->arch.csr; ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL); - delta = tick_to_ns(vcpu, ticks); - expire = ktime_add_ns(ktime_get(), delta); - vcpu->arch.expire = expire; - if (ticks) { + cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG); + + /* + * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 + * If period timer is fired, CSR TVAL will be reloaded from CSR TCFG + * If oneshot timer is fired, CSR TVAL will be -1 + * Here judge one shot timer fired by checking whether TVAL is larger + * than TCFG + */ + if (ticks < cfg) { /* * Update hrtimer to use new timeout * HRTIMER_MODE_PINNED is suggested since vcpu may run in * the same physical cpu in next time */ - hrtimer_cancel(&vcpu->arch.swtimer); + delta = tick_to_ns(vcpu, ticks); + expire = ktime_add_ns(ktime_get(), delta); + vcpu->arch.expire = expire; hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); - } else + } else if (vcpu->arch.blocking) { /* - * inject timer interrupt so that hall polling can dectect - * and exit + * Inject timer interrupt so that halt polling can detect and exit + * kvm_queue_irq is not enough, hrtimer had better be used since vcpu + * is halt-polling and scheduled out already */ - _kvm_queue_irq(vcpu, LARCH_INT_TIMER); + expire 
= ktime_add_ns(ktime_get(), 10); /* 10ns is enough here */ + vcpu->arch.expire = expire; + hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); + } } /* @@ -185,20 +202,14 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) void kvm_save_timer(struct kvm_vcpu *vcpu) { struct loongarch_csrs *csr = vcpu->arch.csr; - unsigned long cfg; preempt_disable(); - cfg = kvm_read_csr_gcfg(); - if (!(cfg & CSR_GCFG_TIT)) { - /* disable guest use of hard timer */ - kvm_write_csr_gcfg(cfg | CSR_GCFG_TIT); - - /* save hard timer state */ - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL); - if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN) - _kvm_save_timer(vcpu); - } + + /* save hard timer state */ + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL); + if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN) + _kvm_save_timer(vcpu); /* save timer-related state to vCPU context */ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);