From f58aacc373efe7d7a5f9b6b6ea6b47892ec1345c Mon Sep 17 00:00:00 2001
From: Zou Wei
Date: Tue, 28 Nov 2023 17:46:31 +0800
Subject: [PATCH 1/2] KVM: arm64: Move the clean of dcache to the map handler

We currently unconditionally clean the dcache in user_mem_abort() before
calling the fault handlers, whenever we take a translation fault and the
pfn is cacheable. But if there are concurrent translation faults on the
same page or block, only the first dcache clean is necessary; the rest
are redundant.

By moving the dcache clean into the map handler, we can easily identify
the conditions under which CMOs are really needed and avoid the
unnecessary ones. Since performing CMOs is time-consuming, especially
when flushing a block range, this change reduces the load on KVM and
improves the efficiency of creating mappings.

Mainline: Open-Source

Signed-off-by: Zou Wei
Signed-off-by: Li Mingzhe
Signed-off-by: Wang Yinfeng
Change-Id: I7dbad984563c955faf7531eecea94b99880e73c6
---
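A note for reviewers, not part of the change itself: the map handler reads
the old PTE immediately before installing the new one, so it can tell a
first-time mapping from a concurrent retry. A minimal sketch of the rule
this encodes, where stage2_needs_cmo() is a hypothetical name used only
for illustration (the actual patch inlines these checks directly in
kvm_set_valid_leaf_pte(), see the first hunk below):

	/*
	 * A CMO is needed only when a new valid mapping is installed;
	 * a racing fault that finds the PTE already valid can skip it,
	 * because whoever installed the PTE already did the CMOs.
	 */
	static bool stage2_needs_cmo(kvm_pte_t old, kvm_pte_t new)
	{
		if (kvm_pte_valid(old))
			return false;	/* lost the race: CMOs already done */

		return stage2_pte_cacheable(new) || stage2_pte_executable(new);
	}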
 arch/arm64/kvm/hyp/pgtable.c | 53 ++++++++++++++++++++++++++----------
 arch/arm64/kvm/mmu.c         | 18 ++----------
 2 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 4d99d07c610..321f682339c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -170,6 +170,38 @@ static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
 	smp_store_release(ptep, pte);
 }
 
+static bool stage2_pte_cacheable(kvm_pte_t pte)
+{
+	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+	return memattr == PAGE_S2_MEMATTR(NORMAL);
+}
+
+static void stage2_flush_dcache(void *addr, u64 size)
+{
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		return;
+
+	__flush_dcache_area(addr, size);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
+static void stage2_invalidate_icache(kvm_pfn_t pfn, unsigned long size)
+{
+	if (icache_is_aliasing()) {
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
+		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
+		void *va = page_address(pfn_to_page(pfn));
+
+		invalidate_icache_range((unsigned long)va, (unsigned long)va + size);
+	}
+}
+
 static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
 				   u32 level)
 {
@@ -185,6 +217,13 @@ static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
 	if (kvm_pte_valid(old))
 		return old == pte;
 
+	/* Flush data cache before installation of the new PTE */
+	if (stage2_pte_cacheable(pte))
+		stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
+
+	if (stage2_pte_executable(pte))
+		stage2_invalidate_icache(__phys_to_pfn(pa), kvm_granule_size(level));
+
 	smp_store_release(ptep, pte);
 	return true;
 }
@@ -641,20 +680,6 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return ret;
 }
 
-static void stage2_flush_dcache(void *addr, u64 size)
-{
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
-		return;
-
-	__flush_dcache_area(addr, size);
-}
-
-static bool stage2_pte_cacheable(kvm_pte_t pte)
-{
-	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
-	return memattr == PAGE_S2_MEMATTR(NORMAL);
-}
-
 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 			       enum kvm_pgtable_walk_flags flag,
 			       void * const arg)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 26068456ec0..d9c6994c79d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -609,16 +609,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__invalidate_icache_guest_page(pfn, size);
-}
-
 static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
 {
 	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
@@ -885,12 +875,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		mark_page_dirty(kvm, gfn);
 	}
 
-	if (fault_status != FSC_PERM && !device)
-		clean_dcache_guest_page(pfn, vma_pagesize);
-
 	if (exec_fault) {
 		prot |= KVM_PGTABLE_PROT_X;
-		invalidate_icache_guest_page(pfn, vma_pagesize);
 	}
 
 	if (device)
@@ -1142,9 +1128,9 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 
 	/*
 	 * We've moved a page around, probably through CoW, so let's treat it
-	 * just like a translation fault and clean the cache to the PoC.
+	 * just like a translation fault and the map handler will clean the
+	 * cache to the PoC.
 	 */
-	clean_dcache_guest_page(pfn, PAGE_SIZE);
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pfn);
 	return 0;
 }
--
Gitee
From fa09984405016a2678f5507fd5e10b3119ec7559 Mon Sep 17 00:00:00 2001
From: Peng Mengguang
Date: Mon, 22 Jan 2024 16:17:45 +0800
Subject: [PATCH 2/2] KVM: arm64: Fix GPU passthrough to VMs on arm64

VFIO maps the device BAR with the Device-nGnRnE memory type, and the
KVM stage-2 mapping likewise uses a Device type, so the effective
memory type the guest ends up with is Device-nGnRnE. In the guest OS,
however, the memory attribute of all I/O memory mappings is
MT_NORMAL_NC, and the graphics card, being just another peripheral to
the guest, is no exception.

Under the rule for combining stage-1 and stage-2 memory attributes,
Device-nGnRnE is the strongest type and therefore wins, and Device
memory requires that accesses be aligned. The error reported during
graphics card passthrough is therefore a memory address alignment
error.

With this patch, if the passthrough device is a graphics card, the
stage-2 memory attribute is set to MT_NORMAL_NC instead of
Device-nGnRnE, so the memory alignment errors are avoided.

Mainline: Open-Source

Signed-off-by: Peng Mengguang
Signed-off-by: Li Mingzhe
Signed-off-by: Wang Yinfeng
Change-Id: I28d53367317a28f46d51a501603932cc0fd769f6
---
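A note for reviewers, not part of the change itself: the architecture
combines the stage-1 attribute chosen by the guest with the stage-2
attribute chosen by KVM by taking the stronger of the two, so a
Normal-NC guest mapping behind a Device stage-2 mapping still behaves
as Device memory and faults on unaligned accesses. A rough sketch of
that rule for the types at play here, with hypothetical enum and helper
names (the authoritative definition is in the Arm ARM):

	/* Ordered strongest (most restrictive) to weakest. */
	enum mem_type {
		MT_DEVICE_nGnRnE,
		MT_DEVICE_nGnRE,
		MT_NORMAL_NC,
		MT_NORMAL_WB,
	};

	/* Combining picks the stronger type: the lower value wins. */
	static enum mem_type combine_s1_s2(enum mem_type s1, enum mem_type s2)
	{
		return s1 < s2 ? s1 : s2;
	}

	/*
	 * Before: the guest maps the BAR Normal-NC but stage-2 says Device,
	 * so combine_s1_s2(MT_NORMAL_NC, MT_DEVICE_nGnRE) yields Device and
	 * the guest's unaligned write-combine accesses fault.
	 * After: VGA BARs get a Normal-NC stage-2 mapping, the combined
	 * result is MT_NORMAL_NC, and unaligned accesses are legal.
	 */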
 arch/arm64/include/asm/kvm_mmu.h     |  2 +-
 arch/arm64/include/asm/kvm_pgtable.h |  1 +
 arch/arm64/include/asm/memory.h      |  2 ++
 arch/arm64/kvm/hyp/pgtable.c         | 12 +++++++++---
 arch/arm64/kvm/mmu.c                 | 14 +++++++++++---
 arch/arm64/kvm/vgic/vgic-v2.c        |  2 +-
 drivers/vfio/pci/vfio_pci.c          |  7 +++++++
 7 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 47dafd6ab3a..356c2d28b6b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -123,7 +123,7 @@ void stage2_unmap_vm(struct kvm *kvm);
 int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size, bool writable);
+			  phys_addr_t pa, unsigned long size, bool writable, bool writecombine);
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 52ab38db04c..3b5a2e8e6e3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -42,6 +42,7 @@ enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_R = BIT(2),
 
 	KVM_PGTABLE_PROT_DEVICE = BIT(3),
+	KVM_PGTABLE_PROT_DEVICE_VGA = BIT(4),
 };
 
 #define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 505bdd75b54..50ac48f865f 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -143,6 +143,7 @@
  * Memory types for Stage-2 translation
  */
 #define MT_S2_NORMAL		0xf
+#define MT_S2_NORMAL_NC		0x5
 #define MT_S2_DEVICE_nGnRE	0x1
 
 /*
@@ -150,6 +151,7 @@
  * Stage-2 enforces Normal-WB and Device-nGnRE
  */
 #define MT_S2_FWB_NORMAL	6
+#define MT_S2_FWB_NORMAL_NC	5
 #define MT_S2_FWB_DEVICE_nGnRE	1
 
 #ifdef CONFIG_ARM64_4K_PAGES
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 321f682339c..d298df1e23f 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -479,9 +479,15 @@ struct stage2_map_data {
 static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
 				    struct stage2_map_data *data)
 {
-	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
-			 PAGE_S2_MEMATTR(NORMAL);
+	bool device = (prot & KVM_PGTABLE_PROT_DEVICE_VGA) || (prot & KVM_PGTABLE_PROT_DEVICE);
+	kvm_pte_t attr;
+
+	if (prot & KVM_PGTABLE_PROT_DEVICE_VGA)
+		attr = PAGE_S2_MEMATTR(NORMAL_NC);
+	else if (prot & KVM_PGTABLE_PROT_DEVICE)
+		attr = PAGE_S2_MEMATTR(DEVICE_nGnRE);
+	else
+		attr = PAGE_S2_MEMATTR(NORMAL);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
 
 	if (!(prot & KVM_PGTABLE_PROT_X))
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d9c6994c79d..09feab947a2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -487,6 +487,13 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	}
 }
 
+static inline bool is_vma_write_combine(struct vm_area_struct *vma)
+{
+	pteval_t pteval = pgprot_val(vma->vm_page_prot);
+
+	return ((pteval & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC));
+}
+
 /**
  * kvm_phys_addr_ioremap - map a device range to guest IPA
  *
@@ -497,13 +504,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
  * @writable:   Whether or not to create a writable mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size, bool writable)
+			  phys_addr_t pa, unsigned long size, bool writable, bool writecombine)
 {
 	phys_addr_t addr;
 	int ret = 0;
 	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
 	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
-	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
+	enum kvm_pgtable_prot prot =
+		(writecombine ? KVM_PGTABLE_PROT_DEVICE_VGA : KVM_PGTABLE_PROT_DEVICE) |
 				     KVM_PGTABLE_PROT_R |
 				     (writable ? KVM_PGTABLE_PROT_W : 0);
 
@@ -1340,7 +1348,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
 						    vm_end - vm_start,
-						    writable);
+						    writable, is_vma_write_combine(vma));
 			if (ret)
 				break;
 		}
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index ebf53a4e129..5d6492fa19b 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -341,7 +341,7 @@ int vgic_v2_map_resources(struct kvm *kvm)
 	if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
 		ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
 					    kvm_vgic_global_state.vcpu_base,
-					    KVM_VGIC_V2_CPU_SIZE, true);
+					    KVM_VGIC_V2_CPU_SIZE, true, false);
 		if (ret) {
 			kvm_err("Unable to remap VGIC CPU to VCPU\n");
 			goto out;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 57ae8b46b83..40ce1ff3667 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1718,7 +1718,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 	}
 
 	vma->vm_private_data = vdev;
+#ifdef CONFIG_ARM64
+	if (vfio_pci_is_vga(pdev))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+#else
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+#endif
 	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
 
 	/*
--
Gitee