From 96e4d6a563a7654a7c00b6085531135854b8aede Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Sat, 30 Nov 2024 16:44:30 +0800
Subject: [PATCH 1/4] arm64: tlbflush: Rename MAX_TLBI_OPS

mainline inclusion
from mainline-v6.7-rc1
commit ec1c3b9ff16082f880b304be40992568f4eee6a7
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IB82FR
CVE: NA

-------------------------------------------------

Perhaps unsurprisingly, I-cache invalidations suffer from performance
issues similar to TLB invalidations on certain systems. TLB and I-cache
maintenance all result in DVM on the mesh, which is where the real
bottleneck lies.

Rename the heuristic to point the finger at DVM, such that it may be
reused for limiting I-cache invalidations.

Reviewed-by: Gavin Shan
Tested-by: Gavin Shan
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20230920080133.944717-2-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
(cherry picked from commit ec1c3b9ff16082f880b304be40992568f4eee6a7)
Signed-off-by: Kefeng Wang
---
 arch/arm64/include/asm/tlbflush.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 831c314d75ff..acfc2e4e0af7 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -339,7 +339,7 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
  * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
  * necessarily a performance improvement.
  */
-#define MAX_TLBI_OPS	PTRS_PER_PTE
+#define MAX_DVM_OPS	PTRS_PER_PTE
 
 /*
  * __flush_tlb_range_op - Perform TLBI operation upon a range
@@ -415,12 +415,12 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 	/*
 	 * When not uses TLB range ops, we can handle up to
-	 * (MAX_TLBI_OPS - 1) pages;
+	 * (MAX_DVM_OPS - 1) pages;
 	 * When uses TLB range ops, we can handle up to
 	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
 	 */
 	if ((!system_supports_tlb_range() &&
-	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
+	     (end - start) >= (MAX_DVM_OPS * stride)) ||
 	    pages >= MAX_TLBI_RANGE_PAGES) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
@@ -462,7 +462,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
 
-	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
 		flush_tlb_all();
 		return;
 	}
-- 
Gitee

From 1fe29f6d875f73d38602425bc512552b7948ce7c Mon Sep 17 00:00:00 2001
From: Gavin Shan
Date: Sat, 30 Nov 2024 16:44:31 +0800
Subject: [PATCH 2/4] arm64: tlb: Allow range operation for MAX_TLBI_RANGE_PAGES

mainline inclusion
from mainline-v6.10-rc1
commit 73301e464a72a0d007d0d4e0f4d3dab5c58125bf
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IB82FR
CVE: NA

-------------------------------------------------

MAX_TLBI_RANGE_PAGES pages are covered by SCALE#3 and NUM#31, which is
supported now. Allow the TLBI RANGE operation when the number of pages
is equal to MAX_TLBI_RANGE_PAGES in __flush_tlb_range_nosync().
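The arithmetic behind the new bound is worth spelling out: a single TLBI
RANGE operation covers (NUM + 1) << (5 * SCALE + 1) pages, so SCALE#3
with NUM#31 encodes exactly MAX_TLBI_RANGE_PAGES pages in one operation,
and only counts strictly greater than that need the flush_tlb_mm()
fallback. The following minimal userspace sketch works the limit out; it
is an illustration assuming the __TLBI_RANGE_PAGES() encoding from
tlbflush.h and 4KB pages, not kernel code:

#include <stdio.h>

/* Mirror of the kernel's encoding (an assumption of this sketch): one
 * TLBI RANGE operation covers (num + 1) << (5 * scale + 1) pages. */
#define __TLBI_RANGE_PAGES(num, scale) \
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES	__TLBI_RANGE_PAGES(31, 3)

int main(void)
{
	/* SCALE#3, NUM#31: 32 << 16 = 2097152 pages, i.e. 8GB of 4KB pages */
	printf("MAX_TLBI_RANGE_PAGES = %lu\n", MAX_TLBI_RANGE_PAGES);
	return 0;
}

Since exactly MAX_TLBI_RANGE_PAGES pages are representable in one
operation, the check below can safely move from ">=" to ">".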
Suggested-by: Marc Zyngier
Signed-off-by: Gavin Shan
Reviewed-by: Anshuman Khandual
Reviewed-by: Ryan Roberts
Reviewed-by: Catalin Marinas
Reviewed-by: Shaoqin Huang
Link: https://lore.kernel.org/r/20240405035852.1532010-4-gshan@redhat.com
Signed-off-by: Will Deacon
(cherry picked from commit 73301e464a72a0d007d0d4e0f4d3dab5c58125bf)
Signed-off-by: Kefeng Wang
---
 arch/arm64/include/asm/tlbflush.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index acfc2e4e0af7..3b6acfc00c4b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -417,11 +417,11 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 	 * When not uses TLB range ops, we can handle up to
 	 * (MAX_DVM_OPS - 1) pages;
 	 * When uses TLB range ops, we can handle up to
-	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+	 * MAX_TLBI_RANGE_PAGES pages.
 	 */
 	if ((!system_supports_tlb_range() &&
 	     (end - start) >= (MAX_DVM_OPS * stride)) ||
-	    pages >= MAX_TLBI_RANGE_PAGES) {
+	    pages > MAX_TLBI_RANGE_PAGES) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
-- 
Gitee

From 7d6fd04f158b830f4b4d66a936c94c8ce90b1228 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Sat, 30 Nov 2024 16:44:32 +0800
Subject: [PATCH 3/4] arm64: tlbflush: add __flush_tlb_range_limit_excess()

mainline inclusion
from mainline-v6.13-rc1
commit 7ffc13e233951f15728c9d09db3cc8d9f6cf81f2
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IB82FR
CVE: NA

-------------------------------------------------

The __flush_tlb_range_limit_excess() helper will be used by
flush_tlb_kernel_range() soon.

Signed-off-by: Kefeng Wang
Reviewed-by: Anshuman Khandual
Link: https://lore.kernel.org/r/20240923131351.713304-2-wangkefeng.wang@huawei.com
Signed-off-by: Catalin Marinas
(cherry picked from commit 7ffc13e233951f15728c9d09db3cc8d9f6cf81f2)
Signed-off-by: Kefeng Wang
---
 arch/arm64/include/asm/tlbflush.h | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3b6acfc00c4b..172ce53a4e1c 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -402,6 +402,23 @@ do {									\
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
 
+static inline bool __flush_tlb_range_limit_excess(unsigned long start,
+		unsigned long end, unsigned long pages, unsigned long stride)
+{
+	/*
+	 * When the system does not support TLB range based flush
+	 * operation, (MAX_DVM_OPS - 1) pages can be handled. But
+	 * with TLB range based operation, MAX_TLBI_RANGE_PAGES
+	 * pages can be handled.
+	 */
+	if ((!system_supports_tlb_range() &&
+	     (end - start) >= (MAX_DVM_OPS * stride)) ||
+	    pages > MAX_TLBI_RANGE_PAGES)
+		return true;
+
+	return false;
+}
+
 static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 					    unsigned long start, unsigned long end,
 					    unsigned long stride, bool last_level,
@@ -413,15 +430,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 	end = round_up(end, stride);
 	pages = (end - start) >> PAGE_SHIFT;
 
-	/*
-	 * When not uses TLB range ops, we can handle up to
-	 * (MAX_DVM_OPS - 1) pages;
-	 * When uses TLB range ops, we can handle up to
-	 * MAX_TLBI_RANGE_PAGES pages.
-	 */
-	if ((!system_supports_tlb_range() &&
-	     (end - start) >= (MAX_DVM_OPS * stride)) ||
-	    pages > MAX_TLBI_RANGE_PAGES) {
+	if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
-- 
Gitee
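Taken together with the previous patch, the helper's decision can be
modelled in a few lines of userspace C. The sketch below is illustrative
only: it assumes a 64-bit build with 4KB pages, PTRS_PER_PTE == 512 as
the value of MAX_DVM_OPS, the SCALE#3/NUM#31 range limit, and it folds
system_supports_tlb_range() into a plain boolean parameter:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT		12		/* assumed: 4KB pages */
#define MAX_DVM_OPS		512UL		/* assumed: PTRS_PER_PTE */
#define MAX_TLBI_RANGE_PAGES	(32UL << 16)	/* __TLBI_RANGE_PAGES(31, 3) */

/* Model of __flush_tlb_range_limit_excess(); has_range stands in for
 * system_supports_tlb_range(). */
static bool limit_excess(unsigned long start, unsigned long end,
			 unsigned long pages, unsigned long stride,
			 bool has_range)
{
	if ((!has_range && (end - start) >= (MAX_DVM_OPS * stride)) ||
	    pages > MAX_TLBI_RANGE_PAGES)
		return true;

	return false;
}

int main(void)
{
	unsigned long stride = 1UL << PAGE_SHIFT;

	/* 511 pages: per-page TLBIs are still acceptable without range ops */
	printf("%d\n", limit_excess(0, 511 * stride, 511, stride, false));
	/* 512 pages: without range ops, fall back to a full flush */
	printf("%d\n", limit_excess(0, 512 * stride, 512, stride, false));
	/* with range ops, exactly MAX_TLBI_RANGE_PAGES is still in bounds */
	printf("%d\n", limit_excess(0, MAX_TLBI_RANGE_PAGES * stride,
				    MAX_TLBI_RANGE_PAGES, stride, true));
	return 0;
}

This prints 0, 1, 0: the non-range cap trips at MAX_DVM_OPS pages, while
the range cap only trips beyond MAX_TLBI_RANGE_PAGES.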
From 67561efee231eb3c636eb3391f34111795453ccf Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Sat, 30 Nov 2024 16:44:33 +0800
Subject: [PATCH 4/4] arm64: optimize flush tlb kernel range

mainline inclusion
from mainline-v6.13-rc1
commit a923705c69f7f4ebe6a5488c1f556bed12d28031
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IB82FR
CVE: NA

-------------------------------------------------

Currently, kernel TLB entries are flushed page by page if the target VA
range is less than MAX_DVM_OPS * PAGE_SIZE; otherwise a brutal TLBI ALL
is issued. When the CPU supports TLB range operations we can do better:
convert flush_tlb_kernel_range() to use __flush_tlb_range_op(), like
the other TLB range flushes, to improve performance.

Co-developed-by: Yicong Yang
Signed-off-by: Yicong Yang
Signed-off-by: Kefeng Wang
Reviewed-by: Anshuman Khandual
Link: https://lore.kernel.org/r/20240923131351.713304-3-wangkefeng.wang@huawei.com
Signed-off-by: Catalin Marinas
(cherry picked from commit a923705c69f7f4ebe6a5488c1f556bed12d28031)
[KF: no lpa2 support]
Signed-off-by: Kefeng Wang
---
 arch/arm64/include/asm/tlbflush.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 172ce53a4e1c..4bbd9ed591f2 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -469,19 +469,20 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned long addr;
+	const unsigned long stride = PAGE_SIZE;
+	unsigned long pages;
 
-	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
+	start = round_down(start, stride);
+	end = round_up(end, stride);
+	pages = (end - start) >> PAGE_SHIFT;
+
+	if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
 		flush_tlb_all();
 		return;
 	}
 
-	start = __TLBI_VADDR(start, 0);
-	end = __TLBI_VADDR(end, 0);
-
 	dsb(ishst);
-	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-		__tlbi(vaale1is, addr);
+	__flush_tlb_range_op(vaale1is, start, pages, stride, 0, 0, false);
 	dsb(ish);
 	isb();
 }
-- 
Gitee
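The old loop stepped by 1 << (PAGE_SHIFT - 12) because the TLBI VA
operand carries VA[55:12], so each iteration invalidated one page. For a
sense of the payoff, the sketch below compares the number of
DVM-generating TLBI operations the old per-page loop issues against a
greedy use of the range encoding, where one operation covers
(num + 1) << (5 * scale + 1) pages with num <= 31 and scale <= 3. It is
illustrative only and assumes 4KB pages; the real __flush_tlb_range_op()
also handles strides, TTL hints and the non-range fallback:

#include <stdio.h>

/* Greedy model (an assumption of this sketch) of the TLBI range
 * encoding: walk scales from largest to smallest, peeling off the
 * biggest representable chunk at each step. Valid for page counts up
 * to MAX_TLBI_RANGE_PAGES, which the caller guarantees. */
static unsigned long range_ops(unsigned long pages)
{
	unsigned long ops = 0;
	int scale;

	for (scale = 3; scale >= 0 && pages; scale--) {
		long num = (long)(pages >> (5 * scale + 1)) - 1;

		if (num >= 0) {
			pages -= (unsigned long)(num + 1) << (5 * scale + 1);
			ops++;
		}
	}

	return ops + pages;	/* leftover pages take one TLBI each */
}

int main(void)
{
	/* 2MB of 4KB pages: the old loop issued 512 TLBIs, now 1 range op */
	printf("512 pages: %lu op(s)\n", range_ops(512));
	/* 8GB, the MAX_TLBI_RANGE_PAGES limit: one op at SCALE#3/NUM#31 */
	printf("2097152 pages: %lu op(s)\n", range_ops(2097152));
	return 0;
}

Since every TLBI broadcast turns into DVM traffic on the mesh, cutting
hundreds of per-page operations down to a handful of range operations is
where the performance win of this series comes from.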