diff --git a/0007-CodeGen-enhance-rotate-loop-in-blockplacement.patch b/0007-CodeGen-enhance-rotate-loop-in-blockplacement.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e8fbdcaef003f9921e0bcf7bc1458d2df09b5a1 --- /dev/null +++ b/0007-CodeGen-enhance-rotate-loop-in-blockplacement.patch @@ -0,0 +1,249 @@ +From 1872a6d3a691385be96857ab8a4fcd0a0553dd47 Mon Sep 17 00:00:00 2001 +From: zengyong <2595650269@qq.com> +Date: Mon, 17 Nov 2025 14:21:12 +0800 +Subject: [PATCH] [CodeGen] enhance rotate loop in blockplacement 1. delete an + unnecessary condition in findBestLoopExit 2. similar to findBestLoopTop, + caculate fallthrough gains via rotating loop 3. update test + +--- + llvm/lib/CodeGen/MachineBlockPlacement.cpp | 122 ++++++++++--------- + llvm/test/CodeGen/X86/bb_rotate.ll | 6 +- + llvm/test/CodeGen/X86/loop-rotate.ll | 4 +- + llvm/test/CodeGen/X86/lsr-negative-stride.ll | 14 +-- + 4 files changed, 75 insertions(+), 71 deletions(-) + +diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp +index b69045b4d61f..d93be6a065f8 100644 +--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp ++++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp +@@ -2216,13 +2216,6 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, + continue; + if (Succ == MBB) + continue; +- BlockChain &SuccChain = *BlockToChain[Succ]; +- // Don't split chains, either this chain or the successor's chain. +- if (&Chain == &SuccChain) { +- LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " +- << getBlockName(Succ) << " (chain conflict)\n"); +- continue; +- } + + auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); + if (LoopBlockSet.count(Succ)) { +@@ -2322,12 +2315,29 @@ MachineBlockPlacement::hasViableTopFallthrough( + return false; + } + +-/// Attempt to rotate an exiting block to the bottom of the loop. 
+-/// +-/// Once we have built a chain, try to rotate it to line up the hot exit block +-/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +-/// branches. For example, if the loop has fallthrough into its header and out +-/// of its bottom already, don't rotate it. ++// Compute the fallthrough gains via rotating loop, and rotate only when gains > 0 ++// ++// In the following diagram, B0,B1...,Bn is a previously built loop chain, ++// Bk is the new bottom found by findBestLoopExit, edges marked as "-" are reduced fallthrough, ++// edges marked as "+" are increased fallthrough, this function computes ++// ++// SUM(increased fallthrough) - SUM(decreased fallthrough) ++// ++// | ++// | - ++// V ++// ---> B0 ++// | B1 ++// | . + ++// | Bk ---> ++// +| |- ++// | V ++// | Bk+1 ++// | . ++// | Bn-1 ++// --- Bn <--- ++// |- ++// + void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + const MachineBasicBlock *ExitingBB, + BlockFrequency ExitFreq, +@@ -2346,57 +2356,53 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + if (Top->isEntryBlock()) + return; + +- bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet); +- +- // If the header has viable fallthrough, check whether the current loop +- // bottom is a viable exiting block. If so, bail out as rotating will +- // introduce an unnecessary branch. +- if (ViableTopFallthrough) { +- for (MachineBasicBlock *Succ : Bottom->successors()) { +- BlockChain *SuccChain = BlockToChain[Succ]; +- if (!LoopBlockSet.count(Succ) && +- (!SuccChain || Succ == *SuccChain->begin())) +- return; +- } ++ // ignore when bottom has more than 2 successors (similar to findBestLoopTop) ++ if (Bottom->succ_size() > 2) ++ return; ++ ++ BlockFrequency FallThrough2Exit = BlockFrequency(0); + +- // Rotate will destroy the top fallthrough, we need to ensure the new exit +- // frequency is larger than top fallthrough. 
+- BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); +- if (FallThrough2Top >= ExitFreq) +- return; ++ if (Bottom->succ_size() == 2) { ++ MachineBasicBlock *Succ = *Bottom->succ_begin(); ++ if (Succ == Top) ++ Succ = *Bottom->succ_rbegin(); ++ BlockChain *SuccChain = BlockToChain[Succ]; ++ // FallThrough2Exit exists only when Succ is outside the current loop and is either unchained or the head of its chain ++ if (!LoopBlockSet.count(Succ) && ++ (!SuccChain || Succ == *SuccChain->begin())) ++ FallThrough2Exit = ++ MBFI->getBlockFreq(Bottom) * MBPI->getEdgeProbability(Bottom, Succ); + } + + BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); + if (ExitIt == LoopChain.end()) + return; +- +- // Rotating a loop exit to the bottom when there is a fallthrough to top +- // trades the entry fallthrough for an exit fallthrough. +- // If there is no bottom->top edge, but the chosen exit block does have +- // a fallthrough, we break that fallthrough for nothing in return. +- +- // Let's consider an example. We have a built chain of basic blocks +- // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block. +- // By doing a rotation we get +- // Bk+1, ..., Bn, B1, ..., Bk +- // Break of fallthrough to B1 is compensated by a fallthrough from Bk. +- // If we had a fallthrough Bk -> Bk+1 it is broken now. +- // It might be compensated by fallthrough Bn -> B1. +- // So we have a condition to avoid creation of extra branch by loop rotation. +- // All below must be true to avoid loop rotation: +- // If there is a fallthrough to top (B1) +- // There was fallthrough from chosen exit block (Bk) to next one (Bk+1) +- // There is no fallthrough from bottom (Bn) to top (B1). +- // Please note that there is no exit fallthrough from Bn because we checked it +- // above. 
+- if (ViableTopFallthrough) { +- assert(std::next(ExitIt) != LoopChain.end() && +- "Exit should not be last BB"); +- MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); +- if (ExitingBB->isSuccessor(NextBlockInChain)) +- if (!Bottom->isSuccessor(Top)) +- return; +- } ++ ++ assert(std::next(ExitIt) != LoopChain.end() && "Exit should not be last BB"); ++ MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); ++ ++ BlockFrequency FallThroughFromPred = BlockFrequency(0); ++ BlockFrequency BackEdgeFreq = BlockFrequency(0); ++ ++ // fallthrough from bk to bk+1 ++ if (ExitingBB->isSuccessor(NextBlockInChain)) ++ FallThroughFromPred = MBFI->getBlockFreq(ExitingBB) * ++ MBPI->getEdgeProbability(ExitingBB, NextBlockInChain); ++ ++ // fallthrough from bottom to top ++ if (Bottom->isSuccessor(Top)) ++ BackEdgeFreq = ++ MBFI->getBlockFreq(Bottom) * MBPI->getEdgeProbability(Bottom, Top); ++ ++ BlockFrequency NewFreq = ExitFreq; ++ BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); ++ ++ BlockFrequency Gains = BackEdgeFreq + NewFreq; ++ BlockFrequency Lost = ++ FallThrough2Top + FallThrough2Exit + FallThroughFromPred; ++ ++ if (Lost >= Gains) ++ return; + + LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) + << " at bottom\n"); +diff --git a/llvm/test/CodeGen/X86/bb_rotate.ll b/llvm/test/CodeGen/X86/bb_rotate.ll +index 55a7b0138026..bc4f1f008b36 100644 +--- a/llvm/test/CodeGen/X86/bb_rotate.ll ++++ b/llvm/test/CodeGen/X86/bb_rotate.ll +@@ -4,13 +4,13 @@ define i1 @no_viable_top_fallthrough() { + ; CHECK-LABEL: no_viable_top_fallthrough + ; CHECK: %.entry + ; CHECK: %.bb1 ++; CHECK: %.exit ++; CHECK: %.stop ++; CHECK: %.header + ; CHECK: %.bb2 + ; CHECK: %.middle + ; CHECK: %.backedge + ; CHECK: %.bb3 +-; CHECK: %.header +-; CHECK: %.exit +-; CHECK: %.stop + .entry: + %val1 = call i1 @foo() + br i1 %val1, label %.bb1, label %.header, !prof !10 +diff --git a/llvm/test/CodeGen/X86/loop-rotate.ll 
b/llvm/test/CodeGen/X86/loop-rotate.ll +index 3f0a390e7c1b..77104c021d51 100644 +--- a/llvm/test/CodeGen/X86/loop-rotate.ll ++++ b/llvm/test/CodeGen/X86/loop-rotate.ll +@@ -38,9 +38,9 @@ define void @do_rotate() { + ; CHECK: %entry + ; CHECK: %then + ; CHECK: %else ++; CHECK: %header + ; CHECK: %latch1 + ; CHECK: %latch2 +-; CHECK: %header + ; CHECK: %end + entry: + %val0 = call i1 @foo() +@@ -76,10 +76,10 @@ define void @do_rotate2() { + ; CHECK-LABEL: do_rotate2 + ; CHECK: %entry + ; CHECK: %then ++; CHECK: %header + ; CHECK: %middle + ; CHECK: %latch1 + ; CHECK: %latch2 +-; CHECK: %header + ; CHECK: %exit + entry: + %val0 = call i1 @foo() +diff --git a/llvm/test/CodeGen/X86/lsr-negative-stride.ll b/llvm/test/CodeGen/X86/lsr-negative-stride.ll +index 26c6128ab48d..924fd25ac62d 100644 +--- a/llvm/test/CodeGen/X86/lsr-negative-stride.ll ++++ b/llvm/test/CodeGen/X86/lsr-negative-stride.ll +@@ -24,6 +24,12 @@ define i32 @t(i32 %a, i32 %b) nounwind { + ; CHECK-NEXT: movl %edx, %eax + ; CHECK-NEXT: retl + ; CHECK-NEXT: .p2align 4, 0x90 ++; CHECK-NEXT: .LBB0_5: # %cond_false ++; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ++; CHECK-NEXT: subl %edx, %ecx ++; CHECK-NEXT: cmpl %edx, %ecx ++; CHECK-NEXT: movl %edx, %eax ++; CHECK-NEXT: je .LBB0_6 + ; CHECK-NEXT: .LBB0_2: # %bb.outer + ; CHECK-NEXT: # =>This Loop Header: Depth=1 + ; CHECK-NEXT: # Child Loop BB0_3 Depth 2 +@@ -39,14 +45,6 @@ define i32 @t(i32 %a, i32 %b) nounwind { + ; CHECK-NEXT: cmpl %eax, %ecx + ; CHECK-NEXT: movl %eax, %edx + ; CHECK-NEXT: jne .LBB0_3 +-; CHECK-NEXT: jmp .LBB0_6 +-; CHECK-NEXT: .p2align 4, 0x90 +-; CHECK-NEXT: .LBB0_5: # %cond_false +-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +-; CHECK-NEXT: subl %edx, %ecx +-; CHECK-NEXT: cmpl %edx, %ecx +-; CHECK-NEXT: movl %edx, %eax +-; CHECK-NEXT: jne .LBB0_2 + ; CHECK-NEXT: .LBB0_6: # %bb17 + ; CHECK-NEXT: retl + entry: +-- +Gitee diff --git a/llvm.spec b/llvm.spec index 
d59962688578d930b7cee41581a511aa27a3de5e..b640f472df9cab701371e1e09f2eb20f48df2dcf 100644 --- a/llvm.spec +++ b/llvm.spec @@ -164,7 +164,7 @@ Name: llvm Name: llvm-toolset-%{maj_ver} %endif Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 54 +Release: 55 Summary: The Low Level Virtual Machine License: NCSA @@ -187,6 +187,7 @@ Patch0004: 0004-remove-cmake_minimum_required.patch Patch0005: 0005-Fix-for-building-autotuner-with-mlir.patch %endif Patch0006: 0006-fix-for-prefix-c.patch +Patch0007: 0007-CodeGen-enhance-rotate-loop-in-blockplacement.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -3013,6 +3014,9 @@ fi #endregion files %changelog +* Wed Nov 26 2025 liyunfei - 17.0.6-55 +- enhance rotate loop in blockplacement + * Tue Nov 18 2025 liyunfei - 17.0.6-54 - update to llvm-for-oe-17.0.6-2512.0.1 - release-note https://gitee.com/openeuler/llvm-project/releases/tag/llvm-for-oE-17.0.6-2512.0.1