diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index d70370404ec58f3c14e6ccd3314f0ed78d7092b9..961fca604e05b920ac9dc2431bd47b3c033489f7 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -200,6 +200,7 @@ void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&);
 void initializeLoopSimplifyPass(PassRegistry&);
 void initializeLoopStrengthReducePass(PassRegistry&);
 void initializeLoopUnrollPass(PassRegistry&);
+void initializeLoopVersioningLICMLegacyPassPass(PassRegistry&);
 void initializeLowerAtomicLegacyPassPass(PassRegistry&);
 void initializeLowerConstantIntrinsicsPass(PassRegistry&);
 void initializeLowerEmuTLSPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index c4206fdbb8a302f18b13f43ad91fd861d0689204..142c9dfdadf7b1826e44685146430a07a97efec7 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -113,6 +113,7 @@ namespace {
       (void) llvm::createLoopStrengthReducePass();
       (void) llvm::createLoopUnrollPass();
       (void) llvm::createLoopRotatePass();
+      (void) llvm::createLoopVersioningLICMPass();
       (void) llvm::createLowerConstantIntrinsicsPass();
       (void) llvm::createLowerExpectIntrinsicPass();
       (void) llvm::createLowerGlobalDtorsLegacyPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index e69beeade94785c3e1ba29ba0a66f75350d70687..e3e31ad146f6262e3c82dd069f330b631071e71d 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -117,6 +117,12 @@ Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
 //
 Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false);
 
+//===----------------------------------------------------------------------===//
+//
+// LoopVersioningLICM - Legacy pass manager loop pass for LoopVersioningLICM.
+//
+Pass *createLoopVersioningLICMPass();
+
 //===----------------------------------------------------------------------===//
 //
 // DemoteRegisterToMemoryPass - This pass is used to demote registers to memory
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index c9f3512da32b22083c835fb8eeecfafa4a38ed37..4d92fdd3f1d7193d8380854778ccbd35123d101b 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -105,7 +105,6 @@
 #include "llvm/Transforms/Scalar/LoopSink.h"
 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
-#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
@@ -285,10 +284,6 @@ static cl::opt<AttributorRunOption> AttributorRun(
                clEnumValN(AttributorRunOption::NONE, "none",
                           "disable attributor runs")));
 
-static cl::opt<bool> UseLoopVersioningLICM(
-    "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
-    cl::desc("Enable the experimental Loop Versioning LICM pass"));
-
 cl::opt<bool> EnableMemProfContextDisambiguation(
     "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
     cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
@@ -1491,21 +1486,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   invokeOptimizerEarlyEPCallbacks(MPM, Level);
 
   FunctionPassManager OptimizePM;
-  // Scheduling LoopVersioningLICM when inlining is over, because after that
-  // we may see more accurate aliasing. Reason to run this late is that too
-  // early versioning may prevent further inlining due to increase of code
-  // size. Other optimizations which runs later might get benefit of no-alias
-  // assumption in clone loop.
-  if (UseLoopVersioningLICM) {
-    OptimizePM.addPass(
-        createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
-    // LoopVersioningLICM pass might increase new LICM opportunities.
-    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
-        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
-                 /*AllowSpeculation=*/true),
-        /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
-  }
-
   OptimizePM.addPass(Float2IntPass());
   OptimizePM.addPass(LowerConstantIntrinsicsPass());
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index d2c12e6dfdc7307e9c58ddb18e24115a0fcf87c3..f4e07b2dbdac61f25c889c3cc393abf5f2b1471f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -49,6 +49,7 @@
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/CFGuard.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
 #include <memory>
 #include <optional>
 #include <string>
@@ -196,6 +197,10 @@ static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
     cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
     cl::init(false), cl::Hidden);
 
+static cl::opt<bool> EnableLoopVersioningLICM(
+    "aarch64-enable-loop-versioning-licm", cl::init(false), cl::Hidden,
+    cl::desc("Enable the experimental Loop Versioning LICM pass"));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   // Register the target.
   RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -566,6 +571,12 @@ void AArch64PassConfig::addIRPasses() {
                                             .hoistCommonInsts(true)
                                             .sinkCommonInsts(true)));
 
+  if (EnableLoopVersioningLICM) {
+    // Loop needs to be in loop simplify form.
+    addPass(createLoopSimplifyPass());
+    addPass(createLoopVersioningLICMPass());
+  }
+
   // Run LoopDataPrefetch
   //
   // Run this before LSR to remove the multiplies involved in computing the
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 3de3b1f76c080e1ff38c8eb07dcb0f191f9572b6..73469e47c4f9735fe17557775a8402f6b136cb91 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -77,10 +77,12 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -112,7 +114,7 @@ static cl::opt<unsigned> LVLoopDepthThreshold(
 
 static cl::opt<bool> LVOverlap("loop-versioning-overlap",
     cl::desc("Loop versioning with a fixed length's overlap"),
-    cl::init(false), cl::Hidden);
+    cl::init(true), cl::Hidden);
 
 namespace {
 
@@ -729,3 +731,69 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
   return getLoopPassPreservedAnalyses();
 }
 } // namespace llvm
+
+namespace {
+
+/// Legacy pass manager wrapper that runs LoopVersioningLICM on one loop at a
+/// time, so legacy-PM pipelines can schedule the transform through
+/// createLoopVersioningLICMPass().
+class LoopVersioningLICMLegacyPass : public LoopPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  LoopVersioningLICMLegacyPass() : LoopPass(ID) {
+    initializeLoopVersioningLICMLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  /// Fetches the required analyses and forwards the result of
+  /// LoopVersioningLICM::run() for \p L. Returns false immediately when
+  /// skipLoop() asks for the loop to be skipped.
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+    if (skipLoop(L))
+      return false;
+
+    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    OptimizationRemarkEmitter *ORE =
+        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+    // The manager is rebuilt for every loop from the analyses fetched above;
+    // nullptr is passed for its last argument, which this pass does not set up.
+    LoopAccessInfoManager LAIs(*SE, *AA, *DT, *LI, nullptr);
+    return LoopVersioningLICM(AA, SE, ORE, LAIs, *LI, L).run(DT);
+  }
+
+  /// Declares every analysis runOnLoop() queries, plus loop-simplify form.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+    // Keep in sync with INITIALIZE_PASS_DEPENDENCY(LoopSimplify) below, so the
+    // pass manager puts loops into loop-simplify form even when the caller
+    // does not schedule createLoopSimplifyPass() itself.
+    AU.addRequiredID(LoopSimplifyID);
+  }
+};
+
+} // end anonymous namespace
+
+Pass *llvm::createLoopVersioningLICMPass() {
+  return new LoopVersioningLICMLegacyPass();
+}
+
+char LoopVersioningLICMLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
+                      "Loop Versioning LICM", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
+                    "Loop Versioning LICM", false, false)
diff --git a/llvm/test/Transforms/LoopVersioningLICM/lvoverlap.ll b/llvm/test/Transforms/LoopVersioningLICM/lvoverlap.ll
index d59c9472e183cb29d758459795648d16928e6058..300a230798f7a0dec2a471ecb3cbe55a196f1c9d 100644
--- a/llvm/test/Transforms/LoopVersioningLICM/lvoverlap.ll
+++ b/llvm/test/Transforms/LoopVersioningLICM/lvoverlap.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: load
 ; CHECK-NEXT: store i64 [[TMP1]], ptr [[Y:%.*]]
 
-define void @LZ4_wildCopy8(ptr %dstPtr, ptr %srcPtr, ptr %dstEnd) {
+define void @test0(ptr %dstPtr, ptr %srcPtr, ptr %dstEnd) {
 entry:
   br label %do.body
 