From 36d5128eb444031534d2fa75cf2000532d7dad51 Mon Sep 17 00:00:00 2001 From: Sen Fei Date: Tue, 21 Nov 2023 20:28:39 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20Code=20Size=20Optimization=20=20=20?= =?UTF-8?q?=20=20=20=20FunctionMerging=20Pass=20=20=20=20=20=20=20Os?= =?UTF-8?q?=E3=80=81Oz=20properties=20=20=20=20=20=20=20machine-outliner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- llvm/CMakeLists.txt | 7 + llvm/include/llvm/ADT/ArrayView.h | 50 + llvm/include/llvm/ADT/SADiagonalWindows.h | 89 + llvm/include/llvm/ADT/SAHirschberg.h | 190 + llvm/include/llvm/ADT/SANeedlemanWunsch.h | 274 + llvm/include/llvm/ADT/SequenceAlignment.h | 165 + llvm/include/llvm/IR/Function.h | 14 + llvm/include/llvm/Support/CodeSizeOpt.h | 12 + .../llvm/Transforms/IPO/FunctionMerging.h | 420 ++ .../llvm/Transforms/IPO/SearchStrategy.h | 204 + llvm/lib/CodeGen/TargetPassConfig.cpp | 18 + llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 19 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/CodeSizeOpt.cpp | 10 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/FunctionMerging.cpp | 4393 +++++++++++++++++ llvm/lib/Transforms/IPO/MergeFunctions.cpp | 6 + llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 24 + 20 files changed, 5899 insertions(+) create mode 100644 llvm/include/llvm/ADT/ArrayView.h create mode 100644 llvm/include/llvm/ADT/SADiagonalWindows.h create mode 100644 llvm/include/llvm/ADT/SAHirschberg.h create mode 100644 llvm/include/llvm/ADT/SANeedlemanWunsch.h create mode 100644 llvm/include/llvm/ADT/SequenceAlignment.h create mode 100644 llvm/include/llvm/Support/CodeSizeOpt.h create mode 100644 llvm/include/llvm/Transforms/IPO/FunctionMerging.h create mode 100644 llvm/include/llvm/Transforms/IPO/SearchStrategy.h create mode 100644 llvm/lib/Support/CodeSizeOpt.cpp create mode 100644 llvm/lib/Transforms/IPO/FunctionMerging.cpp diff 
--git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index db207e3328be..f9c1cff0ac40 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -28,6 +28,13 @@ if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) endif() +# 添加选项以启用或禁用CODESIZE优化宏 +option(LLVM_ENABLE_CODESIZE_OPT "Enable code size optimizations" OFF) + +if(LLVM_ENABLE_CODESIZE_OPT) + add_definitions(-DENABLE_CODESIZE_OPT) +endif() + if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") diff --git a/llvm/include/llvm/ADT/ArrayView.h b/llvm/include/llvm/ADT/ArrayView.h new file mode 100644 index 000000000000..e32692dd6d05 --- /dev/null +++ b/llvm/include/llvm/ADT/ArrayView.h @@ -0,0 +1,50 @@ +#ifndef LLVM_ADT_ARRAYVIEW_H +#define LLVM_ADT_ARRAYVIEW_H + +template class ArrayView { +public: + using iterator = typename ArrayBaseType::iterator; + using reverse_iterator = typename ArrayBaseType::reverse_iterator; + using value_type = typename ArrayBaseType::value_type; + +private: + iterator Begin; + iterator End; + reverse_iterator RBegin; + reverse_iterator REnd; + size_t Size; + +public: + ArrayView(ArrayBaseType &Arr) { + Begin = Arr.begin(); + End = Arr.end(); + RBegin = Arr.rbegin(); + REnd = Arr.rend(); + Size = End - Begin; + } + + ArrayView(iterator Begin, iterator End, reverse_iterator RBegin, + reverse_iterator REnd) + : Begin(Begin), End(End), RBegin(RBegin), REnd(REnd) { + Size = End - Begin; + } + + iterator begin() { return Begin; } + iterator end() { return End; } + reverse_iterator rbegin() { return RBegin; } + reverse_iterator rend() { return REnd; } + + size_t size() { return Size; } + + void sliceWindow(size_t StartOffset, size_t EndOffset) { + End = Begin + EndOffset; + Begin = Begin + StartOffset; + REnd = RBegin + (Size - StartOffset); + RBegin = RBegin + (Size - EndOffset); + Size = End - Begin; + } + + value_type &operator[](size_t Index) { return *(Begin + Index); } +}; + +#endif diff --git 
a/llvm/include/llvm/ADT/SADiagonalWindows.h b/llvm/include/llvm/ADT/SADiagonalWindows.h new file mode 100644 index 000000000000..86f5990779ad --- /dev/null +++ b/llvm/include/llvm/ADT/SADiagonalWindows.h @@ -0,0 +1,89 @@ +#ifndef LLVM_ADT_SADIAGONALWINDOWS_H +#define LLVM_ADT_SADIAGONALWINDOWS_H + +#include "llvm/ADT/SequenceAlignment.h" + +template > +class DiagonalWindowsSA + : public SequenceAligner { +private: + using BaseType = SequenceAligner; + + size_t WindowSize; + +public: + DiagonalWindowsSA(ScoringSystem Scoring, MatchFnTy Match, size_t WindowSize) + : BaseType(Scoring, Match), WindowSize(WindowSize) {} + + virtual size_t getMemoryRequirement(ContainerType &Seq1, + ContainerType &Seq2) { + size_t MemorySize = + sizeof(ScoreSystemType) * (WindowSize + 1) * (WindowSize + 1); + + if (BaseType::getMatchOperation() != nullptr) + MemorySize += WindowSize * WindowSize * sizeof(bool); + + return MemorySize; + } + + virtual AlignedSequence getAlignment(ContainerType &Seq1, + ContainerType &Seq2) { + + AlignedSequence Res; + + size_t Offset1 = 0; + size_t Offset2 = 0; + + while (Offset1 < Seq1.size() && Offset2 < Seq2.size()) { + + ArrayView View1(Seq1); + size_t EndWindow1 = ((Offset1 + WindowSize) > View1.size()) + ? View1.size() + : (Offset1 + WindowSize); + View1.sliceWindow(Offset1, EndWindow1); + + ArrayView View2(Seq2); + size_t EndWindow2 = ((Offset2 + WindowSize) > View2.size()) + ? 
View2.size() + : (Offset2 + WindowSize); + View2.sliceWindow(Offset2, EndWindow2); + + NeedlemanWunschSA, Ty, Blank, MatchFnTy> SA( + BaseType::getScoring(), BaseType::getMatchOperation()); + + AlignedSequence NWRes = SA.getAlignment(View1, View2); + + Res.splice(NWRes); + + Offset1 = EndWindow1; + Offset2 = EndWindow2; + + // Finished Seq1 or Seq2 + if (Offset1 >= Seq1.size()) { + // Copy the remaining entries from Seq2 + if (Offset2 < Seq2.size()) { + ArrayView View2(Seq2); + View2.sliceWindow(Offset2, Seq2.size()); + for (auto Char : View2) + Res.Data.push_back( + typename BaseType::EntryType(Blank, Char, false)); + } + } else if (Offset2 >= Seq2.size()) { + // Copy the remaining entries from Seq1 + if (Offset1 < Seq1.size()) { + ArrayView View1(Seq1); + View1.sliceWindow(Offset1, Seq1.size()); + for (auto Char : View1) + Res.Data.push_back( + typename BaseType::EntryType(Char, Blank, false)); + } + } + } + + return Res; + } +}; + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/ADT/SAHirschberg.h b/llvm/include/llvm/ADT/SAHirschberg.h new file mode 100644 index 000000000000..b637a0f058ee --- /dev/null +++ b/llvm/include/llvm/ADT/SAHirschberg.h @@ -0,0 +1,190 @@ +#ifndef LLVM_ADT_SAHIRSCHBERG_H +#define LLVM_ADT_SAHIRSCHBERG_H + +#include "llvm/ADT/SequenceAlignment.h" + +template > +class HirschbergSA + : public SequenceAligner { +private: + ScoreSystemType *FinalScore; + ScoreSystemType *ScoreAux; + ScoreSystemType *ScoreCache; + + using BaseType = SequenceAligner; + + template + void NWScore(iterator1 Begin1, iterator1 End1, iterator2 Begin2, + iterator2 End2) { + const size_t SizeSeq1 = End1 - Begin1; + const size_t SizeSeq2 = End2 - Begin2; + + ScoringSystem &Scoring = BaseType::getScoring(); + const ScoreSystemType Gap = Scoring.getGapPenalty(); + const ScoreSystemType Match = Scoring.getMatchProfit(); + const bool AllowMismatch = Scoring.getAllowMismatch(); + const ScoreSystemType Mismatch = + AllowMismatch ? 
Scoring.getMismatchPenalty() + : std::numeric_limits::min(); + + FinalScore[0] = 0; + for (size_t j = 1; j <= SizeSeq2; j++) { + FinalScore[j] = FinalScore[j - 1] + Gap; // Ins(F2[j-1]); + } + + if (BaseType::getMatchOperation() == nullptr) { + if (AllowMismatch) { + for (size_t i = 1; i <= SizeSeq1; i++) { + ScoreAux[0] = FinalScore[0] + Gap; // Del(*(Begin1+(i-1))); + for (size_t j = 1; j <= SizeSeq2; j++) { + ScoreSystemType Similarity = + (*(Begin1 + (i - 1)) == *(Begin2 + (j - 1))) ? Match : Mismatch; + ScoreSystemType ScoreSub = + FinalScore[j - 1] + Similarity; // Sub(F1[i-1],F2[j-1]); + ScoreSystemType ScoreDel = FinalScore[j] + Gap; // Del(F1[i-1]); + ScoreSystemType ScoreIns = ScoreAux[j - 1] + Gap; // Ins(F2[j-1]); + ScoreAux[j] = std::max(std::max(ScoreSub, ScoreDel), ScoreIns); + } + std::swap(FinalScore, ScoreAux); + } + } else { + for (size_t i = 1; i <= SizeSeq1; i++) { + ScoreAux[0] = FinalScore[0] + Gap; // Del(F1[i-1]); + for (size_t j = 1; j <= SizeSeq2; j++) { + ScoreSystemType ScoreSub = + (*(Begin1 + (i - 1)) == *(Begin2 + (j - 1))) + ? (FinalScore[j - 1] + Match) + : Mismatch; + ScoreSystemType ScoreDel = FinalScore[j] + Gap; // Del(F1[i-1]); + ScoreSystemType ScoreIns = ScoreAux[j - 1] + Gap; // Ins(F2[j-1]); + ScoreAux[j] = std::max(std::max(ScoreSub, ScoreDel), ScoreIns); + } + std::swap(FinalScore, ScoreAux); + } + } + } else { + if (AllowMismatch) { + for (size_t i = 1; i <= SizeSeq1; i++) { + ScoreAux[0] = FinalScore[0] + Gap; // Del(*(Begin1+(i-1))); + for (size_t j = 1; j <= SizeSeq2; j++) { + ScoreSystemType Similarity = + BaseType::match(*(Begin1 + (i - 1)), *(Begin2 + (j - 1))) + ? 
Match + : Mismatch; + ScoreSystemType ScoreSub = + FinalScore[j - 1] + Similarity; // Sub(F1[i-1],F2[j-1]); + ScoreSystemType ScoreDel = FinalScore[j] + Gap; // Del(F1[i-1]); + ScoreSystemType ScoreIns = ScoreAux[j - 1] + Gap; // Ins(F2[j-1]); + ScoreAux[j] = std::max(std::max(ScoreSub, ScoreDel), ScoreIns); + } + std::swap(FinalScore, ScoreAux); + } + } else { + for (size_t i = 1; i <= SizeSeq1; i++) { + ScoreAux[0] = FinalScore[0] + Gap; // Del(F1[i-1]); + for (size_t j = 1; j <= SizeSeq2; j++) { + ScoreSystemType ScoreSub = + BaseType::match(*(Begin1 + (i - 1)), *(Begin2 + (j - 1))) + ? (FinalScore[j - 1] + Match) + : Mismatch; + ScoreSystemType ScoreDel = FinalScore[j] + Gap; // Del(F1[i-1]); + ScoreSystemType ScoreIns = ScoreAux[j - 1] + Gap; // Ins(F2[j-1]); + ScoreAux[j] = std::max(std::max(ScoreSub, ScoreDel), ScoreIns); + } + std::swap(FinalScore, ScoreAux); + } + } + } + // last score is in FinalScore + } + + template + void hirschbergRec(ArrayType &Seq1, ArrayType &Seq2, + AlignedSequence &Res) { + if (Seq1.size() == 0) { + for (auto Char : Seq2) { + Res.Data.push_back(typename BaseType::EntryType(Blank, Char, false)); + } + } else if (Seq2.size() == 0) { + for (auto Char : Seq1) { + Res.Data.push_back(typename BaseType::EntryType(Char, Blank, false)); + } + } else if (Seq1.size() == 1 || Seq2.size() == 1) { + NeedlemanWunschSA, Ty, Blank, MatchFnTy> SA( + BaseType::getScoring(), BaseType::getMatchOperation()); + AlignedSequence NWResult = SA.getAlignment(Seq1, Seq2); + Res.splice(NWResult); + } else { + int Seq1Mid = Seq1.size() / 2; + + NWScore(Seq1.begin(), Seq1.begin() + Seq1Mid, Seq2.begin(), Seq2.end()); + std::swap(FinalScore, ScoreCache); + + ArrayType SlicedSeq1(Seq1); + SlicedSeq1.sliceWindow(Seq1Mid, Seq1.size()); + NWScore(SlicedSeq1.rbegin(), SlicedSeq1.rend(), Seq2.rbegin(), + Seq2.rend()); + + size_t Seq2Mid = 0; + int MaxScore = std::numeric_limits::min(); + size_t Size2 = Seq2.size(); + for (size_t i = 0; i < Seq2.size(); i++) { + int S 
= ScoreCache[i] + FinalScore[Size2 - i]; + if (S >= MaxScore) { + MaxScore = S; + Seq2Mid = i; + } + } + + ArrayType NewSeq1L(Seq1); + NewSeq1L.sliceWindow(0, Seq1Mid); + ArrayType NewSeq2L(Seq2); + NewSeq2L.sliceWindow(0, Seq2Mid); + hirschbergRec(NewSeq1L, NewSeq2L, Res); + + ArrayType NewSeq1R(Seq1); + NewSeq1R.sliceWindow(Seq1Mid, Seq1.size()); + ArrayType NewSeq2R(Seq2); + NewSeq2R.sliceWindow(Seq2Mid, Seq2.size()); + hirschbergRec(NewSeq1R, NewSeq2R, Res); + } + } + +public: + HirschbergSA() + : BaseType(NeedlemanWunschSA, Ty, Blank, + MatchFnTy>::getDefaultScoring(), + nullptr) {} + + HirschbergSA(ScoringSystem Scoring, MatchFnTy Match = nullptr) + : BaseType(Scoring, Match) {} + + virtual size_t getMemoryRequirement(ContainerType &Seq1, + ContainerType &Seq2) { + size_t MemorySize = sizeof(ScoreSystemType) * (3 * (Seq2.size() + 1)); + + if (BaseType::getMatchOperation() != nullptr) + MemorySize += sizeof(bool) * (3 * (Seq2.size() + 1)); + + return MemorySize; + } + + virtual AlignedSequence getAlignment(ContainerType &Seq1, + ContainerType &Seq2) { + AlignedSequence Result; + ScoreSystemType *ScoreContainer = + new ScoreSystemType[3 * (Seq2.size() + 1)]; + FinalScore = &ScoreContainer[0]; + ScoreAux = &ScoreContainer[Seq2.size() + 1]; + ScoreCache = &ScoreContainer[2 * (Seq2.size() + 1)]; + ArrayView View1(Seq1); + ArrayView View2(Seq2); + hirschbergRec(View1, View2, Result); + delete[] ScoreContainer; + return Result; + } +}; + +#endif diff --git a/llvm/include/llvm/ADT/SANeedlemanWunsch.h b/llvm/include/llvm/ADT/SANeedlemanWunsch.h new file mode 100644 index 000000000000..dccc690bf288 --- /dev/null +++ b/llvm/include/llvm/ADT/SANeedlemanWunsch.h @@ -0,0 +1,274 @@ +#ifndef LLVM_ADT_SANEEDLEMANWUNSCH_H +#define LLVM_ADT_SANEEDLEMANWUNSCH_H + +#include "llvm/ADT/SequenceAlignment.h" + +template > +class NeedlemanWunschSA + : public SequenceAligner { +private: + ScoreSystemType *Matrix; + size_t MatrixRows; + size_t MatrixCols; + bool *Matches; + size_t 
MatchesRows; + size_t MatchesCols; + + const static unsigned END = 0; + const static unsigned DIAGONAL = 1; + const static unsigned UP = 2; + const static unsigned LEFT = 3; + + size_t MaxRow; + size_t MaxCol; + + using BaseType = SequenceAligner; + + void cacheAllMatches(ContainerType &Seq1, ContainerType &Seq2) { + if (BaseType::getMatchOperation() == nullptr) { + Matches = nullptr; + return; + } + const size_t SizeSeq1 = Seq1.size(); + const size_t SizeSeq2 = Seq2.size(); + + MatchesRows = SizeSeq1; + MatchesCols = SizeSeq2; + Matches = new bool[SizeSeq1 * SizeSeq2]; + for (unsigned i = 0; i < SizeSeq1; i++) + for (unsigned j = 0; j < SizeSeq2; j++) + Matches[i * SizeSeq2 + j] = BaseType::match(Seq1[i], Seq2[j]); + } + + void computeScoreMatrix(ContainerType &Seq1, ContainerType &Seq2) { + const size_t SizeSeq1 = Seq1.size(); + const size_t SizeSeq2 = Seq2.size(); + + const size_t NumRows = SizeSeq1 + 1; + const size_t NumCols = SizeSeq2 + 1; + Matrix = new ScoreSystemType[NumRows * NumCols]; + MatrixRows = NumRows; + MatrixCols = NumCols; + + ScoringSystem &Scoring = BaseType::getScoring(); + const ScoreSystemType Gap = Scoring.getGapPenalty(); + const ScoreSystemType Match = Scoring.getMatchProfit(); + const bool AllowMismatch = Scoring.getAllowMismatch(); + const ScoreSystemType Mismatch = + AllowMismatch ? Scoring.getMismatchPenalty() + : std::numeric_limits::min(); + + for (unsigned i = 0; i < NumRows; i++) + Matrix[i * NumCols + 0] = i * Gap; + for (unsigned j = 0; j < NumCols; j++) + Matrix[0 * NumCols + j] = j * Gap; + + ScoreSystemType MaxScore = std::numeric_limits::min(); + if (Matches) { + if (AllowMismatch) { + for (unsigned i = 1; i < NumRows; i++) { + for (unsigned j = 1; j < NumCols; j++) { + ScoreSystemType Similarity = + Matches[(i - 1) * MatchesCols + j - 1] ? 
Match : Mismatch; + ScoreSystemType Diagonal = + Matrix[(i - 1) * NumCols + j - 1] + Similarity; + ScoreSystemType Upper = Matrix[(i - 1) * NumCols + j] + Gap; + ScoreSystemType Left = Matrix[i * NumCols + j - 1] + Gap; + ScoreSystemType Score = std::max(std::max(Diagonal, Upper), Left); + Matrix[i * NumCols + j] = Score; + if (Score >= MaxScore) { + MaxScore = Score; + MaxRow = i; + MaxCol = j; + } + } + } + } else { + for (unsigned i = 1; i < NumRows; i++) { + for (unsigned j = 1; j < NumCols; j++) { + ScoreSystemType Diagonal = + Matches[(i - 1) * MatchesCols + j - 1] + ? (Matrix[(i - 1) * NumCols + j - 1] + Match) + : Mismatch; + ScoreSystemType Upper = Matrix[(i - 1) * NumCols + j] + Gap; + ScoreSystemType Left = Matrix[i * NumCols + j - 1] + Gap; + ScoreSystemType Score = std::max(std::max(Diagonal, Upper), Left); + Matrix[i * NumCols + j] = Score; + if (Score >= MaxScore) { + MaxScore = Score; + MaxRow = i; + MaxCol = j; + } + } + } + } + } else { + if (AllowMismatch) { + for (unsigned i = 1; i < NumRows; i++) { + for (unsigned j = 1; j < NumCols; j++) { + ScoreSystemType Similarity = + (Seq1[i - 1] == Seq2[j - 1]) ? Match : Mismatch; + ScoreSystemType Diagonal = + Matrix[(i - 1) * NumCols + j - 1] + Similarity; + ScoreSystemType Upper = Matrix[(i - 1) * NumCols + j] + Gap; + ScoreSystemType Left = Matrix[i * NumCols + j - 1] + Gap; + ScoreSystemType Score = std::max(std::max(Diagonal, Upper), Left); + Matrix[i * NumCols + j] = Score; + if (Score >= MaxScore) { + MaxScore = Score; + MaxRow = i; + MaxCol = j; + } + } + } + } else { + for (unsigned i = 1; i < NumRows; i++) { + for (unsigned j = 1; j < NumCols; j++) { + ScoreSystemType Diagonal = + (Seq1[i - 1] == Seq2[j - 1]) + ? 
(Matrix[(i - 1) * NumCols + j - 1] + Match) + : Mismatch; + ScoreSystemType Upper = Matrix[(i - 1) * NumCols + j] + Gap; + ScoreSystemType Left = Matrix[i * NumCols + j - 1] + Gap; + ScoreSystemType Score = std::max(std::max(Diagonal, Upper), Left); + Matrix[i * NumCols + j] = Score; + if (Score >= MaxScore) { + MaxScore = Score; + MaxRow = i; + MaxCol = j; + } + } + } + } + } + } + + void buildResult(ContainerType &Seq1, ContainerType &Seq2, + AlignedSequence &Result) { + auto &Data = Result.Data; + + ScoringSystem &Scoring = BaseType::getScoring(); + const ScoreSystemType Gap = Scoring.getGapPenalty(); + const ScoreSystemType Match = Scoring.getMatchProfit(); + const bool AllowMismatch = Scoring.getAllowMismatch(); + const ScoreSystemType Mismatch = + AllowMismatch ? Scoring.getMismatchPenalty() + : std::numeric_limits::min(); + + int i = MatrixRows - 1, j = MatrixCols - 1; + + size_t LongestMatch = 0; + size_t CurrentMatch = 0; + + while (i > 0 || j > 0) { + if (i > 0 && j > 0) { + // Diagonal + + bool IsValidMatch = false; + + ScoreSystemType Score = std::numeric_limits::min(); + if (Matches) { + IsValidMatch = Matches[(i - 1) * MatchesCols + j - 1]; + } else { + IsValidMatch = (Seq1[i - 1] == Seq2[j - 1]); + } + + if (!IsValidMatch) { + if (CurrentMatch > LongestMatch) + LongestMatch = CurrentMatch; + CurrentMatch = 0; + } else + CurrentMatch += 1; + + if (AllowMismatch) { + Score = Matrix[(i - 1) * MatrixCols + j - 1] + + (IsValidMatch ? Match : Mismatch); + } else { + Score = IsValidMatch ? 
(Matrix[(i - 1) * MatrixCols + j - 1] + Match) + : Mismatch; + } + + if (Matrix[i * MatrixCols + j] == Score) { + if (IsValidMatch || AllowMismatch) { + Data.push_front(typename BaseType::EntryType( + Seq1[i - 1], Seq2[j - 1], IsValidMatch)); + } else { + Data.push_front( + typename BaseType::EntryType(Seq1[i - 1], Blank, false)); + Data.push_front( + typename BaseType::EntryType(Blank, Seq2[j - 1], false)); + } + + i--; + j--; + continue; + } + } + if (i > 0 && Matrix[i * MatrixCols + j] == + (Matrix[(i - 1) * MatrixCols + j] + Gap)) { + // Up + Data.push_front( + typename BaseType::EntryType(Seq1[i - 1], Blank, false)); + i--; + } else if (j > 0 && Matrix[i * MatrixCols + j] == + (Matrix[i * MatrixCols + (j - 1)] + Gap)) { + // Left + Data.push_front( + typename BaseType::EntryType(Blank, Seq2[j - 1], false)); + j--; + } + } + + if (CurrentMatch > LongestMatch) + LongestMatch = CurrentMatch; + } + + void clearAll() { + if (Matrix) + delete[] Matrix; + if (Matches) + delete[] Matches; + Matrix = nullptr; + Matches = nullptr; + } + +public: + static ScoringSystem getDefaultScoring() { return ScoringSystem(-1, 2, -1); } + + NeedlemanWunschSA() + : BaseType(getDefaultScoring(), nullptr), Matrix(nullptr), + Matches(nullptr) {} + + NeedlemanWunschSA(ScoringSystem Scoring, MatchFnTy Match = nullptr) + : BaseType(Scoring, Match), Matrix(nullptr), Matches(nullptr) {} + + ~NeedlemanWunschSA() { clearAll(); } + + virtual size_t getMemoryRequirement(ContainerType &Seq1, + ContainerType &Seq2) override { + const size_t SizeSeq1 = Seq1.size(); + const size_t SizeSeq2 = Seq2.size(); + size_t MemorySize = 0; + + MemorySize += sizeof(ScoreSystemType) * (SizeSeq1 + 1) * (SizeSeq2 + 1); + + if (BaseType::getMatchOperation() != nullptr) + MemorySize += SizeSeq1 * SizeSeq2 * sizeof(bool); + + return MemorySize; + } + + virtual AlignedSequence + getAlignment(ContainerType &Seq1, ContainerType &Seq2) override { + AlignedSequence Result; + cacheAllMatches(Seq1, Seq2); + 
computeScoreMatrix(Seq1, Seq2); + buildResult(Seq1, Seq2, Result); + clearAll(); + return Result; + } +}; + +#endif diff --git a/llvm/include/llvm/ADT/SequenceAlignment.h b/llvm/include/llvm/ADT/SequenceAlignment.h new file mode 100644 index 000000000000..8b4103c7cd06 --- /dev/null +++ b/llvm/include/llvm/ADT/SequenceAlignment.h @@ -0,0 +1,165 @@ +//===-- llvm/ADT/SequenceAlignment.h - Sequence Alignment -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Provides efficient implementations of different algorithms for sequence +// alignment. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SEQUENCEALIGNMENT_H +#define LLVM_ADT_SEQUENCEALIGNMENT_H + +#include +#include +#include +#include +#include // INT_MIN +#include + +#include "llvm/ADT/ArrayView.h" + +#define ScoreSystemType int + +// Store alignment result here +template class AlignedSequence { +public: + class Entry { + private: + // TODO: change it for a vector for Multi-Sequence Alignment + std::pair Pair; + bool IsMatchingPair; + + public: + Entry() { IsMatchingPair = false; } + + Entry(Ty V1, Ty V2) : Pair(V1, V2) { IsMatchingPair = !hasBlank(); } + + Entry(Ty V1, Ty V2, bool Matching) + : Pair(V1, V2), IsMatchingPair(Matching) {} + + Ty get(size_t Index) const { + assert((Index == 0 || Index == 1) && "Index out of bounds!"); + if (Index == 0) + return Pair.first; + return Pair.second; + } + + bool empty() const { return (Pair.first == Blank && Pair.second == Blank); } + bool hasBlank() const { + return (Pair.first == Blank || Pair.second == Blank); + } + + bool match() const { return IsMatchingPair; } + bool mismatch() const { return (!IsMatchingPair); } + + Ty getNonBlank() const { + if (Pair.first != Blank) + return 
Pair.first; + return Pair.second; + } + }; + + std::list Data; + size_t LargestMatch{0}; + + AlignedSequence() = default; + + AlignedSequence(const AlignedSequence &Other) + : Data(Other.Data), LargestMatch(Other.LargestMatch) {} + AlignedSequence(AlignedSequence &&Other) + : Data(std::move(Other.Data)), LargestMatch(Other.LargestMatch) {} + + AlignedSequence &operator=(const AlignedSequence &Other) { + Data = Other.Data; + LargestMatch = Other.LargestMatch; + return (*this); + } + + void append(const AlignedSequence &Other) { + Data.insert(Data.end(), Other.Data.begin(), Other.Data.end()); + } + + void splice(AlignedSequence &Other) { + Data.splice(Data.end(), Other.Data); + } + + typename std::list::iterator begin() { return Data.begin(); } + typename std::list::iterator end() { return Data.end(); } + typename std::list::const_iterator begin() const { + return Data.cbegin(); + } + typename std::list::const_iterator end() const { return Data.cend(); } + + size_t size() { return Data.size(); } +}; + +class ScoringSystem { + ScoreSystemType Gap; + ScoreSystemType Match; + ScoreSystemType Mismatch; + bool AllowMismatch; + +public: + ScoringSystem(ScoreSystemType Gap, ScoreSystemType Match) { + this->Gap = Gap; + this->Match = Match; + this->Mismatch = std::numeric_limits::min(); + this->AllowMismatch = false; + } + + ScoringSystem(ScoreSystemType Gap, ScoreSystemType Match, + ScoreSystemType Mismatch, bool AllowMismatch = true) { + this->Gap = Gap; + this->Match = Match; + this->Mismatch = Mismatch; + this->AllowMismatch = AllowMismatch; + } + + bool getAllowMismatch() { return AllowMismatch; } + + ScoreSystemType getMismatchPenalty() { return Mismatch; } + + ScoreSystemType getGapPenalty() { return Gap; } + + ScoreSystemType getMatchProfit() { return Match; } +}; + +template > +class SequenceAligner { +private: + ScoringSystem Scoring; + MatchFnTy Match; + +public: + using EntryType = typename AlignedSequence::Entry; + + SequenceAligner(ScoringSystem Scoring, 
MatchFnTy Match = nullptr) + : Scoring(Scoring), Match(Match) {} + + virtual ~SequenceAligner() = default; + + ScoringSystem &getScoring() { return Scoring; } + + bool match(Ty Val1, Ty Val2) { return Match(Val1, Val2); } + + MatchFnTy getMatchOperation() { return Match; } + + Ty getBlank() { return Blank; } + + virtual AlignedSequence getAlignment(ContainerType &Seq0, + ContainerType &Seq1) = 0; + virtual size_t getMemoryRequirement(ContainerType &Seq0, + ContainerType &Seq1) = 0; +}; + +#include "llvm/ADT/SANeedlemanWunsch.h" + +#endif diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 7945c64c8610..062d43f1913e 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -38,6 +38,10 @@ #include #include +#ifdef ENABLE_CODESIZE_OPT +#include "llvm/Support/CodeSizeOpt.h" +#endif + namespace llvm { namespace Intrinsic { @@ -662,7 +666,17 @@ public: /// Optimize this function for size (-Os) or minimum size (-Oz). bool hasOptSize() const { + #ifdef ENABLE_CODESIZE_OPT + if(EnableCodeSize){ + //for size + return true; + } + else{ + return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize(); + } + #else return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize(); + #endif } /// Returns the denormal handling type for the default rounding mode of the diff --git a/llvm/include/llvm/Support/CodeSizeOpt.h b/llvm/include/llvm/Support/CodeSizeOpt.h new file mode 100644 index 000000000000..1fd9a72b3c32 --- /dev/null +++ b/llvm/include/llvm/Support/CodeSizeOpt.h @@ -0,0 +1,12 @@ + +#ifndef LLVM_SUPPORT_CODESIZEOPT_H +#define LLVM_SUPPORT_CODESIZEOPT_H +#include "llvm/Support/CommandLine.h" + +namespace llvm { +extern llvm::cl::opt EnableCodeSize; +} // namespace llvm + + + +#endif // LLVM_SUPPORT_CODESIZEOPT_H \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/FunctionMerging.h b/llvm/include/llvm/Transforms/IPO/FunctionMerging.h new file mode 100644 index 000000000000..a3d10b60bff9 
--- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/FunctionMerging.h @@ -0,0 +1,420 @@ +//===- FunctionMerging.h - A function merging pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the general function merging optimization. +// +// It identifies similarities between functions, and if profitable, merges them +// into a single function, replacing the original ones. Functions do not need +// to be identical to be merged. In fact, there is very little restriction to +// merge two functions; however, the produced merged function can be larger than +// the two original functions together. For that reason, it uses the +// TargetTransformInfo analysis to estimate the code-size costs of instructions +// in order to estimate the profitability of merging two functions. +// +// This function merging transformation has three major parts: +// 1. The input functions are linearized, representing their CFGs as sequences +// of labels and instructions. +// 2. We apply a sequence alignment algorithm, namely, the Needleman-Wunsch +// algorithm, to identify similar code between the two linearized functions. +// 3. We use the aligned sequences to perform code generation, producing the new +// merged function, using an extra parameter to represent the function +// identifier. +// +// This pass integrates the function merging transformation with an exploration +// framework. For every function, the other functions are ranked based on their +// degree of similarity, which is computed from the functions' fingerprints. +// Only the top candidates are analyzed in a greedy manner and if one of them +// produces a profitable result, the merged function is taken. 
+// +//===----------------------------------------------------------------------===// +// +// This optimization was proposed in +// +// Function Merging by Sequence Alignment: An Interprocedural Code-Size +// Optimization +// Rodrigo C. O. Rocha, Pavlos Petoumenos, Zheng Wang, Murray Cole, Hugh Leather +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONMERGING_H +#define LLVM_TRANSFORMS_IPO_FUNCTIONMERGING_H + +#include "llvm/ADT/SequenceAlignment.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" + +#include "llvm/InitializePasses.h" + +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +#include "llvm/Transforms/IPO/SearchStrategy.h" +#include "llvm/Transforms/Utils/Cloning.h" + +#include +#include + +namespace llvm { + +/// A set of parameters used to control the transforms by MergeFunctions. 
+/// Options controlling how FunctionMerger matches and merges two functions.
+struct FunctionMergingOptions {
+  bool MaximizeParamScore;
+  bool IdenticalTypesOnly;
+  bool EnableUnifiedReturnType;
+
+  FunctionMergingOptions(bool MaximizeParamScore = true,
+                         bool IdenticalTypesOnly = true,
+                         bool EnableUnifiedReturnType = true)
+      : MaximizeParamScore(MaximizeParamScore),
+        IdenticalTypesOnly(IdenticalTypesOnly),
+        EnableUnifiedReturnType(EnableUnifiedReturnType) {}
+
+  // Chainable setters: each one stores its flag and returns *this.
+  FunctionMergingOptions &maximizeParameterScore(bool MPS) {
+    MaximizeParamScore = MPS;
+    return *this;
+  }
+
+  FunctionMergingOptions &matchOnlyIdenticalTypes(bool IT) {
+    IdenticalTypesOnly = IT;
+    return *this;
+  }
+
+  FunctionMergingOptions &enableUnifiedReturnTypes(bool URT) {
+    EnableUnifiedReturnType = URT;
+    return *this;
+  }
+};
+
+/// An aligned sequence of values together with three counters (Insts, Matches
+/// and CoreMatches) describing the alignment.
+class AlignedCode : public AlignedSequence<Value *> {
+public:
+  int Insts{0};
+  int Matches{0};
+  int CoreMatches{0};
+
+  AlignedCode() = default;
+
+  AlignedCode(const AlignedCode &Other)
+      : AlignedSequence(Other), Insts{Other.Insts}, Matches{Other.Matches},
+        CoreMatches{Other.CoreMatches} {}
+
+  // Only the base subobject is moved from; the counters are plain ints and are
+  // still valid to read afterwards.
+  AlignedCode(AlignedCode &&Other)
+      : AlignedSequence(std::move(Other)), Insts{Other.Insts},
+        Matches{Other.Matches}, CoreMatches{Other.CoreMatches} {}
+
+  // Conversions from a bare alignment leave all counters at zero.
+  AlignedCode(const AlignedSequence &Other) : AlignedSequence(Other) {}
+
+  AlignedCode(AlignedSequence &&Other) : AlignedSequence(std::move(Other)) {}
+
+  AlignedCode(BasicBlock *B1, BasicBlock *B2);
+
+  AlignedCode &operator=(const AlignedCode &Other) {
+    Data = Other.Data;
+    LargestMatch = Other.LargestMatch;
+    Insts = Other.Insts;
+    Matches = Other.Matches;
+    CoreMatches = Other.CoreMatches;
+    return (*this);
+  }
+
+  void extend(const AlignedCode &Other);
+  void extend(int Index, const BasicBlock *BB);
+
+  /// True when every counted instruction matched or at least one core match
+  /// was recorded.
+  bool hasMatches() const { return (Matches == Insts) || (CoreMatches > 0); }
+  bool isProfitable() const;
+};
+
+/// Outcome of merging two functions: the input pair, the merged function (or
+/// nullptr) and the per-function argument index mappings.
+class FunctionMergeResult {
+private:
+  Function *F1;
+  Function *F2;
+  Function *MergedFunction;
+  bool HasIdArg;
+  bool NeedUnifiedReturn;
+  std::map<unsigned, unsigned> ParamMap1;
+  std::map<unsigned, unsigned> ParamMap2;
+
+  FunctionMergeResult()
+      : F1(nullptr), F2(nullptr), MergedFunction(nullptr), HasIdArg(false),
+        NeedUnifiedReturn(false) {}
+
+public:
+  // feise: records whether the functions were successfully merged. Note that
+  // operator bool() below does not consult this flag.
+  bool Success = true;
+  FunctionMergeResult(bool Success)
+      : F1(nullptr), F2(nullptr), MergedFunction(nullptr), HasIdArg(false),
+        NeedUnifiedReturn(false), Success(Success) {}
+
+  FunctionMergeResult(Function *F1, Function *F2, Function *MergedFunction,
+                      bool NeedUnifiedReturn = false)
+      : F1(F1), F2(F2), MergedFunction(MergedFunction), HasIdArg(true),
+        NeedUnifiedReturn(NeedUnifiedReturn) {}
+
+  std::pair<Function *, Function *> getFunctions() const {
+    return std::pair<Function *, Function *>(F1, F2);
+  }
+
+  /// Returns the mapping of \p F if it is the first function, otherwise the
+  /// mapping of the second function.
+  std::map<unsigned, unsigned> &getArgumentMapping(Function *F) {
+    return (F1 == F) ? ParamMap1 : ParamMap2;
+  }
+
+  /// Returns the i1 constant identifying \p F: true for the first function,
+  /// false for the second, nullptr for any other function.
+  Value *getFunctionIdValue(Function *F) {
+    if (F == F1)
+      return ConstantInt::getTrue(IntegerType::get(F1->getContext(), 1));
+    if (F == F2)
+      return ConstantInt::getFalse(IntegerType::get(F2->getContext(), 1));
+    return nullptr;
+  }
+
+  void setFunctionIdArgument(bool HasFuncIdArg) { HasIdArg = HasFuncIdArg; }
+
+  bool hasFunctionIdArgument() const { return HasIdArg; }
+
+  void setUnifiedReturn(bool NeedUnifiedReturn) {
+    this->NeedUnifiedReturn = NeedUnifiedReturn;
+  }
+
+  bool needUnifiedReturn() const { return NeedUnifiedReturn; }
+
+  // returns whether or not the merge operation was successful
+  operator bool() const { return (MergedFunction != nullptr); }
+
+  // Both mutators below silently ignore a function that is neither F1 nor F2.
+  void setArgumentMapping(Function *F, std::map<unsigned, unsigned> &ParamMap) {
+    if (F == F1)
+      ParamMap1 = ParamMap;
+    else if (F == F2)
+      ParamMap2 = ParamMap;
+  }
+
+  void addArgumentMapping(Function *F, unsigned SrcArg, unsigned DstArg) {
+    if (F == F1)
+      ParamMap1[SrcArg] = DstArg;
+    else if (F == F2)
+      ParamMap2[SrcArg] = DstArg;
+  }
+
+  Function *getMergedFunction() const { return MergedFunction; }
+
+  // static const FunctionMergeResult Error;
+};
+
+class FunctionMerger {
+private:
+  Module *M;
+
+  // ProfileSummaryInfo *PSI;
+  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI;
+
+  // The three members below are only filled in when a module is supplied, so
+  // they default to nullptr instead of being left indeterminate.
+  Type *IntPtrTy = nullptr;
+
+  const DataLayout *DL = nullptr;
+  LLVMContext *ContextPtr = nullptr;
+
+  // cache of linear functions
+  // KeyValueCache<Function *, SmallVector<Value *, 8>> LFCache;
+
+  // statistics for analyzing this optimization for future improvements
+  // unsigned LastMaxParamScore = 0;
+  // unsigned TotalParamScore = 0;
+  // int CountOpReorder = 0;
+  // int CountBinOps = 0;
+
+  enum LinearizationKind { LK_Random, LK_Canonical };
+
+  void linearize(Function *F, SmallVectorImpl<Value *> &FVec,
+                 LinearizationKind LK = LinearizationKind::LK_Canonical);
+
+  void replaceByCall(Function *F, FunctionMergeResult &MergedFunc,
+                     const FunctionMergingOptions &Options = {});
+  bool replaceCallsWith(Function *F, FunctionMergeResult &MergedFunc,
+                        const FunctionMergingOptions &Options = {});
+
+  void updateCallGraph(Function *F, FunctionMergeResult &MFR,
+                       StringSet<> &AlwaysPreserved,
+                       const FunctionMergingOptions &Options);
+
+public:
+  FunctionMerger(Module *M) : M(M) {
+    //, ProfileSummaryInfo *PSI=nullptr, function_ref<BlockFrequencyInfo
+    //*(Function &)> LookupBFI=nullptr) : M(M), PSI(PSI), LookupBFI(LookupBFI),
+    // IntPtrTy(nullptr) {
+    if (M) {
+      DL = &M->getDataLayout();
+      ContextPtr = &M->getContext();
+      IntPtrTy = DL->getIntPtrType(*ContextPtr);
+    }
+  }
+
+  bool validMergeTypes(Function *F1, Function *F2,
+                       const FunctionMergingOptions &Options = {});
+
+  static bool areTypesEquivalent(Type *Ty1, Type *Ty2, const DataLayout *DL,
+                                 const FunctionMergingOptions &Options = {});
+
+  static bool match(Value *V1, Value *V2);
+  static bool matchInstructions(Instruction *I1, Instruction *I2,
+                                const FunctionMergingOptions &Options = {});
+  static bool matchWholeBlocks(Value *V1, Value *V2);
+  static bool matchBlocks(BasicBlock *B1, BasicBlock *B2);
+
+  void updateCallGraph(FunctionMergeResult &Result,
+                       StringSet<> &AlwaysPreserved,
+                       const FunctionMergingOptions &Options = {});
+
+  FunctionMergeResult merge(Function *F1, Function *F2, std::string Name = "",
+                            const FunctionMergingOptions &Options = {});
+
+  /// Base class of the merged-code generators. It snapshots the blocks of
+  /// both input functions, is configured through the chainable setters below
+  /// and records the blocks and instructions registered through insert().
+  class CodeGenerator {
+  private:
+    // Everything below is configured through the setters, so it starts out as
+    // nullptr/false rather than indeterminate.
+    LLVMContext *ContextPtr = nullptr;
+    Type *IntPtrTy = nullptr;
+
+    Value *IsFunc1 = nullptr;
+
+    std::vector<BasicBlock *> Blocks1;
+    std::vector<BasicBlock *> Blocks2;
+
+    BasicBlock *EntryBB1 = nullptr;
+    BasicBlock *EntryBB2 = nullptr;
+    BasicBlock *PreBB = nullptr;
+
+    Type *RetType1 = nullptr;
+    Type *RetType2 = nullptr;
+    Type *ReturnType = nullptr;
+
+    bool RequiresUnifiedReturn = false;
+
+    Function *MergedFunc = nullptr;
+
+    SmallPtrSet<BasicBlock *, 8> CreatedBBs;
+    SmallPtrSet<Instruction *, 8> CreatedInsts;
+
+  protected:
+    void removeRedundantInstructions(std::vector<Instruction *> &WorkInst,
+                                     DominatorTree &DT);
+
+  public:
+    CodeGenerator(Function *F1, Function *F2) {
+      for (BasicBlock &BB : *F1)
+        Blocks1.push_back(&BB);
+      for (BasicBlock &BB : *F2)
+        Blocks2.push_back(&BB);
+    }
+    virtual ~CodeGenerator() {}
+
+    CodeGenerator &setContext(LLVMContext *ContextPtr) {
+      this->ContextPtr = ContextPtr;
+      return *this;
+    }
+
+    CodeGenerator &setIntPtrType(Type *IntPtrTy) {
+      this->IntPtrTy = IntPtrTy;
+      return *this;
+    }
+
+    CodeGenerator &setFunctionIdentifier(Value *IsFunc1) {
+      this->IsFunc1 = IsFunc1;
+      return *this;
+    }
+
+    CodeGenerator &setEntryPoints(BasicBlock *EntryBB1, BasicBlock *EntryBB2) {
+      this->EntryBB1 = EntryBB1;
+      this->EntryBB2 = EntryBB2;
+      return *this;
+    }
+
+    CodeGenerator &setReturnTypes(Type *RetType1, Type *RetType2) {
+      this->RetType1 = RetType1;
+      this->RetType2 = RetType2;
+      return *this;
+    }
+
+    CodeGenerator &setMergedEntryPoint(BasicBlock *PreBB) {
+      this->PreBB = PreBB;
+      return *this;
+    }
+
+    CodeGenerator &setMergedReturnType(Type *ReturnType,
+                                       bool RequiresUnifiedReturn = false) {
+      this->ReturnType = ReturnType;
+      this->RequiresUnifiedReturn = RequiresUnifiedReturn;
+      return *this;
+    }
+
+    CodeGenerator &setMergedFunction(Function *MergedFunc) {
+      this->MergedFunc = MergedFunc;
+      return *this;
+    }
+
+    Function *getMergedFunction() { return MergedFunc; }
+    Type *getMergedReturnType() { return ReturnType; }
+    bool getRequiresUnifiedReturn() { return RequiresUnifiedReturn; }
+
+    Value *getFunctionIdentifier() { return IsFunc1; }
+
+    // Dereferences the context pointer: setContext() must have been called.
+    LLVMContext &getContext() { return *ContextPtr; }
+
+    std::vector<BasicBlock *> &getBlocks1() { return Blocks1; }
+    std::vector<BasicBlock *> &getBlocks2() { return Blocks2; }
+
+    BasicBlock *getEntryBlock1() { return EntryBB1; }
+    BasicBlock *getEntryBlock2() { return EntryBB2; }
+    BasicBlock *getPreBlock() { return PreBB; }
+
+    Type *getReturnType1() { return RetType1; }
+    Type *getReturnType2() { return RetType2; }
+
+    Type *getIntPtrType() { return IntPtrTy; }
+
+    void insert(BasicBlock *BB) { CreatedBBs.insert(BB); }
+    void insert(Instruction *I) { CreatedInsts.insert(I); }
+
+    void erase(BasicBlock *BB) { CreatedBBs.erase(BB); }
+    void erase(Instruction *I) { CreatedInsts.erase(I); }
+
+    virtual bool generate(AlignedCode &AlignedSeq, ValueToValueMapTy &VMap,
+                          const FunctionMergingOptions &Options = {}) = 0;
+
+    void destroyGeneratedCode();
+
+    // Iteration covers the registered instructions only, not the blocks.
+    SmallPtrSet<Instruction *, 8>::const_iterator begin() const {
+      return CreatedInsts.begin();
+    }
+    SmallPtrSet<Instruction *, 8>::const_iterator end() const {
+      return CreatedInsts.end();
+    }
+  };
+
+  class SALSSACodeGen : public FunctionMerger::CodeGenerator {
+
+  public:
+    SALSSACodeGen(Function *F1, Function *F2) : CodeGenerator(F1, F2) {}
+    ~SALSSACodeGen() override = default;
+    bool generate(AlignedCode &AlignedSeq, ValueToValueMapTy &VMap,
+                  const FunctionMergingOptions &Options = {}) override;
+  };
+};
+
+FunctionMergeResult mergeFunctions(Function *F1, Function *F2,
+                                   const FunctionMergingOptions &Options = {});
+
+class FunctionMergingPass : public PassInfoMixin<FunctionMergingPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Transforms/IPO/SearchStrategy.h b/llvm/include/llvm/Transforms/IPO/SearchStrategy.h
new file mode 100644
index 000000000000..08e5c6ad122a
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/SearchStrategy.h
@@ -0,0 +1,220 @@
+#ifndef LLVM_TRANSFORMS_IPO_SEARCHSTRATEGY_H
+#define LLVM_TRANSFORMS_IPO_SEARCHSTRATEGY_H
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <random>
+#include <unordered_set>
+#include <vector>
+
+/// MinHash fingerprinting and LSH banding helper.
+///
+/// A fingerprint holds NHashes = Rows * Bands 32-bit values; generateBands()
+/// folds every run of Rows values into one band hash.
+class SearchStrategy {
+private:
+  // 32-bit FNV-1a parameters. The 64-bit prime 1099511628211 must not be used
+  // with a uint32_t accumulator: it is silently truncated to 435.
+  enum : uint32_t { FNVOffsetBasis = 2166136261u, FNVPrime = 16777619u };
+
+  // Default values
+  const size_t NHashes{200};
+  const size_t Rows{2};
+  const size_t Bands{100};
+  std::vector<uint32_t> RandomHashFuncs;
+
+public:
+  /// Default geometry: 2 rows x 100 bands.
+  SearchStrategy() { updateRandomHashFunctions(NHashes - 1); }
+
+  SearchStrategy(size_t Rows, size_t Bands)
+      : NHashes(Rows * Bands), Rows(Rows), Bands(Bands) {
+    // Guard against Rows * Bands == 0, where NHashes - 1 would wrap around.
+    updateRandomHashFunctions(NHashes ? NHashes - 1 : 0);
+  }
+
+  /// FNV-1a style hash over the 32-bit elements of \p Seq.
+  uint32_t fnv1a(const std::vector<uint32_t> &Seq) const {
+    return fnv1a(Seq, FNVOffsetBasis);
+  }
+
+  /// Same as above, but continues from the running hash \p NewHash.
+  uint32_t fnv1a(const std::vector<uint32_t> &Seq, uint32_t NewHash) const {
+    uint32_t Hash = NewHash;
+    for (uint32_t Elem : Seq) {
+      Hash ^= Elem;
+      Hash *= FNVPrime;
+    }
+    return Hash;
+  }
+
+  // Generate shingles using a single hash -- unused as not effective for
+  // function merging.
+  //
+  // Ret receives the NHashes smallest distinct shingle hashes in ascending
+  // order, padded with UINT32_MAX if Seq produces fewer than NHashes of them.
+  template <uint32_t K>
+  std::vector<uint32_t> &
+  generateShinglesSingleHashPipelineTurbo(const std::vector<uint32_t> &Seq,
+                                          std::vector<uint32_t> &Ret) const {
+    static_assert(K > 0, "a shingle must cover at least one element");
+    uint32_t Pipeline[K] = {0};
+
+    // Ret is maintained as a max-heap of the smallest hashes seen so far, so
+    // Ret.front() is always the largest hash still kept.
+    Ret.clear();
+    Ret.reserve(NHashes);
+    std::unordered_set<uint32_t> Seen;
+
+    for (uint32_t Elem : Seq) {
+      for (uint32_t k = 0; k < K; k++) {
+        Pipeline[k] ^= Elem;
+        Pipeline[k] *= FNVPrime;
+      }
+
+      // Collect head of pipeline
+      const uint32_t Head = Pipeline[0];
+      if (Seen.insert(Head).second) {
+        if (Ret.size() < NHashes) {
+          Ret.push_back(Head);
+          std::push_heap(Ret.begin(), Ret.end());
+        } else if (!Ret.empty() && Head < Ret.front()) {
+          // Evict the largest kept hash in favour of the smaller new one.
+          std::pop_heap(Ret.begin(), Ret.end());
+          Ret.back() = Head;
+          std::push_heap(Ret.begin(), Ret.end());
+        }
+      }
+
+      // Shift pipeline
+      for (uint32_t k = 0; k + 1 < K; k++)
+        Pipeline[k] = Pipeline[k + 1];
+      Pipeline[K - 1] = FNVOffsetBasis;
+    }
+
+    std::sort_heap(Ret.begin(), Ret.end());
+    Ret.resize(NHashes, std::numeric_limits<uint32_t>::max());
+    return Ret;
+  }
+
+  // Generate MinHash fingerprint with multiple hash functions
+  template <uint32_t K>
+  std::vector<uint32_t> &
+  generateShinglesMultipleHashPipelineTurbo(const std::vector<uint32_t> &Seq,
+                                            std::vector<uint32_t> &Ret) const {
+    static_assert(K > 0, "a shingle must cover at least one element");
+    uint32_t Pipeline[K] = {0};
+    const uint32_t Len = Seq.size();
+
+    uint32_t Smallest = std::numeric_limits<uint32_t>::max();
+
+    std::vector<uint32_t> ShingleHashes(Len);
+
+    Ret.resize(NHashes);
+    if (NHashes == 0)
+      return Ret;
+
+    // Pipeline to hash all shingles using fnv1a
+    // Store all hashes
+    // While storing smallest
+    // Then for each shingle hash, rehash with an XOR of 32 bit random number
+    // and store smallest Do this NHashes-1 times to obtain NHashes minHashes
+    // quickly Sort the hashes at the end
+
+    for (uint32_t i = 0; i < Len; i++) {
+      for (uint32_t k = 0; k < K; k++) {
+        Pipeline[k] ^= Seq[i];
+        Pipeline[k] *= FNVPrime;
+      }
+
+      // Collect head of pipeline
+      if (Pipeline[0] < Smallest)
+        Smallest = Pipeline[0];
+      ShingleHashes[i] = Pipeline[0];
+
+      // Shift pipeline
+      for (uint32_t k = 0; k + 1 < K; k++)
+        Pipeline[k] = Pipeline[k + 1];
+      Pipeline[K - 1] = FNVOffsetBasis;
+    }
+
+    Ret[0] = Smallest;
+
+    // Now for each hash function, rehash each shingle and store the smallest
+    // each time. The count is clamped so that Ret is never overrun when more
+    // than NHashes - 1 masks were requested via updateRandomHashFunctions().
+    const size_t NumFuncs =
+        std::min<size_t>(RandomHashFuncs.size(), NHashes - 1);
+    for (size_t i = 0; i < NumFuncs; i++) {
+      Smallest = std::numeric_limits<uint32_t>::max();
+
+      for (uint32_t Shingle : ShingleHashes) {
+        const uint32_t Temp = Shingle ^ RandomHashFuncs[i];
+
+        if (Temp < Smallest)
+          Smallest = Temp;
+      }
+
+      Ret[i + 1] = Smallest;
+    }
+
+    std::sort(Ret.begin(), Ret.end());
+
+    return Ret;
+  }
+
+  /// Resizes the set of random XOR masks to \p Num entries.
+  ///
+  /// Mask i is always the i-th output of a std::mt19937 seeded with 0, so the
+  /// masks are reproducible and growing the set never repeats earlier masks.
+  void updateRandomHashFunctions(size_t Num) {
+    const size_t OldNum = RandomHashFuncs.size();
+    RandomHashFuncs.resize(Num);
+
+    // if we shrunk the vector, there is nothing more to do
+    if (Num <= OldNum)
+      return;
+
+    // If we enlarged it, we need to generate new random numbers. Skip the
+    // outputs already used by the existing masks: restarting the engine from
+    // its first output would hand out the same masks again.
+    std::mt19937 Gen(0);
+    Gen.discard(OldNum);
+
+    for (size_t i = OldNum; i < Num; i++)
+      RandomHashFuncs[i] = static_cast<uint32_t>(Gen());
+  }
+
+  /// Hashes each run of Rows values of \p MinHashes into one band hash and
+  /// leaves the sorted, de-duplicated band hashes in \p LSHBands.
+  std::vector<uint32_t> &generateBands(const std::vector<uint32_t> &MinHashes,
+                                       std::vector<uint32_t> &LSHBands) const {
+    assert(MinHashes.size() >= Rows * Bands && "fingerprint is too short");
+    LSHBands.resize(Bands);
+
+    // Generate a hash for each band
+    for (size_t i = 0; i < Bands; i++) {
+      // Perform fnv1a on the rows
+      auto First = MinHashes.begin() + (i * Rows);
+      auto Last = First + Rows;
+      LSHBands[i] = fnv1a(std::vector<uint32_t>(First, Last));
+    }
+
+    // Remove duplicate bands -- no need to place twice in the same bucket
+    std::sort(LSHBands.begin(), LSHBands.end());
+    LSHBands.erase(std::unique(LSHBands.begin(), LSHBands.end()),
+                   LSHBands.end());
+
+    return LSHBands;
+  }
+
+  /// Returns sizeof(uint32_t) * Bands * (Rows + 1).
+  uint32_t itemFootprint() const {
+    return static_cast<uint32_t>(sizeof(uint32_t) * Bands * (Rows + 1));
+  }
+};
+
+#endif // LLVM_TRANSFORMS_IPO_SEARCHSTRATEGY_H
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 0bd229f4fc68..e0ce7895f305 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1268,6 +1268,12 @@ void TargetPassConfig::addMachinePasses() {
   addPass(&StackMapLivenessID);
   addPass(&LiveDebugValuesID);
 
+#ifdef ENABLE_CODESIZE_OPT
+  //====== code size ===
+  if(EnableCodeSize && TM->Options.SupportsDefaultOutlining){
+    addPass(createMachineOutlinerPass(true));
+  }else{
+  //====================
   if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
       EnableMachineOutliner != RunOutliner::NeverOutline) {
     bool RunOnAllFunctions =
@@ -1277,6 +1283,18 @@ void TargetPassConfig::addMachinePasses() {
     if (AddOutliner)
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
   }
+  }
+#else
+  if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+      EnableMachineOutliner != RunOutliner::NeverOutline) {
+    bool RunOnAllFunctions =
+        (EnableMachineOutliner == RunOutliner::AlwaysOutline);
+    bool AddOutliner =
+        RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
+    if (AddOutliner)
+      addPass(createMachineOutlinerPass(RunOnAllFunctions));
+  }
+#endif
 
   // Machine function splitter uses the basic block sections feature. Both
   // cannot be enabled at the same time. Basic block sections takes precedence.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 42fde3752724..4a080e42b4ac 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -102,6 +102,7 @@ #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/FunctionMerging.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/IPO/GlobalOpt.h" #include "llvm/Transforms/IPO/GlobalSplit.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 945ef512391b..bdf9aea45dcc 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -128,6 +128,10 @@ #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" +#ifdef ENABLE_CODESIZE_OPT +#include "llvm/Transforms/IPO/FunctionMerging.h" //func-merging +#endif + using namespace llvm; static cl::opt UseInlineAdvisor( @@ -708,6 +712,14 @@ void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, } static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { +#ifdef ENABLE_CODESIZE_OPT + //===for size==================== + if (EnableCodeSize) { + if (Level == OptimizationLevel::O2) + return getInlineParams(2, 1); + } + //===for size==================== +#endif return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } @@ -1324,6 +1336,13 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, const ThinOrFullLTOPhase LTOPhase = LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink : ThinOrFullLTOPhase::None; + +#ifdef ENABLE_CODESIZE_OPT + if (EnableCodeSize) { + MPM.addPass(MergeFunctionsPass()); + MPM.addPass(FunctionMergingPass()); + } +#endif // Add the core simplification pipeline. 
MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase)); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 7c29bffbc327..a46e5c77bb5d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -59,6 +59,7 @@ MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) +MODULE_PASS("func-merging", FunctionMergingPass()) MODULE_PASS("function-specialization", FunctionSpecializationPass()) MODULE_PASS("globaldce", GlobalDCEPass()) MODULE_PASS("globalopt", GlobalOptPass()) diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index ff23ec74df96..c5ff301a4e99 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -144,6 +144,7 @@ add_llvm_component_library(LLVMSupport Chrono.cpp COM.cpp CodeGenCoverage.cpp + CodeSizeOpt.cpp CommandLine.cpp Compression.cpp CRC.cpp diff --git a/llvm/lib/Support/CodeSizeOpt.cpp b/llvm/lib/Support/CodeSizeOpt.cpp new file mode 100644 index 000000000000..8d6c638ca8b9 --- /dev/null +++ b/llvm/lib/Support/CodeSizeOpt.cpp @@ -0,0 +1,10 @@ +// CodeSizeOpt.cpp + +#include "llvm/Support/CodeSizeOpt.h" + +namespace llvm { +cl::opt EnableCodeSize( + "enable-code-size", cl::init(true), cl::Hidden, + cl::desc("Enable optimizations for code size as part of the optimization " + "pipeline")); +} // namespace llvm diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index f9833224d142..16c57a16e94f 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_component_library(LLVMipo ForceFunctionAttrs.cpp FunctionAttrs.cpp FunctionImport.cpp + FunctionMerging.cpp FunctionSpecialization.cpp GlobalDCE.cpp GlobalOpt.cpp diff --git 
a/llvm/lib/Transforms/IPO/FunctionMerging.cpp b/llvm/lib/Transforms/IPO/FunctionMerging.cpp new file mode 100644 index 000000000000..d5cac6d99a98 --- /dev/null +++ b/llvm/lib/Transforms/IPO/FunctionMerging.cpp @@ -0,0 +1,4393 @@ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the general function merging optimization. +// +// It identifies similarities between functions, and if profitable, merges them +// into a single function, replacing the original ones. Functions do not need +// to be identical to be merged. In fact, there is very little restriction to +// merge two functions, however, the produced merged function can be larger than +// the two original functions together. For that reason, it uses the +// TargetTransformInfo analysis to estimate the code-size costs of instructions +// in order to estimate the profitability of merging two functions. +// +// This function merging transformation has three major parts: +// 1. The input functions are linearized, representing their CFGs as sequences +// of labels and instructions. +// 2. We apply a sequence alignment algorithm, namely, the Needleman-Wunsch +// algorithm, to identify similar code between the two linearized functions. +// 3. We use the aligned sequences to perform code generation, producing the new +// merged function, using an extra parameter to represent the function +// identifier. +// +// This pass integrates the function merging transformation with an exploration +// framework. For every function, the other functions are ranked based on their +// degree of similarity, which is computed from the functions' fingerprints. +// Only the top candidates are analyzed in a greedy manner and if one of them +// produces a profitable result, the merged function is taken. 
+// +//===----------------------------------------------------------------------===// +// +// This optimization was proposed in +// +// Function Merging by Sequence Alignment (CGO'19) +// Rodrigo C. O. Rocha, Pavlos Petoumenos, Zheng Wang, Murray Cole, Hugh Leather +// +// Effective Function Merging in the SSA Form (PLDI'20) +// Rodrigo C. O. Rocha, Pavlos Petoumenos, Zheng Wang, Murray Cole, Hugh Leather +// +// HyFM: Function Merging for Free (LCTES'21) +// Rodrigo C. O. Rocha, Pavlos Petoumenos, Zheng Wang, Murray Cole, Kim +// Hazelwood, Hugh Leather +// +// F3M: Fast Focused Function Merging (CGO'22) +// Sean Sterling, Rodrigo C. O. Rocha, Hugh Leather, Kim Hazelwood, Michael +// O'Boyle, Pavlos Petoumenos +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/FunctionMerging.h" + +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SequenceAlignment.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LegacyPassManager.h" 
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __unix__
+/* __unix__ is usually defined by compilers targeting Unix systems */
+#include <unistd.h>
+#elif defined(_WIN32) || defined(WIN32)
+/* _Win32 is usually defined by compilers targeting 32 or 64 bit Windows
+ * systems */
+#include <windows.h>
+#endif
+
+#define DEBUG_TYPE "func-merging"
+
+using namespace llvm;
+
+// feisen
+/// Returns the last instruction of \p pred whose type is \p t, or nullptr if
+/// the block has none. Landing pads need no special case: they are ordinary
+/// instructions and are covered by the type check.
+static Value *getPossibleValueFm(Type *t, BasicBlock *pred) {
+  Value *v = nullptr;
+  for (Instruction &I : *pred)
+    if (I.getType() == t)
+      v = &I;
+  return v;
+}
+
+// feisen
+/// Replaces every undef incoming value of \p phiInst with a same-typed
+/// instruction taken from the corresponding predecessor, when one exists.
+/// Returns true if any incoming value was replaced.
+static bool handlePhiNodeFm(PHINode *phiInst) {
+  bool changed = false;
+  for (BasicBlock *pred : phiInst->blocks()) {
+    Value *v = phiInst->getIncomingValueForBlock(pred);
+    if (!isa<UndefValue>(v))
+      continue;
+    if (Value *possibleValue = getPossibleValueFm(v->getType(), pred)) {
+      phiInst->setIncomingValueForBlock(pred, possibleValue);
+      changed = true;
+    }
+  }
+  return changed;
+}
+
+/// Applies handlePhiNodeFm() to every PHI node of \p F, but only when the
+/// function name starts with "_m_f_". Returns true if \p F was modified.
+static bool resolvePHI(Function &F) {
+  if (!F.getName().startswith("_m_f_"))
+    return false;
+
+  bool changed = false;
+  for (BasicBlock &B : F) {
+    for (Instruction &I : B) {
+      if (PHINode *phiInst = dyn_cast<PHINode>(&I))
+        changed |= handlePhiNodeFm(phiInst);
+    }
+  }
+  return changed;
+}
+
+// feisen
+/// Module-wide variant of resolvePHI(): reports that no analysis is preserved
+/// as soon as one function was modified.
+static PreservedAnalyses resolvePHI_fm(Module &M, ModuleAnalysisManager &AM) {
+  bool changed = false;
+  for (Function &F : M)
+    changed |= resolvePHI(F);
+  return changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+//-----
+
+static cl::opt<unsigned> ExplorationThreshold("func-merging-explore",
+                                              cl::init(10), cl::Hidden);
+static cl::opt<unsigned> RankingThreshold("func-merging-ranking-threshold",
+                                          cl::init(100), cl::Hidden);
+static cl::opt<int> MergingOverheadThreshold("func-merging-threshold",
+                                             cl::init(50), cl::Hidden);
+static cl::opt<bool> MaxParamScore("func-merging-max-param", cl::init(true),
+                                   cl::Hidden);
+static cl::opt<bool> Debug("func-merging-debug", cl::init(false), cl::Hidden);
+static cl::opt<bool> Verbose("func-merging-verbose", cl::init(false),
+                             cl::Hidden);
+static cl::opt<bool> IdenticalType("func-merging-identic-type",
+                                   cl::init(false));
+static cl::opt<bool> EnableUnifiedReturnType("func-merging-unify-return",
+                                             cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableOperandReordering(
+    "func-merging-operand-reorder", cl::init(false),
+    cl::Hidden);
+static cl::opt<bool> HasWholeProgram("func-merging-whole-program",
+                                     cl::init(false));
+static cl::opt<bool>
+    EnableHyFMPA("func-merging-hyfm-pa", cl::init(false),
+                 cl::Hidden);
+static cl::opt<bool> EnableHyFMNW(
+    "func-merging-hyfm-nw", cl::init(true),
+    cl::Hidden);
+static cl::opt<bool> EnableSALSSACoalescing("func-merging-coalescing",
+                                            cl::init(true), cl::Hidden);
+static cl::opt<bool> ReuseMergedFunctions("func-merging-reuse-merges",
+                                          cl::init(true), cl::Hidden);
+static cl::opt<unsigned> MaxNumSelection("func-merging-max-selects",
+                                         cl::init(500), cl::Hidden);
+static cl::opt<bool> HyFMProfitability("hyfm-profitability", cl::init(true),
+                                       cl::Hidden);
+static cl::opt<bool> EnableF3M("func-merging-f3m", cl::init(true), cl::Hidden);
+static cl::opt<unsigned> LSHRows("hyfm-f3m-rows", cl::init(2), cl::Hidden);
+static cl::opt<unsigned> LSHBands("hyfm-f3m-bands", cl::init(100), cl::Hidden);
+static cl::opt<bool> ShingleCrossBBs("shingling-cross-basic-blocks",
+                                     cl::init(true));
+static cl::opt<bool> AdaptiveThreshold("adaptive-threshold", cl::init(true),
+                                       cl::Hidden);
+static cl::opt<bool> AdaptiveBands("adaptive-bands", cl::init(true),
+                                   cl::Hidden);
+static cl::opt<float> RankingDistance("ranking-distance", cl::init(1.0),
+                                      cl::Hidden);
+static cl::opt<bool> EnableThunkPrediction("thunk-predictor", cl::init(false),
+                                           cl::Hidden);
+static cl::opt<bool> ReportStats("func-merging-report", cl::init(false),
+                                 cl::Hidden);
+static cl::opt<bool> MatcherStats("func-merging-matcher-report",
+                                  cl::init(false), cl::Hidden);
+static cl::opt<bool> Deterministic("func-merging-deterministic", cl::init(true),
+                                   cl::Hidden);
+static cl::opt<unsigned> BucketSizeCap("bucket-size-cap", cl::init(1000000000),
+                                       cl::Hidden);
+
+// Command line option to specify the function to merge. This is
+// mainly used for debugging.
+static cl::opt<std::string> ToMergeFile(
+    "func-merging-pairs-file", cl::init(""), cl::value_desc("filename"),
+    cl::desc("File containing the functions and basic blocks to merge"),
+    cl::Hidden);
+
+static std::string GetValueName(const Value *V);
+
+// Value reported by getTotalSystemMemory() when the amount of physical memory
+// cannot be determined (1 GiB).
+static const unsigned long long UnknownSystemMemory = 1024ULL * 1024 * 1024;
+
+// getTotalSystemMemory() returns the physical memory of the host in bytes, or
+// UnknownSystemMemory when the platform query fails or is not available.
+#ifdef __unix__
+/* __unix__ is usually defined by compilers targeting Unix systems */
+
+unsigned long long getTotalSystemMemory() {
+  const long Pages = sysconf(_SC_PHYS_PAGES);
+  const long PageSize = sysconf(_SC_PAGE_SIZE);
+  // sysconf() reports failure as -1.
+  if (Pages <= 0 || PageSize <= 0)
+    return UnknownSystemMemory;
+  // Widen before multiplying: the product can overflow a 32-bit long.
+  return static_cast<unsigned long long>(Pages) *
+         static_cast<unsigned long long>(PageSize);
+}
+
+#elif defined(_WIN32) || defined(WIN32)
+/* _Win32 is usually defined by compilers targeting 32 or 64 bit Windows
+ * systems */
+
+unsigned long long getTotalSystemMemory() {
+  MEMORYSTATUSEX status;
+  status.dwLength = sizeof(status);
+  if (!GlobalMemoryStatusEx(&status))
+    return UnknownSystemMemory;
+  return status.ullTotalPhys;
+}
+
+#elif defined(__APPLE__)
+#include <sys/sysctl.h>
+unsigned long long getTotalSystemMemory() {
+  int mib[2] = {CTL_HW, HW_MEMSIZE};
+
+  unsigned long long physicalMemory = 0;
+  size_t len = sizeof(physicalMemory);
+
+  if (sysctl(mib, 2, &physicalMemory, &len, nullptr, 0) == 0)
+    return physicalMemory;
+  return UnknownSystemMemory; // the query failed
+}
+
+#else
+
+// No known way to query the host: keep the symbol defined on every platform.
+unsigned long long getTotalSystemMemory() { return UnknownSystemMemory; }
+
+#endif
+
+class FunctionMerging {
+public:
+  /// Convenience overload: runs the pass with one TargetTransformInfo built
+  /// from the data layout of \p M and shared by every function.
+  bool runImpl(Module &M) {
+    TargetTransformInfo TTI(M.getDataLayout());
+    auto GTTI = [&](Function &F) -> TargetTransformInfo * { return &TTI; };
+    return runImpl(M, GTTI);
+  }
+  bool runImpl(Module &M,
+               function_ref<TargetTransformInfo *(Function &)> GTTI);
+};
+
+// The declaration lives in namespace llvm, so the definition has to be
+// qualified: an unqualified definition here would create a second, unrelated
+// ::mergeFunctions and leave llvm::mergeFunctions undefined.
+FunctionMergeResult llvm::mergeFunctions(Function *F1, Function *F2,
+                                         const FunctionMergingOptions &Options) {
+  // Functions of different modules are never merged; the result then carries
+  // no merged function.
+  if (F1->getParent() != F2->getParent())
+    return FunctionMergeResult(F1, F2, nullptr);
+  FunctionMerger Merger(F1->getParent());
+  return Merger.merge(F1, F2, "", Options);
+}
+
+// Returns true when the two numbers are equal.
+static bool CmpNumbers(uint64_t L, uint64_t R) { return L == R; }
+
+// Any two pointers in the same address space are equivalent, intptr_t and
+// pointers are equivalent. Otherwise, standard type equivalence rules apply.
+// Returns true when the two types are equivalent, false otherwise.
+static bool CmpTypes(Type *TyL, Type *TyR, const DataLayout *DL) {
+  auto *PTyL = dyn_cast<PointerType>(TyL);
+  auto *PTyR = dyn_cast<PointerType>(TyR);
+
+  // Pointers of address space 0 are compared as the matching intptr type.
+  if (PTyL && PTyL->getAddressSpace() == 0)
+    TyL = DL->getIntPtrType(TyL);
+  if (PTyR && PTyR->getAddressSpace() == 0)
+    TyR = DL->getIntPtrType(TyR);
+
+  // Types are uniqued, so identical pointers mean identical types.
+  if (TyL == TyR)
+    return true;
+
+  // Types of different kinds are never equivalent. This has to be settled
+  // before the switch below, which casts TyR to the kind of TyL.
+  if (!CmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+    return false;
+
+  switch (TyL->getTypeID()) {
+  default:
+    // Two distinct types of any kind not handled below (void, floating
+    // point, label, metadata, token, ...): conservatively not equivalent.
+    return false;
+
+  case Type::IntegerTyID:
+    return CmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+                      cast<IntegerType>(TyR)->getBitWidth());
+
+  case Type::PointerTyID:
+    // TyL and TyR are still the original pointer types here, because
+    // address-space-0 pointers were turned into integers above.
+    assert(PTyL && PTyR && "Both types must be pointers here.");
+    return CmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+
+  case Type::StructTyID: {
+    auto *STyL = cast<StructType>(TyL);
+    auto *STyR = cast<StructType>(TyR);
+    if (STyL->getNumElements() != STyR->getNumElements())
+      return false;
+
+    if (STyL->isPacked() != STyR->isPacked())
+      return false;
+
+    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+      if (!CmpTypes(STyL->getElementType(i), STyR->getElementType(i), DL))
+        return false;
+    }
+    return true;
+  }
+
+  case Type::FunctionTyID: {
+    auto *FTyL = cast<FunctionType>(TyL);
+    auto *FTyR = cast<FunctionType>(TyR);
+    if (FTyL->getNumParams() != FTyR->getNumParams())
+      return false;
+
+    if (FTyL->isVarArg() != FTyR->isVarArg())
+      return false;
+
+    if (!CmpTypes(FTyL->getReturnType(), FTyR->getReturnType(), DL))
+      return false;
+
+    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+      if (!CmpTypes(FTyL->getParamType(i), FTyR->getParamType(i), DL))
+        return false;
+    }
+    return true;
+  }
+
+  case Type::ArrayTyID: {
+    auto *STyL = cast<ArrayType>(TyL);
+    auto *STyR = cast<ArrayType>(TyR);
+    if (STyL->getNumElements() != STyR->getNumElements())
+      return false;
+    return CmpTypes(STyL->getElementType(), STyR->getElementType(), DL);
+  }
+  case Type::FixedVectorTyID:
+  case Type::ScalableVectorTyID: {
+    auto *STyL = cast<VectorType>(TyL);
+    auto *STyR = cast<VectorType>(TyR);
+    // ElementCount equality covers both the lane count and scalability.
+    if (STyL->getElementCount() != STyR->getElementCount())
+      return false;
+    return CmpTypes(STyL->getElementType(), STyR->getElementType(), DL);
+  }
+  }
+}
+
+// Any two pointers in the same address space are equivalent, intptr_t and
+// pointers are equivalent. Otherwise, standard type equivalence rules apply.
+// Identical types are always equivalent. Different types are only looked at
+// more closely (through CmpTypes) when Options.IdenticalTypesOnly is off.
+bool FunctionMerger::areTypesEquivalent(Type *Ty1, Type *Ty2,
+                                        const DataLayout *DL,
+                                        const FunctionMergingOptions &Options) {
+  if (Ty1 == Ty2)
+    return true;
+  if (Options.IdenticalTypesOnly)
+    return false;
+
+  return CmpTypes(Ty1, Ty2, DL);
+}
+
+// Decides whether two calls to the intrinsic ID may be matched. Only the
+// intrinsics listed in the switch can match, and only when the arguments that
+// must stay constant are the very same Value in both calls. Every other
+// intrinsic (coroutine, constrained floating point, debug, trampoline, stack
+// protector, localescape/localrecover, and anything unknown) never matches.
+static bool matchIntrinsicCalls(Intrinsic::ID ID, const CallBase *CI1,
+                                const CallBase *CI2) {
+  const Function *Callee1 = CI1->getCalledFunction();
+  if (!Callee1)
+    return false;
+  const Function *Callee2 = CI2->getCalledFunction();
+  if (!Callee2)
+    return false;
+
+  if (Callee1->getIntrinsicID() != ID || Callee2->getIntrinsicID() != ID)
+    return false;
+
+  // True when both calls pass the same Value as argument number Idx.
+  auto SameArg = [&](unsigned Idx) {
+    return CI1->getArgOperand(Idx) == CI2->getArgOperand(Idx);
+  };
+
+  switch (ID) {
+  default:
+    break;
+  case Intrinsic::ctlz: // llvm.ctlz
+  case Intrinsic::cttz: // llvm.cttz
+    // is_zero_undef argument of bit counting intrinsics must be a constant int
+    return SameArg(1);
+  case Intrinsic::memcpy:
+  case Intrinsic::memmove:
+  case Intrinsic::memset:
+    // isvolatile argument of memory intrinsics must be a constant int
+    return SameArg(3);
+  case Intrinsic::memcpy_element_unordered_atomic:
+  case Intrinsic::memmove_element_unordered_atomic:
+  case Intrinsic::memset_element_unordered_atomic: {
+    const auto *AMI1 = cast<AtomicMemIntrinsic>(CI1);
+    const auto *AMI2 = cast<AtomicMemIntrinsic>(CI2);
+
+    auto *ElementSizeCI1 =
+        dyn_cast<ConstantInt>(AMI1->getRawElementSizeInBytes());
+
+    auto *ElementSizeCI2 =
+        dyn_cast<ConstantInt>(AMI2->getRawElementSizeInBytes());
+
+    return (ElementSizeCI1 != nullptr && ElementSizeCI1 == ElementSizeCI2);
+  }
+  case Intrinsic::gcroot:
+  case Intrinsic::gcwrite:
+  case Intrinsic::gcread:
+    // llvm.gcroot parameter #2 must be a constant.
+    return SameArg(1);
+  case Intrinsic::prefetch:
+    // rw, locality and cache type (arguments #2, #3 and #4) of llvm.prefetch
+    // must all be constants
+    return SameArg(1) && SameArg(2) && SameArg(3);
+  case Intrinsic::lifetime_start:
+  case Intrinsic::lifetime_end:
+  case Intrinsic::invariant_start:
+    // size argument of memory use markers must be a constant integer
+    return SameArg(0);
+  case Intrinsic::invariant_end:
+    // llvm.invariant.end parameter #2 must be a constant integer
+    return SameArg(1);
+  }
+  return false; // TODO: change to false by default
+}
+
+// Two landing pads match when they have the same type, cleanup flag and
+// clause list (same kind and same clause Value at every position).
+static bool matchLandingPad(LandingPadInst *LP1, LandingPadInst *LP2) {
+  if (LP1->getType() != LP2->getType())
+    return false;
+  if (LP1->isCleanup() != LP2->isCleanup())
+    return false;
+  if (LP1->getNumClauses() != LP2->getNumClauses())
+    return false;
+  for (unsigned i = 0; i < LP1->getNumClauses(); i++) {
+    if (LP1->isCatch(i) != LP2->isCatch(i))
+      return false;
+    if (LP1->isFilter(i) != LP2->isFilter(i))
+      return false;
+    if (LP1->getClause(i) != LP2->getClause(i))
+      return false;
+  }
+  return true;
+}
+
+// Loads match when volatility, alignment, atomic ordering and synchronization
+// scope agree. The scope matters for atomic accesses: merging accesses with
+// different scopes would change the semantics of one of them.
+static bool matchLoadInsts(const LoadInst *LI1, const LoadInst *LI2) {
+  return LI1->isVolatile() == LI2->isVolatile() &&
+         LI1->getAlign() == LI2->getAlign() &&
+         LI1->getOrdering() == LI2->getOrdering() &&
+         LI1->getSyncScopeID() == LI2->getSyncScopeID();
+}
+
+// Same rule as for loads.
+static bool matchStoreInsts(const StoreInst *SI1, const StoreInst *SI2) {
+  return SI1->isVolatile() == SI2->isVolatile() &&
+         SI1->getAlign() == SI2->getAlign() &&
+         SI1->getOrdering() == SI2->getOrdering() &&
+         SI1->getSyncScopeID() == SI2->getSyncScopeID();
+}
+
+// Allocas match when array size, alignment and allocated type are identical.
+static bool matchAllocaInsts(const AllocaInst *AI1, const AllocaInst *AI2) {
+  return AI1->getArraySize() == AI2->getArraySize() &&
+         AI1->getAlign() == AI2->getAlign() &&
+         AI1->getAllocatedType() == AI2->getAllocatedType();
+}
+
+// GEPs match when they have the same source element type, the same number of
+// indices and step through identical types. An index into a struct must be
+// the same Value in both instructions.
+static bool matchGetElementPtrInsts(const GetElementPtrInst *GEP1,
+                                    const GetElementPtrInst *GEP2) {
+  Type *Ty1 = GEP1->getSourceElementType();
+  SmallVector<Value *, 16> Idxs1(GEP1->idx_begin(), GEP1->idx_end());
+
+  Type *Ty2 = GEP2->getSourceElementType();
+  SmallVector<Value *, 16> Idxs2(GEP2->idx_begin(), GEP2->idx_end());
+
+  if (Ty1 != Ty2)
+    return false;
+  if (Idxs1.size() != Idxs2.size())
+    return false;
+
+  if (Idxs1.empty())
+    return true;
+
+  // The first index only offsets the base pointer and does not step into the
+  // source element type, so the walk starts at the second index.
+  for (unsigned i = 1; i < Idxs1.size(); i++) {
+    Value *V1 = Idxs1[i];
+    Value *V2 = Idxs2[i];
+
+    // structs must have constant indices, therefore they must be constants and
+    // must be identical when merging
+    if (isa<StructType>(Ty1)) {
+      if (V1 != V2)
+        return false;
+    }
+    Ty1 = GetElementPtrInst::getTypeAtIndex(Ty1, V1);
+    Ty2 = GetElementPtrInst::getTypeAtIndex(Ty2, V2);
+    if (Ty1 != Ty2)
+      return false;
+  }
+  return true;
+}
+
+static bool matchSwitchInsts(const SwitchInst *SI1, const SwitchInst *SI2) {
+  if (SI1->getNumCases() == SI2->getNumCases()) {
+    auto 
CaseIt1 = SI1->case_begin(), CaseEnd1 = SI1->case_end(); + auto CaseIt2 = SI2->case_begin(), CaseEnd2 = SI2->case_end(); + do { + auto *Case1 = &*CaseIt1; + auto *Case2 = &*CaseIt2; + if (Case1 != Case2) + return false; // TODO: could allow permutation! + ++CaseIt1; + ++CaseIt2; + } while (CaseIt1 != CaseEnd1 && CaseIt2 != CaseEnd2); + return true; + } + return false; +} + +static bool matchCallInsts(const CallBase *CI1, const CallBase *CI2) { + if (CI1->isInlineAsm() || CI2->isInlineAsm()) + return false; + + // if (CI1->getCalledFunction()==nullptr) return false; + + if (CI1->getCalledFunction() != CI2->getCalledFunction()) + return false; + + if (Function *F = CI1->getCalledFunction()) { + if (auto ID = (Intrinsic::ID)F->getIntrinsicID()) { + if (!matchIntrinsicCalls(ID, CI1, CI2)) + return false; + } + } + + return CI1->arg_size() == CI2->arg_size() && + CI1->getCallingConv() == CI2->getCallingConv() && + CI1->getAttributes() == CI2->getAttributes(); +} + +static bool matchInvokeInsts(const InvokeInst *II1, const InvokeInst *II2) { + return matchCallInsts(II1, II2) && + II1->getCallingConv() == II2->getCallingConv() && + II1->getAttributes() == II2->getAttributes() && + matchLandingPad(II1->getLandingPadInst(), II2->getLandingPadInst()); +} + +static bool matchInsertValueInsts(const InsertValueInst *IV1, + const InsertValueInst *IV2) { + return IV1->getIndices() == IV2->getIndices(); +} + +static bool matchExtractValueInsts(const ExtractValueInst *EV1, + const ExtractValueInst *EV2) { + return EV1->getIndices() == EV2->getIndices(); +} + +static bool matchFenceInsts(const FenceInst *FI1, const FenceInst *FI2) { + return FI1->getOrdering() == FI2->getOrdering() && + FI1->getSyncScopeID() == FI2->getSyncScopeID(); +} + +bool FunctionMerger::matchInstructions(Instruction *I1, Instruction *I2, + const FunctionMergingOptions &Options) { + + if (I1->getOpcode() != I2->getOpcode()) + return false; + + if (I1->getOpcode() == Instruction::CallBr) + return false; + + // 
Returns are special cases that can differ in the number of operands + if (I1->getOpcode() == Instruction::Ret) + return true; + + if (I1->getNumOperands() != I2->getNumOperands()) + return false; + + const DataLayout *DL = + &I1->getParent()->getParent()->getParent()->getDataLayout(); + + bool sameType = false; + if (Options.IdenticalTypesOnly) { + sameType = (I1->getType() == I2->getType()); + for (unsigned i = 0; i < I1->getNumOperands(); i++) { + sameType = sameType && + (I1->getOperand(i)->getType() == I2->getOperand(i)->getType()); + } + } else { + sameType = areTypesEquivalent(I1->getType(), I2->getType(), DL, Options); + for (unsigned i = 0; i < I1->getNumOperands(); i++) { + sameType = sameType && + areTypesEquivalent(I1->getOperand(i)->getType(), + I2->getOperand(i)->getType(), DL, Options); + } + } + if (!sameType) + return false; + + switch (I1->getOpcode()) { + + // #define MatchCaseInst(Kind, I1, I2) case Instruction::#Kind + case Instruction::ShuffleVector: // feisen:24/03/09/ + return cast(I1)->getShuffleMask() == + cast(I2)->getShuffleMask(); + case Instruction::Load: + return matchLoadInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::Store: + return matchStoreInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::Alloca: + return matchAllocaInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::GetElementPtr: + return matchGetElementPtrInsts(dyn_cast(I1), + dyn_cast(I2)); + case Instruction::Switch: + return matchSwitchInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::Call: + return matchCallInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::Invoke: + return matchInvokeInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::InsertValue: + return matchInsertValueInsts(dyn_cast(I1), + dyn_cast(I2)); + case Instruction::ExtractValue: + return matchExtractValueInsts(dyn_cast(I1), + dyn_cast(I2)); + case Instruction::Fence: + return matchFenceInsts(dyn_cast(I1), dyn_cast(I2)); + case Instruction::AtomicCmpXchg: { + const AtomicCmpXchgInst 
*CXI = dyn_cast(I1); + const AtomicCmpXchgInst *CXI2 = cast(I2); + return CXI->isVolatile() == CXI2->isVolatile() && + CXI->isWeak() == CXI2->isWeak() && + CXI->getSuccessOrdering() == CXI2->getSuccessOrdering() && + CXI->getFailureOrdering() == CXI2->getFailureOrdering() && + CXI->getSyncScopeID() == CXI2->getSyncScopeID(); + } + case Instruction::AtomicRMW: { + const AtomicRMWInst *RMWI = dyn_cast(I1); + return RMWI->getOperation() == cast(I2)->getOperation() && + RMWI->isVolatile() == cast(I2)->isVolatile() && + RMWI->getOrdering() == cast(I2)->getOrdering() && + RMWI->getSyncScopeID() == cast(I2)->getSyncScopeID(); + } + default: + if (auto *CI = dyn_cast(I1)) + return CI->getPredicate() == cast(I2)->getPredicate(); + if (isa(I1)) { + if (!isa(I2)) + return false; + if (I1->hasNoUnsignedWrap() != I2->hasNoUnsignedWrap()) + return false; + if (I1->hasNoSignedWrap() != I2->hasNoSignedWrap()) + return false; + } + if (isa(I1)) { + if (!isa(I2)) + return false; + if (I1->isExact() != I2->isExact()) + return false; + } + if (isa(I1)) { + if (!isa(I2)) + return false; + if (I1->isFast() != I2->isFast()) + return false; + if (I1->hasAllowReassoc() != I2->hasAllowReassoc()) + return false; + if (I1->hasNoNaNs() != I2->hasNoNaNs()) + return false; + if (I1->hasNoInfs() != I2->hasNoInfs()) + return false; + if (I1->hasNoSignedZeros() != I2->hasNoSignedZeros()) + return false; + if (I1->hasAllowReciprocal() != I2->hasAllowReciprocal()) + return false; + if (I1->hasAllowContract() != I2->hasAllowContract()) + return false; + if (I1->hasApproxFunc() != I2->hasApproxFunc()) + return false; + } + } + + return true; +} + +bool FunctionMerger::match(Value *V1, Value *V2) { + if (auto *I1 = dyn_cast(V1)) + if (auto *I2 = dyn_cast(V2)) + return matchInstructions(I1, I2); + + if (auto *BB1 = dyn_cast(V1)) + if (auto *BB2 = dyn_cast(V2)) + return matchBlocks(BB1, BB2); + + return false; +} + +bool FunctionMerger::matchBlocks(BasicBlock *BB1, BasicBlock *BB2) { + if (BB1 == nullptr 
|| BB2 == nullptr) + return false; + if (BB1->isLandingPad() || BB2->isLandingPad()) { + LandingPadInst *LP1 = BB1->getLandingPadInst(); + LandingPadInst *LP2 = BB2->getLandingPadInst(); + if (LP1 == nullptr || LP2 == nullptr) + return false; + return matchLandingPad(LP1, LP2); + } + return true; +} + +bool FunctionMerger::matchWholeBlocks(Value *V1, Value *V2) { + auto *BB1 = dyn_cast(V1); + auto *BB2 = dyn_cast(V2); + if (BB1 == nullptr || BB2 == nullptr) + return false; + + if (!matchBlocks(BB1, BB2)) + return false; + + auto It1 = BB1->begin(); + auto It2 = BB2->begin(); + + while (isa(*It1) || isa(*It1)) + It1++; + while (isa(*It2) || isa(*It2)) + It2++; + + while (It1 != BB1->end() && It2 != BB2->end()) { + if (!matchInstructions(&*It1, &*It2)) + return false; + + It1++; + It2++; + } + + if (It1 != BB1->end() || It2 != BB2->end()) + return false; + + return true; +} + +static unsigned +RandomLinearizationOfBlocks(BasicBlock *BB, + std::list &OrederedBBs, + std::set &Visited) { + if (Visited.find(BB) != Visited.end()) + return 0; + Visited.insert(BB); + + Instruction *TI = BB->getTerminator(); + + std::vector NextBBs; + for (unsigned i = 0; i < TI->getNumSuccessors(); i++) { + NextBBs.push_back(TI->getSuccessor(i)); + } + std::random_device rd; + std::shuffle(NextBBs.begin(), NextBBs.end(), std::mt19937(rd())); + + unsigned SumSizes = 0; + for (BasicBlock *NextBlock : NextBBs) { + SumSizes += RandomLinearizationOfBlocks(NextBlock, OrederedBBs, Visited); + } + + OrederedBBs.push_front(BB); + return SumSizes + BB->size(); +} + +static unsigned +RandomLinearizationOfBlocks(Function *F, std::list &OrederedBBs) { + std::set Visited; + return RandomLinearizationOfBlocks(&F->getEntryBlock(), OrederedBBs, Visited); +} + +static unsigned +CanonicalLinearizationOfBlocks(BasicBlock *BB, + std::list &OrederedBBs, + std::set &Visited) { + if (Visited.find(BB) != Visited.end()) + return 0; + Visited.insert(BB); + + Instruction *TI = BB->getTerminator(); + + unsigned 
SumSizes = 0; + for (unsigned i = 0; i < TI->getNumSuccessors(); i++) { + SumSizes += CanonicalLinearizationOfBlocks(TI->getSuccessor(i), OrederedBBs, + Visited); + } + + OrederedBBs.push_front(BB); + return SumSizes + BB->size(); +} + +static unsigned +CanonicalLinearizationOfBlocks(Function *F, + std::list &OrederedBBs) { + std::set Visited; + return CanonicalLinearizationOfBlocks(&F->getEntryBlock(), OrederedBBs, + Visited); +} + +static void vectorizeBB(SmallVectorImpl &Vec, BasicBlock *BB) { + Vec.push_back(BB); + for (Instruction &I : *BB) + if (!isa(&I) && !isa(&I)) + Vec.push_back(&I); +} + +void FunctionMerger::linearize(Function *F, SmallVectorImpl &FVec, + FunctionMerger::LinearizationKind LK) { + std::list OrderedBBs; + + unsigned FReserve = 0; + switch (LK) { + case LinearizationKind::LK_Random: + FReserve = RandomLinearizationOfBlocks(F, OrderedBBs); + break; + + default: + FReserve = CanonicalLinearizationOfBlocks(F, OrderedBBs); + break; + } + + FVec.reserve(FReserve + OrderedBBs.size()); + for (BasicBlock *BB : OrderedBBs) + vectorizeBB(FVec, BB); +} + +bool FunctionMerger::validMergeTypes(Function *F1, Function *F2, + const FunctionMergingOptions &Options) { + bool EquivTypes = + areTypesEquivalent(F1->getReturnType(), F2->getReturnType(), DL, Options); + if (!EquivTypes && !F1->getReturnType()->isVoidTy() && + !F2->getReturnType()->isVoidTy()) { + return false; + } + return true; +} + +static bool validMergePair(Function *F1, Function *F2) { + if (!HasWholeProgram && (F1->hasAvailableExternallyLinkage() || + F2->hasAvailableExternallyLinkage())) + return false; + + if (!HasWholeProgram && + (F1->hasLinkOnceLinkage() || F2->hasLinkOnceLinkage())) + return false; + + if (F1->hasComdat() != F2->hasComdat()) + return false; + if (F1->hasComdat() && F1->getComdat() != F2->getComdat()) + return false; + + if (F1->hasPersonalityFn() != F2->hasPersonalityFn()) + return false; + if (F1->hasPersonalityFn()) { + Constant *PersonalityFn1 = 
F1->getPersonalityFn(); + Constant *PersonalityFn2 = F2->getPersonalityFn(); + if (PersonalityFn1 != PersonalityFn2) + return false; + } + + return true; +} + +static void MergeArguments(LLVMContext &Context, Function *F1, Function *F2, + AlignedCode &AlignedSeq, + std::map &ParamMap1, + std::map &ParamMap2, + std::vector &Args, + const FunctionMergingOptions &Options) { + + std::vector ArgsList1; + for (Argument &arg : F1->args()) { + ArgsList1.push_back(&arg); + } + + Args.push_back(IntegerType::get(Context, 1)); // push the function Id argument + unsigned ArgId = 0; + for (auto I = F1->arg_begin(), E = F1->arg_end(); I != E; I++) { + ParamMap1[ArgId] = Args.size(); + Args.push_back((*I).getType()); + ArgId++; + } + + auto AttrList1 = F1->getAttributes(); + auto AttrList2 = F2->getAttributes(); + + // merge arguments from Function2 with Function1 + ArgId = 0; + for (auto I = F2->arg_begin(), E = F2->arg_end(); I != E; I++) { + + std::map MatchingScore; + // first try to find an argument with the same name/type + // otherwise try to match by type only + for (unsigned i = 0; i < ArgsList1.size(); i++) { + if (ArgsList1[i]->getType() == (*I).getType()) { + + auto AttrSet1 = AttrList1.getParamAttrs(ArgsList1[i]->getArgNo()); + auto AttrSet2 = AttrList2.getParamAttrs((*I).getArgNo()); + if (AttrSet1 != AttrSet2) + continue; + + bool hasConflict = false; // check for conflict from a previous matching + for (auto ParamPair : ParamMap2) { + if (ParamPair.second == ParamMap1[i]) { + hasConflict = true; + break; + } + } + if (hasConflict) + continue; + MatchingScore[i] = 0; + if (!Options.MaximizeParamScore) + break; // if not maximize score, get the first one + } + } + + if (MatchingScore.size() > 0) { // maximize scores + for (auto &Entry : AlignedSeq) { + if (Entry.match()) { + auto *I1 = dyn_cast(Entry.get(0)); + auto *I2 = dyn_cast(Entry.get(1)); + if (I1 != nullptr && I2 != nullptr) { // test both for sanity + for (unsigned i = 0; i < I1->getNumOperands(); i++) { + 
for (auto KV : MatchingScore) { + if (I1->getOperand(i) == ArgsList1[KV.first]) { + if (i < I2->getNumOperands() && I2->getOperand(i) == &(*I)) { + MatchingScore[KV.first]++; + } + } + } + } + } + } + } + + int MaxScore = -1; + unsigned MaxId = 0; + + for (auto KV : MatchingScore) { + if (KV.second > MaxScore) { + MaxScore = KV.second; + MaxId = KV.first; + } + } + + ParamMap2[ArgId] = ParamMap1[MaxId]; + } else { + ParamMap2[ArgId] = Args.size(); + Args.push_back((*I).getType()); + } + + ArgId++; + } +} + +static void SetFunctionAttributes(Function *F1, Function *F2, + Function *MergedFunc) { + unsigned MaxAlignment = std::max(F1->getAlignment(), F2->getAlignment()); + if (F1->getAlignment() != F2->getAlignment()) { + } + if (MaxAlignment) + MergedFunc->setAlignment(Align(MaxAlignment)); + + if (F1->getCallingConv() == F2->getCallingConv()) { + MergedFunc->setCallingConv(F1->getCallingConv()); + } else { + } + + MergedFunc->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + + MergedFunc->setDSOLocal(true); + + if (F1->getSubprogram() == F2->getSubprogram()) { + MergedFunc->setSubprogram(F1->getSubprogram()); + } else { + } + + MergedFunc->setVisibility(GlobalValue::VisibilityTypes::DefaultVisibility); + + // Exception Handling requires landing pads to have the same personality + // function + if (F1->hasPersonalityFn() && F2->hasPersonalityFn()) { + Constant *PersonalityFn1 = F1->getPersonalityFn(); + Constant *PersonalityFn2 = F2->getPersonalityFn(); + if (PersonalityFn1 == PersonalityFn2) { + MergedFunc->setPersonalityFn(PersonalityFn1); + } + } else if (F1->hasPersonalityFn()) { + + MergedFunc->setPersonalityFn(F1->getPersonalityFn()); + + } else if (F2->hasPersonalityFn()) { + + MergedFunc->setPersonalityFn(F2->getPersonalityFn()); + } + + if (F1->hasComdat() && F2->hasComdat()) { + auto *Comdat1 = F1->getComdat(); + auto *Comdat2 = F2->getComdat(); + if (Comdat1 == Comdat2) { + MergedFunc->setComdat(Comdat1); + } + } else if (F1->hasComdat()) { + 
MergedFunc->setComdat(F1->getComdat()); + } else if (F2->hasComdat()) { + MergedFunc->setComdat(F2->getComdat()); + } + + if (F1->hasSection()) { + MergedFunc->setSection(F1->getSection()); + } +} + +static Value *createCastIfNeeded(Value *V, Type *DstType, IRBuilder<> &Builder, + Type *IntPtrTy, + const FunctionMergingOptions &Options = {}); + +void FunctionMerger::CodeGenerator::destroyGeneratedCode() { + for (Instruction *I : CreatedInsts) { + I->dropAllReferences(); + } + for (Instruction *I : CreatedInsts) { + I->eraseFromParent(); + } + for (BasicBlock *BB : CreatedBBs) { + BB->eraseFromParent(); + } + CreatedInsts.clear(); + CreatedBBs.clear(); +} + +unsigned instToInt(Instruction *I); + +inst_range getInstructions(Function *F) { return instructions(F); } + +iterator_range getInstructions(BasicBlock *BB) { + return make_range(BB->begin(), BB->end()); +} + +template class FingerprintMH { +private: + // The number of instructions defining a shingle. 2 or 3 is best. + static constexpr size_t K = 2; + static constexpr double threshold = 0.3; + static constexpr size_t MaxOpcode = 68; + const uint32_t _footprint; + +public: + uint64_t magnitude{0}; + std::vector hash; + std::vector bandHash; + +public: + FingerprintMH() = default; + + FingerprintMH(T owner, SearchStrategy &searchStrategy) + : _footprint(searchStrategy.itemFootprint()) { + std::vector integers; + std::array OpcodeFreq; + + for (size_t i = 0; i < MaxOpcode; i++) + OpcodeFreq[i] = 0; + + if (ShingleCrossBBs) { + for (Instruction &I : getInstructions(owner)) { + integers.push_back(instToInt(&I)); + OpcodeFreq[I.getOpcode()]++; + if (I.isTerminator()) + OpcodeFreq[0] += I.getNumSuccessors(); + } + } else { + for (BasicBlock &BB : *owner) { + + // Process normal instructions + for (Instruction &I : BB) { + integers.push_back(instToInt(&I)); + OpcodeFreq[I.getOpcode()]++; + if (I.isTerminator()) + OpcodeFreq[0] += I.getNumSuccessors(); + } + + // Add dummy instructions between basic blocks + for (size_t 
i = 0; i < K - 1; i++) { + integers.push_back(1); + } + } + } + + for (size_t i = 0; i < MaxOpcode; ++i) { + uint64_t val = OpcodeFreq[i]; + magnitude += val * val; + } + + searchStrategy.generateShinglesMultipleHashPipelineTurbo(integers, hash); + searchStrategy.generateBands(hash, bandHash); + } + + uint32_t footprint() const { return _footprint; } + + float distance(const FingerprintMH &FP2) const { + size_t nintersect = 0; + size_t pos1 = 0; + size_t pos2 = 0; + size_t nHashes = hash.size(); + + while (pos1 != nHashes && pos2 != nHashes) { + if (hash[pos1] == FP2.hash[pos2]) { + nintersect++; + pos1++; + pos2++; + } else if (hash[pos1] < FP2.hash[pos2]) { + pos1++; + } else { + pos2++; + } + } + + int nunion = 2 * nHashes - nintersect; + return 1.f - (nintersect / (float)nunion); + } + + float distance_under(const FingerprintMH &FP2, float best_distance) const { + size_t mismatches = 0; + size_t pos1 = 0; + size_t pos2 = 0; + size_t nHashes = hash.size(); + size_t best_nintersect = static_cast( + 2.0 * nHashes * (1.f - best_distance) / (2.f - best_distance)); + size_t best_mismatches = 2 * (nHashes - best_nintersect); + + while (pos1 != nHashes && pos2 != nHashes) { + if (hash[pos1] == FP2.hash[pos2]) { + pos1++; + pos2++; + } else if (hash[pos1] < FP2.hash[pos2]) { + mismatches++; + pos1++; + } else { + mismatches++; + pos2++; + } + if (mismatches > best_mismatches) + break; + } + + size_t nintersect = nHashes - (mismatches / 2); + int nunion = 2 * nHashes - nintersect; + return 1.f - (nintersect / (float)nunion); + } +}; + +template class Fingerprint { +public: + uint64_t magnitude{0}; + static const size_t MaxOpcode = 68; + std::array OpcodeFreq; + + Fingerprint() = default; + + Fingerprint(T owner) { + assert(owner != nullptr); + for (size_t i = 0; i < MaxOpcode; i++) + OpcodeFreq[i] = 0; + for (Instruction &I : getInstructions(owner)) { + if (I.getOpcode() > MaxOpcode || I.getOpcode() < 0) { + errs() << "Opcode is " << I.getOpcode() << "\n"; + exit(123); + 
} + OpcodeFreq[I.getOpcode()]++; + if (I.isTerminator()) + OpcodeFreq[0] += I.getNumSuccessors(); + } + for (size_t i = 0; i < MaxOpcode; i++) { + uint64_t val = OpcodeFreq[i]; + magnitude += val * val; + } + } + + uint32_t footprint() const { return sizeof(int) * MaxOpcode; } + + float distance(const Fingerprint &FP2) const { + int Distance = 0; + for (size_t i = 0; i < MaxOpcode; i++) { + int Freq1 = OpcodeFreq[i]; + int Freq2 = FP2.OpcodeFreq[i]; + Distance += std::abs(Freq1 - Freq2); + } + return static_cast(Distance); + } +}; + +class BlockFingerprint : public Fingerprint { +public: + BasicBlock *BB{nullptr}; + size_t Size{0}; + + BlockFingerprint(BasicBlock *BB) : Fingerprint(BB), BB(BB) { + for (Instruction &I : *BB) { + if (!isa(&I) && !isa(&I)) { + Size++; + } + } + } +}; + +template class MatchInfo { +public: + T candidate{nullptr}; + size_t Size{0}; + size_t OtherSize{0}; + size_t MergedSize{0}; + size_t Magnitude{0}; + size_t OtherMagnitude{0}; + float Distance{0}; + bool Valid{false}; + bool Profitable{false}; + + MatchInfo() = default; + MatchInfo(T candidate) : candidate(candidate){}; + MatchInfo(T candidate, size_t Size) : candidate(candidate), Size(Size){}; +}; + +template class Matcher { +public: + Matcher() = default; + virtual ~Matcher() = default; + + virtual void add_candidate(T candidate, size_t size) = 0; + virtual void remove_candidate(T candidate) = 0; + virtual T next_candidate() = 0; + virtual std::vector> &get_matches(T candidate) = 0; + virtual size_t size() = 0; + virtual void print_stats() = 0; +}; + +template class FPTy = Fingerprint> +class MatcherManual : public Matcher { +private: + struct MatcherEntry { + T candidate; + size_t size; + FPTy FP; + MatcherEntry() : MatcherEntry(nullptr, 0){}; + + template , typename T2 = Fingerprint> + MatcherEntry( + T candidate, size_t size, + typename std::enable_if_t::value, int> * = nullptr) + : candidate(candidate), size(size), FP(candidate) {} + + template , typename T2 = FingerprintMH> + 
MatcherEntry( + T candidate, size_t size, SearchStrategy &strategy, + typename std::enable_if_t::value, int> * = nullptr) + : candidate(candidate), size(size), FP(candidate, strategy) {} + }; + using MatcherIt = typename std::list::iterator; + + bool initialized{false}; + FunctionMerger &FM; + FunctionMergingOptions &Options; + std::list candidates; + std::unordered_map cache; + std::vector> matches; + std::unordered_map matchNames; + +public: + MatcherManual() = default; + MatcherManual(FunctionMerger &FM, FunctionMergingOptions &Options, + std::string Filename) + : FM(FM), Options(Options) { + std::ifstream File{Filename}; + std::string FuncName1, FuncName2; + while (File >> FuncName1 >> FuncName2) { + matchNames[FuncName1] = FuncName2; + matchNames[FuncName2] = FuncName1; + } + } + + virtual ~MatcherManual() = default; + + void add_candidate(T candidate, size_t size) override { + if (matchNames.count(GetValueName(candidate)) == 0) + return; + add_candidate_helper(candidate, size); + cache[candidate] = candidates.begin(); + } + + template , typename T2 = Fingerprint> + void add_candidate_helper( + T candidate, size_t size, + typename std::enable_if_t::value, int> * = nullptr) { + candidates.emplace_front(candidate, size); + } + + void remove_candidate(T candidate) override { + auto cache_it = cache.find(candidate); + assert(cache_it != cache.end()); + candidates.erase(cache_it->second); + } + + T next_candidate() override { + if (!initialized) { + candidates.sort([&](auto &item1, auto &item2) -> bool { + return item1.FP.magnitude > item2.FP.magnitude; + }); + initialized = true; + } + update_matches(candidates.begin()); + return candidates.front().candidate; + } + + std::vector> &get_matches(T candidate) override { + return matches; + } + + size_t size() override { return candidates.size(); } + + void print_stats() override { + + float MinDistance = std::numeric_limits::max(); + float MaxDistance = 0; + + int Index1 = 0; + for (auto It1 = candidates.begin(), E1 = 
candidates.end(); It1 != E1; + It1++) { + + int BestIndex = 0; + bool FoundCandidate = false; + float BestDist = std::numeric_limits::max(); + + unsigned CountCandidates = 0; + int Index2 = Index1; + for (auto It2 = It1, E2 = candidates.end(); It2 != E2; It2++) { + + if (It1->candidate == It2->candidate || Index1 == Index2) { + Index2++; + continue; + } + + if ((!FM.validMergeTypes(It1->candidate, It2->candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(It1->candidate, It2->candidate)) + continue; + + auto Dist = It1->FP.distance(It2->FP); + if (Dist < BestDist) { + BestDist = Dist; + FoundCandidate = true; + BestIndex = Index2; + } + if (RankingThreshold && CountCandidates > RankingThreshold) { + break; + } + CountCandidates++; + Index2++; + } + if (FoundCandidate) { + int Distance = std::abs(Index1 - BestIndex); + + if (Distance > MaxDistance) + MaxDistance = Distance; + if (Distance < MinDistance) + MinDistance = Distance; + } + Index1++; + } + } + +private: + void update_matches(MatcherIt it) { + matches.clear(); + + MatchInfo best_match; + best_match.OtherSize = it->size; + best_match.OtherMagnitude = it->FP.magnitude; + best_match.Distance = std::numeric_limits::max(); + + for (auto entry = std::next(candidates.cbegin()); + entry != candidates.cend(); ++entry) { + if ((!FM.validMergeTypes(it->candidate, entry->candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(it->candidate, entry->candidate)) + continue; + if (matchNames[GetValueName(it->candidate)] == + GetValueName(entry->candidate)) { + best_match.candidate = entry->candidate; + best_match.Size = entry->size; + best_match.Magnitude = entry->FP.magnitude; + best_match.Distance = 0; + break; + } + } + + if (best_match.candidate != nullptr) + matches.push_back(std::move(best_match)); + return; + } +}; + +template class FPTy = Fingerprint> +class MatcherFQ : public Matcher { +private: + struct MatcherEntry { + T candidate; + size_t size; + FPTy FP; + 
MatcherEntry() : MatcherEntry(nullptr, 0){}; + + template , typename T2 = Fingerprint> + MatcherEntry( + T candidate, size_t size, + typename std::enable_if_t::value, int> * = nullptr) + : candidate(candidate), size(size), FP(candidate) {} + + template , typename T2 = FingerprintMH> + MatcherEntry( + T candidate, size_t size, SearchStrategy &strategy, + typename std::enable_if_t::value, int> * = nullptr) + : candidate(candidate), size(size), FP(candidate, strategy) {} + }; + using MatcherIt = typename std::list::iterator; + + bool initialized{false}; + FunctionMerger &FM; + FunctionMergingOptions &Options; + std::list candidates; + std::unordered_map cache; + std::vector> matches; + SearchStrategy strategy; + +public: + MatcherFQ() = default; + MatcherFQ(FunctionMerger &FM, FunctionMergingOptions &Options, + size_t rows = 2, size_t bands = 100) + : FM(FM), Options(Options), strategy(rows, bands){}; + + virtual ~MatcherFQ() = default; + + void add_candidate(T candidate, size_t size) override { + add_candidate_helper(candidate, size); + cache[candidate] = candidates.begin(); + } + + template , typename T2 = Fingerprint> + void add_candidate_helper( + T candidate, size_t size, + typename std::enable_if_t::value, int> * = nullptr) { + candidates.emplace_front(candidate, size); + } + + template , typename T2 = Fingerprint> + void add_candidate_helper( + T candidate, size_t size, + typename std::enable_if_t::value, int> * = + nullptr) { + candidates.emplace_front(candidate, size, strategy); + } + + void remove_candidate(T candidate) override { + auto cache_it = cache.find(candidate); + assert(cache_it != cache.end()); + candidates.erase(cache_it->second); + } + + T next_candidate() override { + if (!initialized) { + candidates.sort([&](auto &item1, auto &item2) -> bool { + return item1.FP.magnitude > item2.FP.magnitude; + }); + initialized = true; + } + update_matches(candidates.begin()); + return candidates.front().candidate; + } + + std::vector> &get_matches(T 
candidate) override { + return matches; + } + + size_t size() override { return candidates.size(); } + + void print_stats() override { + + float MinDistance = std::numeric_limits::max(); + float MaxDistance = 0; + + int Index1 = 0; + for (auto It1 = candidates.begin(), E1 = candidates.end(); It1 != E1; + It1++) { + + int BestIndex = 0; + bool FoundCandidate = false; + float BestDist = std::numeric_limits::max(); + + unsigned CountCandidates = 0; + int Index2 = Index1; + for (auto It2 = It1, E2 = candidates.end(); It2 != E2; It2++) { + + if (It1->candidate == It2->candidate || Index1 == Index2) { + Index2++; + continue; + } + + if ((!FM.validMergeTypes(It1->candidate, It2->candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(It1->candidate, It2->candidate)) + continue; + + auto Dist = It1->FP.distance(It2->FP); + if (Dist < BestDist) { + BestDist = Dist; + FoundCandidate = true; + BestIndex = Index2; + } + if (RankingThreshold && CountCandidates > RankingThreshold) { + break; + } + CountCandidates++; + Index2++; + } + if (FoundCandidate) { + int Distance = std::abs(Index1 - BestIndex); + + if (Distance > MaxDistance) + MaxDistance = Distance; + if (Distance < MinDistance) + MinDistance = Distance; + } + Index1++; + } + } + +private: + void update_matches(MatcherIt it) { + size_t CountCandidates = 0; + matches.clear(); + + MatchInfo best_match; + best_match.OtherSize = it->size; + best_match.OtherMagnitude = it->FP.magnitude; + best_match.Distance = std::numeric_limits::max(); + + if (ExplorationThreshold == 1) { + for (auto entry = std::next(candidates.cbegin()); + entry != candidates.cend(); ++entry) { + if ((!FM.validMergeTypes(it->candidate, entry->candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(it->candidate, entry->candidate)) + continue; + auto new_distance = it->FP.distance(entry->FP); + if (new_distance < best_match.Distance) { + best_match.candidate = entry->candidate; + best_match.Size = 
entry->size; + best_match.Magnitude = entry->FP.magnitude; + best_match.Distance = new_distance; + } + if (RankingThreshold && (CountCandidates > RankingThreshold)) + break; + CountCandidates++; + } + if (best_match.candidate != nullptr) + if (!EnableF3M || best_match.Distance < RankingDistance) + matches.push_back(std::move(best_match)); + return; + } + + for (auto &entry : candidates) { + if (entry.candidate == it->candidate) + continue; + if ((!FM.validMergeTypes(it->candidate, entry.candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(it->candidate, entry.candidate)) + continue; + MatchInfo new_match(entry.candidate, entry.size); + new_match.Distance = it->FP.distance(entry.FP); + new_match.OtherSize = it->size; + new_match.OtherMagnitude = it->FP.magnitude; + new_match.Magnitude = entry.FP.magnitude; + if (!EnableF3M || new_match.Distance < RankingDistance) + matches.push_back(std::move(new_match)); + if (RankingThreshold && (CountCandidates > RankingThreshold)) + break; + CountCandidates++; + } + + if (ExplorationThreshold < matches.size()) { + std::partial_sort(matches.begin(), matches.begin() + ExplorationThreshold, + matches.end(), [&](auto &match1, auto &match2) -> bool { + return match1.Distance < match2.Distance; + }); + matches.resize(ExplorationThreshold); + std::reverse(matches.begin(), matches.end()); + } else { + std::sort(matches.begin(), matches.end(), + [&](auto &match1, auto &match2) -> bool { + return match1.Distance > match2.Distance; + }); + } + } +}; + +template class MatcherLSH : public Matcher { +private: + struct MatcherEntry { + T candidate; + size_t size; + FingerprintMH FP; + MatcherEntry() : MatcherEntry(nullptr, 0){}; + MatcherEntry(T candidate, size_t size, SearchStrategy &strategy) + : candidate(candidate), size(size), FP(candidate, strategy){}; + }; + using MatcherIt = typename std::list::iterator; + + bool initialized{false}; + const size_t rows{2}; + const size_t bands{100}; + FunctionMerger &FM; + 
FunctionMergingOptions &Options; + SearchStrategy strategy; + + std::list candidates; + std::unordered_map> lsh; + std::vector> cache; + std::vector> matches; + +public: + MatcherLSH() = default; + MatcherLSH(FunctionMerger &FM, FunctionMergingOptions &Options, size_t rows, + size_t bands) + : rows(rows), bands(bands), FM(FM), Options(Options), + strategy(rows, bands){}; + + virtual ~MatcherLSH() = default; + + void add_candidate(T candidate, size_t size) override { + candidates.emplace_front(candidate, size, strategy); + + auto it = candidates.begin(); + auto &bandHash = it->FP.bandHash; + for (size_t i = 0; i < bands; ++i) { + if (lsh.count(bandHash[i]) > 0) + lsh.at(bandHash[i]).push_back(it); + else + lsh.insert(std::make_pair(bandHash[i], std::vector(1, it))); + } + } + + void remove_candidate(T candidate) override { + auto cache_it = candidates.end(); + for (auto &cache_item : cache) { + if (cache_item.first == candidate) { + cache_it = cache_item.second; + break; + } + } + assert(cache_it != candidates.end()); + + auto &FP = cache_it->FP; + for (size_t i = 0; i < bands; ++i) { + if (lsh.count(FP.bandHash[i]) == 0) + continue; + + auto &foundFs = lsh.at(FP.bandHash[i]); + for (size_t j = 0; j < foundFs.size(); ++j) + if (foundFs[j]->candidate == candidate) + lsh.at(FP.bandHash[i]).erase(lsh.at(FP.bandHash[i]).begin() + j); + } + candidates.erase(cache_it); + } + + T next_candidate() override { + if (!initialized) { + candidates.sort([&](auto &item1, auto &item2) -> bool { + return item1.FP.magnitude > item2.FP.magnitude; + }); + initialized = true; + } + update_matches(candidates.begin()); + return candidates.front().candidate; + } + + std::vector> &get_matches(T candidate) override { + return matches; + } + + size_t size() override { return candidates.size(); } + + void print_stats() override { + std::unordered_set seen; + std::vector hist_bucket_size(20); + std::vector hist_distances(21); + std::vector hist_distances_diff(21); + uint32_t duplicate_hashes = 
0; + + for (auto it = lsh.cbegin(); it != lsh.cend(); ++it) { + size_t idx = 31 - __builtin_clz(it->second.size()); + idx = idx < 20 ? idx : 19; + hist_bucket_size[idx]++; + } + for (size_t i = 0; i < 20; i++) + errs() << "STATS: Histogram Bucket Size " << (1 << i) << " : " + << hist_bucket_size[i] << "\n"; + return; + + for (auto it = candidates.begin(); it != candidates.end(); ++it) { + seen.clear(); + seen.reserve(candidates.size() / 10); + + float best_distance = std::numeric_limits::max(); + std::unordered_set temp(it->FP.hash.begin(), it->FP.hash.end()); + duplicate_hashes += it->FP.hash.size() - temp.size(); + + for (size_t i = 0; i < bands; ++i) { + auto &foundFs = lsh.at(it->FP.bandHash[i]); + size_t idx = 31 - __builtin_clz(foundFs.size()); + idx = idx < 20 ? idx : 19; + hist_bucket_size[idx]++; + for (size_t j = 0; j < foundFs.size(); ++j) { + auto match_it = foundFs[j]; + if ((match_it->candidate == NULL) || + (match_it->candidate == it->candidate)) + continue; + if ((!FM.validMergeTypes(it->candidate, match_it->candidate, + Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(it->candidate, match_it->candidate)) + continue; + + if (seen.count(match_it->candidate) == 1) + continue; + seen.insert(match_it->candidate); + + auto distance = it->FP.distance(match_it->FP); + best_distance = distance < best_distance ? distance : best_distance; + auto idx2 = static_cast(distance * 20); + idx2 = idx2 < 21 ? idx2 : 20; + hist_distances[idx2]++; + auto idx3 = static_cast((distance - best_distance) * 20); + idx3 = idx3 < 21 ? 
idx3 : 20; + hist_distances_diff[idx3]++; + } + } + } + } + +private: + void update_matches(MatcherIt it) { + size_t CountCandidates = 0; + std::unordered_set seen; + seen.reserve(candidates.size() / 10); + matches.clear(); + cache.clear(); + cache.emplace_back(it->candidate, it); + + auto &FP = it->FP; + MatchInfo best_match; + best_match.Distance = std::numeric_limits::max(); + for (size_t i = 0; i < bands; ++i) { + assert(lsh.count(FP.bandHash[i]) > 0); + + auto &foundFs = lsh.at(FP.bandHash[i]); + for (size_t j = 0; j < foundFs.size() && j < BucketSizeCap; ++j) { + auto match_it = foundFs[j]; + if ((match_it->candidate == NULL) || + (match_it->candidate == it->candidate)) + continue; + if ((!FM.validMergeTypes(it->candidate, match_it->candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(it->candidate, match_it->candidate)) + continue; + + if (seen.count(match_it->candidate) == 1) + continue; + seen.insert(match_it->candidate); + + MatchInfo new_match(match_it->candidate, match_it->size); + if (best_match.Distance < 0.1) + new_match.Distance = + FP.distance_under(match_it->FP, best_match.Distance); + else + new_match.Distance = FP.distance(match_it->FP); + new_match.OtherSize = it->size; + new_match.OtherMagnitude = FP.magnitude; + new_match.Magnitude = match_it->FP.magnitude; + if (new_match.Distance < best_match.Distance && + new_match.Distance < RankingDistance) + best_match = new_match; + if (ExplorationThreshold > 1) + if (new_match.Distance < RankingDistance) + matches.push_back(new_match); + cache.emplace_back(match_it->candidate, match_it); + if (RankingThreshold && (CountCandidates > RankingThreshold)) + break; + CountCandidates++; + } + // If we've gone through i = 0 without finding a distance of 0.0 + // the minimum distance we might ever find is 2.0 / (nHashes + 1) + if ((ExplorationThreshold == 1) && + (best_match.Distance < (2.0 / (rows * bands)))) + break; + if (RankingThreshold && (CountCandidates > RankingThreshold)) + 
break; + } + + if (ExplorationThreshold == 1) + if (best_match.candidate != nullptr) + matches.push_back(std::move(best_match)); + + if (matches.size() <= 1) + return; + + size_t toRank = std::min((size_t)ExplorationThreshold, matches.size()); + + std::partial_sort(matches.begin(), matches.begin() + toRank, matches.end(), + [&](auto &match1, auto &match2) -> bool { + return match1.Distance < match2.Distance; + }); + matches.resize(toRank); + std::reverse(matches.begin(), matches.end()); + } +}; + +template class MatcherReport { +private: + struct MatcherEntry { + T candidate; + Fingerprint FPF; + FingerprintMH FPMH; + MatcherEntry(T candidate, SearchStrategy &strategy) + : candidate(candidate), FPF(candidate), FPMH(candidate, strategy){}; + }; + using MatcherIt = typename std::list::iterator; + + FunctionMerger &FM; + FunctionMergingOptions &Options; + SearchStrategy strategy; + std::vector candidates; + +public: + MatcherReport() = default; + MatcherReport(size_t rows, size_t bands, FunctionMerger &FM, + FunctionMergingOptions &Options) + : FM(FM), Options(Options), strategy(rows, bands){}; + + ~MatcherReport() = default; + + void add_candidate(T candidate) { + candidates.emplace_back(candidate, strategy); + } + + void report() const { + char distance_mh_str[20]; + + for (auto &entry : candidates) { + uint64_t val = 0; + for (auto &num : entry.FPF.OpcodeFreq) + val += num; + } + + std::string Name("_m_f_"); + for (auto it1 = candidates.cbegin(); it1 != candidates.cend(); ++it1) { + for (auto it2 = std::next(it1); it2 != candidates.cend(); ++it2) { + if ((!FM.validMergeTypes(it1->candidate, it2->candidate, Options) && + !Options.EnableUnifiedReturnType) || + !validMergePair(it1->candidate, it2->candidate)) + continue; + + auto distance_fq = it1->FPF.distance(it2->FPF); + auto distance_mh = it1->FPMH.distance(it2->FPMH); + std::snprintf(distance_mh_str, 20, "%.5f", distance_mh); + + FunctionMergeResult Result = + FM.merge(it1->candidate, it2->candidate, Name, 
Options); + } + } + } +}; + +AlignedCode::AlignedCode(BasicBlock *BB1, BasicBlock *BB2) { + // this should never happen + assert(BB1 != nullptr || BB2 != nullptr); + + // Add only BB1, skipping Phi nodes and Landing Pads + if (BB1 != nullptr && BB2 == nullptr) { + Data.emplace_back(BB1, nullptr, false); + for (Instruction &I : *BB1) { + if (isa(&I) || isa(&I)) + continue; + Data.emplace_back(&I, nullptr, false); + } + return; + } + + // Add only BB2, skipping Phi nodes and Landing Pads + if (BB1 == nullptr && BB2 != nullptr) { + Data.emplace_back(nullptr, BB2, false); + for (Instruction &I : *BB2) { + if (isa(&I) || isa(&I)) + continue; + Data.emplace_back(nullptr, &I, false); + } + return; + } + + // Add both, skipping Phi nodes and Landing Pads + Data.emplace_back(BB1, BB2, FunctionMerger::matchBlocks(BB1, BB2)); + + auto It1 = BB1->begin(); + while (isa(*It1) || isa(*It1)) + It1++; + + auto It2 = BB2->begin(); + while (isa(*It2) || isa(*It2)) + It2++; + + while (It1 != BB1->end() && It2 != BB2->end()) { + Instruction *I1 = &*It1; + Instruction *I2 = &*It2; + + if (FunctionMerger::matchInstructions(I1, I2)) { + Data.emplace_back(I1, I2, true); + } else { + Data.emplace_back(I1, nullptr, false); + Data.emplace_back(nullptr, I2, false); + } + + It1++; + It2++; + } + assert((It1 == BB1->end()) && (It2 == BB2->end())); +} + +bool AlignedCode::isProfitable() const { + int OriginalCost = 0; + int MergedCost = 0; + + bool InsideSplit = false; + + for (auto &Entry : Data) { + Instruction *I1 = nullptr; + if (Entry.get(0)) + I1 = dyn_cast(Entry.get(0)); + + Instruction *I2 = nullptr; + if (Entry.get(1)) + I2 = dyn_cast(Entry.get(1)); + + bool IsInstruction = I1 != nullptr || I2 != nullptr; + if (Entry.match()) { + if (IsInstruction) { + OriginalCost += 2; + MergedCost += 1; + } + if (InsideSplit) { + InsideSplit = false; + MergedCost += 2; + } + } else { + if (IsInstruction) { + OriginalCost += 1; + MergedCost += 1; + } + if (!InsideSplit) { + InsideSplit = true; + 
MergedCost += 1; + } + } + } + + bool Profitable = (MergedCost <= OriginalCost); + return Profitable; +} + +void AlignedCode::extend(const AlignedCode &Other) { + for (auto &Entry : Other) { + Instruction *I1 = nullptr; + if (Entry.get(0)) + I1 = dyn_cast(Entry.get(0)); + + Instruction *I2 = nullptr; + if (Entry.get(1)) + I2 = dyn_cast(Entry.get(1)); + + bool IsInstruction = I1 != nullptr || I2 != nullptr; + + Data.emplace_back(Entry.get(0), Entry.get(1), Entry.match()); + + if (IsInstruction) { + Insts++; + if (Entry.match()) { + Matches++; + Instruction *I = I1 ? I1 : I2; + if (!I->isTerminator()) + CoreMatches++; + } + } + } +} + +bool AcrossBlocks; + +FunctionMergeResult +FunctionMerger::merge(Function *F1, Function *F2, std::string Name, + const FunctionMergingOptions &Options) { + bool ProfitableFn = true; + LLVMContext &Context = *ContextPtr; + FunctionMergeResult ErrorResponse(F1, F2, nullptr); + + if (!validMergePair(F1, F2)) + return ErrorResponse; + + AlignedCode AlignedSeq; + NeedlemanWunschSA> SA(ScoringSystem(-1, 2), + FunctionMerger::match); + + if (EnableHyFMNW || EnableHyFMPA) { // Processing individual pairs of blocks + + // Fingerprints for all Blocks in F1 organized by size + std::map> Blocks; + for (BasicBlock &BB1 : *F1) { + BlockFingerprint BD1(&BB1); + + Blocks[BD1.Size].push_back(std::move(BD1)); + } + + for (BasicBlock &BIt : *F2) { + BasicBlock *BB2 = &BIt; + BlockFingerprint BD2(BB2); + + // list all the map entries in Blocks in order of distance from BD2.Size + auto ItSetIncr = Blocks.lower_bound(BD2.Size); + + auto ItSetDecr = std::reverse_iterator< + std::map>::iterator>(ItSetIncr); + + std::vector ItSets; + + if (EnableHyFMNW) { + while (ItSetDecr != Blocks.rend() && ItSetIncr != Blocks.end()) { + if (BD2.Size - ItSetDecr->first < ItSetIncr->first - BD2.Size) { + ItSets.push_back(std::prev(ItSetDecr.base())); + ItSetDecr++; + } else { + ItSets.push_back(ItSetIncr); + ItSetIncr++; + } + } + + while (ItSetDecr != Blocks.rend()) { + 
ItSets.push_back(std::prev(ItSetDecr.base())); + ItSetDecr++; + } + + while (ItSetIncr != Blocks.end()) { + ItSets.push_back(ItSetIncr); + ItSetIncr++; + } + } else { + ItSetIncr = Blocks.find(BD2.Size); + if (ItSetIncr != Blocks.end()) + ItSets.push_back(ItSetIncr); + } + + // Find the closest block starting from blocks with similar size + std::vector::iterator BestIt; + std::map>::iterator BestSet; + float BestDist = std::numeric_limits::max(); + + for (auto ItSet : ItSets) { + for (auto BDIt = ItSet->second.begin(), E = ItSet->second.end(); + BDIt != E; BDIt++) { + auto D = BD2.distance(*BDIt); + if (D < BestDist) { + BestDist = D; + BestIt = BDIt; + BestSet = ItSet; + if (BestDist < std::numeric_limits::epsilon()) + break; + } + } + if (BestDist < std::numeric_limits::epsilon()) + break; + } + + bool MergedBlock = false; + if (BestDist < std::numeric_limits::max()) { + BasicBlock *BB1 = BestIt->BB; + AlignedCode AlignedBlocks; + + if (EnableHyFMNW) { + SmallVector BB1Vec; + vectorizeBB(BB1Vec, BB1); + + SmallVector BB2Vec; + vectorizeBB(BB2Vec, BB2); + + AlignedBlocks = SA.getAlignment(BB1Vec, BB2Vec); + + } else if (EnableHyFMPA) { + AlignedBlocks = AlignedCode(BB1, BB2); + } + + if (!HyFMProfitability || AlignedBlocks.isProfitable()) { + AlignedSeq.extend(AlignedBlocks); + BestSet->second.erase(BestIt); + MergedBlock = true; + } + } + + if (!MergedBlock) + AlignedSeq.extend(AlignedCode(nullptr, BB2)); + } + + for (auto &Pair : Blocks) + for (auto &BD1 : Pair.second) + AlignedSeq.extend(AlignedCode(BD1.BB, nullptr)); + + ProfitableFn = AlignedSeq.hasMatches(); + } + + if (!ProfitableFn && !ReportStats) { + + return ErrorResponse; + } + + AcrossBlocks = false; + BasicBlock *CurrBB0 = nullptr; + BasicBlock *CurrBB1 = nullptr; + for (auto &Entry : AlignedSeq) { + if (Entry.match()) { + if (isa(Entry.get(1))) { + CurrBB1 = cast(Entry.get(1)); + } else if (auto *I = dyn_cast(Entry.get(1))) { + if (CurrBB1 == nullptr) + CurrBB1 = I->getParent(); + else if (CurrBB1 
!= I->getParent()) { + AcrossBlocks = true; + } + } + if (isa(Entry.get(0))) { + CurrBB0 = cast(Entry.get(0)); + } else if (auto *I = dyn_cast(Entry.get(0))) { + if (CurrBB0 == nullptr) + CurrBB0 = I->getParent(); + else if (CurrBB0 != I->getParent()) { + AcrossBlocks = true; + } + } + } else { + if (isa_and_nonnull(Entry.get(0))) + CurrBB1 = nullptr; + if (isa_and_nonnull(Entry.get(1))) + CurrBB0 = nullptr; + } + } + if (AcrossBlocks) { + } + + // errs() << "Code Gen\n"; + + // errs() << "Creating function type\n"; + + // Merging parameters + std::map ParamMap1; + std::map ParamMap2; + std::vector Args; + + // errs() << "Merging arguments\n"; + MergeArguments(Context, F1, F2, AlignedSeq, ParamMap1, ParamMap2, Args, + Options); + + Type *RetType1 = F1->getReturnType(); + Type *RetType2 = F2->getReturnType(); + Type *ReturnType = nullptr; + + bool RequiresUnifiedReturn = false; + + if (validMergeTypes(F1, F2, Options)) { + ReturnType = RetType1; + if (ReturnType->isVoidTy()) { + ReturnType = RetType2; + } + } else if (Options.EnableUnifiedReturnType) { + RequiresUnifiedReturn = true; + + auto SizeOfTy1 = DL->getTypeStoreSize(RetType1); + auto SizeOfTy2 = DL->getTypeStoreSize(RetType2); + if (SizeOfTy1 >= SizeOfTy2) { + ReturnType = RetType1; + } else { + ReturnType = RetType2; + } + } else { + return ErrorResponse; + } + FunctionType *FTy = + FunctionType::get(ReturnType, ArrayRef(Args), false); + + if (Name.empty()) { + Name = "_m_f"; + } + Function *MergedFunc = + Function::Create(FTy, // GlobalValue::LinkageTypes::InternalLinkage, + GlobalValue::LinkageTypes::PrivateLinkage, Twine(Name), + M); // merged.function + + ValueToValueMapTy VMap; + + std::vector ArgsList; + for (Argument &arg : MergedFunc->args()) { + ArgsList.push_back(&arg); + } + Value *FuncId = ArgsList[0]; + + int ArgId = 0; + for (auto I = F1->arg_begin(), E = F1->arg_end(); I != E; I++) { + VMap[&(*I)] = ArgsList[ParamMap1[ArgId]]; + + ArgId++; + } + + ArgId = 0; + for (auto I = F2->arg_begin(), 
E = F2->arg_end(); I != E; I++) { + VMap[&(*I)] = ArgsList[ParamMap2[ArgId]]; + + ArgId++; + } + + SetFunctionAttributes(F1, F2, MergedFunc); + + Value *IsFunc1 = FuncId; + + auto Gen = [&](auto &CG) { + CG.setFunctionIdentifier(IsFunc1) + .setEntryPoints(&F1->getEntryBlock(), &F2->getEntryBlock()) + .setReturnTypes(RetType1, RetType2) + .setMergedFunction(MergedFunc) + .setMergedEntryPoint(BasicBlock::Create(Context, "entry", MergedFunc)) + .setMergedReturnType(ReturnType, RequiresUnifiedReturn) + .setContext(ContextPtr) + .setIntPtrType(IntPtrTy); + if (!CG.generate(AlignedSeq, VMap, Options)) { + + MergedFunc->eraseFromParent(); + MergedFunc = nullptr; + if (Debug) + errs() << "ERROR: Failed to generate the merged function!\n"; + return false; + } + return true; + }; + + SALSSACodeGen CG(F1, F2); + if (!Gen(CG)) { + if (Debug) + errs() << "feisen\n"; + FunctionMergeResult Result(false); + return Result; + } + + FunctionMergeResult Result(F1, F2, MergedFunc, RequiresUnifiedReturn); + Result.setArgumentMapping(F1, ParamMap1); + Result.setArgumentMapping(F2, ParamMap2); + Result.setFunctionIdArgument(FuncId != nullptr); + return Result; +} + +void FunctionMerger::replaceByCall(Function *F, FunctionMergeResult &MFR, + const FunctionMergingOptions &Options) { + LLVMContext &Context = M->getContext(); + + Value *FuncId = MFR.getFunctionIdValue(F); + Function *MergedF = MFR.getMergedFunction(); + + // Make sure we preserve its linkage + auto Linkage = F->getLinkage(); + + F->deleteBody(); + BasicBlock *NewBB = BasicBlock::Create(Context, "", F); + IRBuilder<> Builder(NewBB); + + std::vector args; + for (unsigned i = 0; i < MergedF->getFunctionType()->getNumParams(); i++) { + args.push_back(nullptr); + } + + if (MFR.hasFunctionIdArgument()) { + args[0] = FuncId; + } + + std::vector ArgsList; + for (Argument &arg : F->args()) { + ArgsList.push_back(&arg); + } + + for (auto Pair : MFR.getArgumentMapping(F)) { + args[Pair.second] = ArgsList[Pair.first]; + } + + for 
(unsigned i = 0; i < args.size(); i++) { + if (args[i] == nullptr) { + args[i] = UndefValue::get(MergedF->getFunctionType()->getParamType(i)); + } + } + + F->setLinkage(Linkage); + + CallInst *CI = + (CallInst *)Builder.CreateCall(MergedF, ArrayRef(args)); + CI->setTailCall(); + CI->setCallingConv(MergedF->getCallingConv()); + CI->setAttributes(MergedF->getAttributes()); + CI->setIsNoInline(); + + if (F->getReturnType()->isVoidTy()) { + Builder.CreateRetVoid(); + } else { + Value *CastedV; + if (MFR.needUnifiedReturn()) { + Value *AddrCI = Builder.CreateAlloca(CI->getType()); + Builder.CreateStore(CI, AddrCI); + Value *CastedAddr = Builder.CreatePointerCast( + AddrCI, + PointerType::get(F->getReturnType(), DL->getAllocaAddrSpace())); + CastedV = Builder.CreateLoad(F->getReturnType(), CastedAddr); + } else { + CastedV = createCastIfNeeded(CI, F->getReturnType(), Builder, IntPtrTy, + Options); + } + Builder.CreateRet(CastedV); + } +} + +bool FunctionMerger::replaceCallsWith(Function *F, FunctionMergeResult &MFR, + const FunctionMergingOptions &Options) { + + Value *FuncId = MFR.getFunctionIdValue(F); + Function *MergedF = MFR.getMergedFunction(); + + unsigned CountUsers = 0; + std::vector Calls; + for (User *U : F->users()) { + CountUsers++; + if (auto *CI = dyn_cast(U)) { + if (CI->getCalledFunction() == F) { + Calls.push_back(CI); + } + } else if (auto *II = dyn_cast(U)) { + if (II->getCalledFunction() == F) { + Calls.push_back(II); + } + } + } + + if (Calls.size() < CountUsers) + return false; + + for (CallBase *CI : Calls) { + IRBuilder<> Builder(CI); + + std::vector args; + for (unsigned i = 0; i < MergedF->getFunctionType()->getNumParams(); i++) { + args.push_back(nullptr); + } + + if (MFR.hasFunctionIdArgument()) { + args[0] = FuncId; + } + + for (auto Pair : MFR.getArgumentMapping(F)) { + args[Pair.second] = CI->getArgOperand(Pair.first); + } + + for (unsigned i = 0; i < args.size(); i++) { + if (args[i] == nullptr) { + args[i] = 
UndefValue::get(MergedF->getFunctionType()->getParamType(i)); + } + } + + CallBase *NewCB = nullptr; + if (CI->getOpcode() == Instruction::Call) { + NewCB = (CallInst *)Builder.CreateCall(MergedF->getFunctionType(), + MergedF, args); + } else if (CI->getOpcode() == Instruction::Invoke) { + auto *II = dyn_cast(CI); + NewCB = (InvokeInst *)Builder.CreateInvoke(MergedF->getFunctionType(), + MergedF, II->getNormalDest(), + II->getUnwindDest(), args); + } + NewCB->setCallingConv(MergedF->getCallingConv()); + NewCB->setAttributes(MergedF->getAttributes()); + NewCB->setIsNoInline(); + Value *CastedV = NewCB; + if (!F->getReturnType()->isVoidTy()) { + if (MFR.needUnifiedReturn()) { + Value *AddrCI = Builder.CreateAlloca(NewCB->getType()); + Builder.CreateStore(NewCB, AddrCI); + Value *CastedAddr = Builder.CreatePointerCast( + AddrCI, + PointerType::get(F->getReturnType(), DL->getAllocaAddrSpace())); + CastedV = Builder.CreateLoad(F->getReturnType(), CastedAddr); + } else { + CastedV = createCastIfNeeded(NewCB, F->getReturnType(), Builder, + IntPtrTy, Options); + } + } + + if (CI->getNumUses() > 0) { + CI->replaceAllUsesWith(CastedV); + } + CI->eraseFromParent(); + } + + return true; +} + +static bool ShouldPreserveGV(const GlobalValue *GV) { + // Function must be defined here + if (GV->isDeclaration()) + return true; + + if (GV->hasDLLExportStorageClass()) + return true; + + if (GV->hasLocalLinkage()) + return false; + + return false; +} + +static int RequiresOriginalInterface(Function *F, FunctionMergeResult &MFR, + StringSet<> &AlwaysPreserved) { + bool CanErase = !F->hasAddressTaken(); + CanErase = + CanErase && (AlwaysPreserved.find(F->getName()) == AlwaysPreserved.end()); + if (!HasWholeProgram) { + CanErase = CanErase && F->isDiscardableIfUnused(); + } + return !CanErase; +} + +static int RequiresOriginalInterfaces(FunctionMergeResult &MFR, + StringSet<> &AlwaysPreserved) { + auto FPair = MFR.getFunctions(); + Function *F1 = FPair.first; + Function *F2 = 
FPair.second; + return (RequiresOriginalInterface(F1, MFR, AlwaysPreserved) ? 1 : 0) + + (RequiresOriginalInterface(F2, MFR, AlwaysPreserved) ? 1 : 0); +} + +void FunctionMerger::updateCallGraph(Function *F, FunctionMergeResult &MFR, + StringSet<> &AlwaysPreserved, + const FunctionMergingOptions &Options) { + replaceByCall(F, MFR, Options); + if (!RequiresOriginalInterface(F, MFR, AlwaysPreserved)) { + bool CanErase = replaceCallsWith(F, MFR, Options); + CanErase = CanErase && F->use_empty(); + CanErase = CanErase && + (AlwaysPreserved.find(F->getName()) == AlwaysPreserved.end()); + if (!HasWholeProgram) { + CanErase = CanErase && !ShouldPreserveGV(F); + CanErase = CanErase && F->isDiscardableIfUnused(); + } + if (CanErase) + F->eraseFromParent(); + } +} + +void FunctionMerger::updateCallGraph(FunctionMergeResult &MFR, + StringSet<> &AlwaysPreserved, + const FunctionMergingOptions &Options) { + auto FPair = MFR.getFunctions(); + Function *F1 = FPair.first; + Function *F2 = FPair.second; + updateCallGraph(F1, MFR, AlwaysPreserved, Options); + updateCallGraph(F2, MFR, AlwaysPreserved, Options); +} + +static int EstimateThunkOverhead(FunctionMergeResult &MFR, + StringSet<> &AlwaysPreserved) { + return RequiresOriginalInterfaces(MFR, AlwaysPreserved) * + (2 + MFR.getMergedFunction()->getFunctionType()->getNumParams()); +} + +static size_t EstimateFunctionSize(Function *F, TargetTransformInfo *TTI) { + float size = 0; + for (Instruction &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::PHI: + size += 0.2; + break; + default: + auto cost = TTI->getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); + size += cost.getValue().value(); + } + } + return size_t(std::ceil(size)); +} + +unsigned instToInt(Instruction *I) { + uint32_t value = 0; + static uint32_t pseudorand_value = 100; + + if (pseudorand_value > 10000) + pseudorand_value = 100; + + value = I->getOpcode(); + + uint32_t operands = + I->getOpcode() == Instruction::Ret ? 
1 : I->getNumOperands(); + value = value * (operands + 1); + + auto instTypeID = static_cast(I->getType()->getTypeID()); + value = value * (instTypeID + 1); + auto *ITypePtr = I->getType(); + if (ITypePtr) { + value = value * (reinterpret_cast(ITypePtr) + 1); + } + + for (size_t i = 0; i < I->getNumOperands(); i++) { + auto operTypeID = + static_cast(I->getOperand(i)->getType()->getTypeID()); + value = value * (operTypeID + 1); + + auto *IOperTypePtr = I->getOperand(i)->getType(); + + if (IOperTypePtr) { + value = + value * + (reinterpret_cast(I->getOperand(i)->getType()) + 1); + } + + value = value * (i + 1); + } + return value; + + // Now for the funky stuff -- this is gonna be a wild ride + switch (I->getOpcode()) { + + case Instruction::Load: { + + const LoadInst *LI = dyn_cast(I); + uint32_t lValue = LI->isVolatile() ? 1 : 10; // Volatility + lValue += LI->getAlign().value(); // Alignment + lValue += static_cast(LI->getOrdering()); // Ordering + + value = value * lValue; + + break; + } + + case Instruction::Store: { + + const StoreInst *SI = dyn_cast(I); + uint32_t sValue = SI->isVolatile() ? 2 : 20; // Volatility + sValue += SI->getAlign().value(); // Alignment + sValue += static_cast(SI->getOrdering()); // Ordering + + value = value * sValue; + + break; + } + + case Instruction::Alloca: { + const AllocaInst *AI = dyn_cast(I); + uint32_t aValue = AI->getAlign().value(); // Alignment + + if (AI->getArraySize()) { + aValue += reinterpret_cast(AI->getArraySize()); + } + + value = value * (aValue + 1); + + break; + } + + case Instruction::GetElementPtr: // Important + { + + auto *GEP = dyn_cast(I); + uint32_t gValue = 1; + + SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); + gValue = Indices.size() + 1; + + gValue += GEP->isInBounds() ? 
3 : 30; + + Type *AggTy = GEP->getSourceElementType(); + gValue += static_cast(AggTy->getTypeID()); + + unsigned curIndex = 1; + for (; curIndex != Indices.size(); ++curIndex) { + + if (!AggTy || AggTy->isPointerTy()) { + if (Deterministic) + value = pseudorand_value++; + else + value = std::rand() % 10000 + 100; + break; + } + + Value *Idx = Indices[curIndex]; + + if (isa(AggTy)) { + if (!isa(Idx)) { + if (Deterministic) + value = pseudorand_value++; + else + value = + std::rand() % 10000 + 100; // Use a random number as we don't + // want this to match with anything + break; + } + + auto i = 0; + if (Idx) { + i = reinterpret_cast(Idx); + } + gValue += i; + } + } + + value = value * gValue; + + break; + } + + case Instruction::Switch: { + auto *SI = dyn_cast(I); + uint32_t sValue = 1; + sValue = SI->getNumCases(); + + auto CaseIt = SI->case_begin(), CaseEnd = SI->case_end(); + + while (CaseIt != CaseEnd) { + auto *Case = &*CaseIt; + if (Case) { + sValue += reinterpret_cast(Case); + } + CaseIt++; + } + + value = value * sValue; + + break; + } + + case Instruction::Call: { + auto *CI = dyn_cast(I); + uint32_t cValue = 1; + + if (CI->isInlineAsm()) { + if (Deterministic) + value = pseudorand_value++; + else + value = std::rand() % 10000 + 100; + break; + } + + if (CI->getCalledFunction()) { + cValue = reinterpret_cast(CI->getCalledFunction()); + } + + if (Function *F = CI->getCalledFunction()) { + if (auto ID = (Intrinsic::ID)F->getIntrinsicID()) { + cValue += static_cast(ID); + } + } + + cValue += static_cast(CI->getCallingConv()); + + value = value * cValue; + + break; + } + + case Instruction::Invoke: // Need to look at matching landing pads + { + auto *II = dyn_cast(I); + uint32_t iValue = 1; + + iValue = static_cast(II->getCallingConv()); + + if (II->getAttributes().getRawPointer()) { + iValue += + reinterpret_cast(II->getAttributes().getRawPointer()); + } + + value = value * iValue; + + break; + } + + case Instruction::InsertValue: { + auto *IVI = dyn_cast(I); 
+ + uint32_t ivValue = 1; + + ivValue = IVI->getNumIndices(); + + // check element wise equality + auto Idx = IVI->getIndices(); + const auto *IdxIt = Idx.begin(); + const auto *IdxEnd = Idx.end(); + + while (IdxIt != IdxEnd) { + auto *val = &*IdxIt; + if (val) { + ivValue += reinterpret_cast(*val); + } + IdxIt++; + } + + value = value * ivValue; + + break; + } + + case Instruction::ExtractValue: { + auto *EVI = dyn_cast(I); + + uint32_t evValue = 1; + + evValue = EVI->getNumIndices(); + + // check element wise equality + auto Idx = EVI->getIndices(); + const auto *IdxIt = Idx.begin(); + const auto *IdxEnd = Idx.end(); + + while (IdxIt != IdxEnd) { + auto *val = &*IdxIt; + if (val) { + evValue += reinterpret_cast(*val); + } + IdxIt++; + } + + value = value * evValue; + + break; + } + + case Instruction::Fence: { + auto *FI = dyn_cast(I); + + uint32_t fValue = 1; + + fValue = static_cast(FI->getOrdering()); + + fValue += static_cast(FI->getSyncScopeID()); + + value = value * fValue; + + break; + } + + case Instruction::AtomicCmpXchg: { + auto *AXI = dyn_cast(I); + + uint32_t axValue = 1; + + axValue = AXI->isVolatile() ? 4 : 40; + axValue += AXI->isWeak() ? 5 : 50; + axValue += static_cast(AXI->getSuccessOrdering()); + axValue += static_cast(AXI->getFailureOrdering()); + axValue += static_cast(AXI->getSyncScopeID()); + + value = value * axValue; + + break; + } + + case Instruction::AtomicRMW: { + auto *ARI = dyn_cast(I); + + uint32_t arValue = 1; + + arValue = static_cast(ARI->getOperation()); + arValue += ARI->isVolatile() ? 
6 : 60; + arValue += static_cast(ARI->getOrdering()); + arValue += static_cast(ARI->getSyncScopeID()); + + value = value * arValue; + break; + } + + case Instruction::PHI: { + if (Deterministic) + value = pseudorand_value++; + else + value = std::rand() % 10000 + 100; + break; + } + + default: + if (auto *CI = dyn_cast(I)) { + uint32_t cmpValue = 1; + + cmpValue = static_cast(CI->getPredicate()) + 1; + + value = value * cmpValue; + } + } + + // Return + return value; +} + +bool detectASM_fm(Function &F) { + for (BasicBlock &B : F) { + for (Instruction &I : B) { + if (CallInst *callInst = dyn_cast(&I)) { + if (callInst->isInlineAsm()) { + return true; + } + } + } + } + return false; +} + +bool detect_bad_ndelay(Function &F) { + for (BasicBlock &B : F) { + for (Instruction &I : B) { + if (CallInst *callInst = dyn_cast(&I)) { + if (Function *calledFunction = callInst->getCalledFunction()) { + if (calledFunction->getName() == "__bad_ndelay") { + return true; + } + } + } + } + } + return false; +} + +static bool checkAsmVolatility(Function &F) { + for (BasicBlock &B : F) { + for (Instruction &I : B) { + if (CallInst *callInst = dyn_cast(&I)) { + if (callInst->isInlineAsm()) { + return true; + } + } + } + } + return false; +} + +static bool skipFunction(Function &F) { + if (F.getName().equals("ftab_insert")) { + return true; + } + return false; +} + +bool ignoreFunction(Function &F) { + if (skipFunction(F)) { + return true; + } + if (checkAsmVolatility(F)) { + return true; + } + if (detect_bad_ndelay(F)) { + return true; + } + for (Instruction &I : instructions(F)) { + if (auto *CB = dyn_cast(&I)) { + if (Function *F2 = CB->getCalledFunction()) { + if (auto ID = (Intrinsic::ID)F2->getIntrinsicID()) { + if (Intrinsic::isOverloaded(ID)) + continue; + if (Intrinsic::getName(ID).contains("permvar")) + return true; + if (Intrinsic::getName(ID).contains("vcvtps")) + return true; + if (Intrinsic::getName(ID).contains("avx")) + return true; + if 
(Intrinsic::getName(ID).contains("x86")) + return true; + if (Intrinsic::getName(ID).contains("arm")) + return true; + } + } + } + } + return false; +} + +bool FunctionMerging::runImpl( + Module &M, function_ref GTTI) { + + StringSet<> AlwaysPreserved; + AlwaysPreserved.insert("main"); + + srand(time(nullptr)); // 设置随机种子 + + FunctionMergingOptions Options = + FunctionMergingOptions() + .maximizeParameterScore(MaxParamScore) + .matchOnlyIdenticalTypes(IdenticalType) + .enableUnifiedReturnTypes(EnableUnifiedReturnType); + + FunctionMerger FM(&M); + + std::unique_ptr> matcher; + + int size = 0; + for (auto &F : M) { + if (F.isDeclaration() || F.isVarArg() || + (!HasWholeProgram && F.hasAvailableExternallyLinkage())) + continue; + size++; + } + + // Create a threshold based on the application's size + if (AdaptiveThreshold || AdaptiveBands) { + double x = std::log10(size) / 10; + RankingDistance = (double)(x - 0.3); + if (RankingDistance < 0.05) + RankingDistance = 0.05; + if (RankingDistance > 0.4) + RankingDistance = 0.4; + + if (AdaptiveBands) { + float target_probability = 0.9; + float offset = 0.1; + unsigned tempBands = std::ceil( + std::log(1.0 - target_probability) / + std::log(1.0 - std::pow(RankingDistance + offset, LSHRows))); + if (tempBands < LSHBands) + LSHBands = tempBands; + } + if (AdaptiveThreshold) + RankingDistance = 1 - RankingDistance; + else + RankingDistance = 1.0; + } + if (Debug) { + errs() << "Threshold: " << RankingDistance << "\n"; + errs() << "LSHRows: " << LSHRows << "\n"; + errs() << "LSHBands: " << LSHBands << "\n"; + } + + if (!ToMergeFile.empty()) { + matcher = + std::make_unique>(FM, Options, ToMergeFile); + } else if (EnableF3M) { + matcher = std::make_unique>(FM, Options, LSHRows, + LSHBands); + } else { + matcher = std::make_unique>(FM, Options); + } + + SearchStrategy strategy(LSHRows, LSHBands); + for (auto &F : M) { + if (F.isDeclaration() || F.isVarArg() || + (!HasWholeProgram && F.hasAvailableExternallyLinkage())) + continue; 
+ if (ignoreFunction(F)) + continue; + matcher->add_candidate(&F, EstimateFunctionSize(&F, GTTI(F))); + } + + if (Debug) { + errs() << "Number of Functions: " << matcher->size() << "\n"; + if (MatcherStats) { + return false; + } + } + + unsigned TotalMerges = 0; + + while (matcher->size() > 0) { + + Function *F1 = matcher->next_candidate(); + auto &Rank = matcher->get_matches(F1); + matcher->remove_candidate(F1); + + if (F1 == nullptr) + continue; + unsigned MergingTrialsCount = 0; + + std::string F1Name(GetValueName(F1)); + + while (!Rank.empty()) { + MatchInfo match = Rank.back(); + Rank.pop_back(); + Function *F2 = match.candidate; + + if (F1 == nullptr || F2 == nullptr) + continue; + if (verifyFunction(*F1) || verifyFunction(*F2)) { + errs() << "feisen:debug:verify function error\n"; + continue; + } + + if (Debug) { + errs() << "feisen:debug:F1: " << F1->getName() + << " F2: " << F2->getName() << "\n"; + errs() << F1->getName().equals("") << "\n"; + } + + std::string F2Name(GetValueName(F2)); + MergingTrialsCount++; + + if (Debug) + errs() << "Attempting: " << F1Name << ", " << F2Name << " : " + << match.Distance << "\n"; + + std::string Name = "_m_f_" + std::to_string(TotalMerges); + FunctionMergeResult Result = FM.merge(F1, F2, Name, Options); + if (Result.getMergedFunction() != nullptr && Result.Success) { + match.Valid = !verifyFunction(*Result.getMergedFunction()); + if (!match.Valid) { + Result.getMergedFunction()->eraseFromParent(); + } else { + size_t MergedSize = EstimateFunctionSize( + Result.getMergedFunction(), GTTI(*Result.getMergedFunction())); + size_t Overhead = EstimateThunkOverhead(Result, AlwaysPreserved); + + size_t SizeF12 = MergedSize + Overhead; + size_t SizeF1F2 = match.OtherSize + match.Size; + + match.MergedSize = SizeF12; + match.Profitable = (SizeF12 + MergingOverheadThreshold) < SizeF1F2; + + if (!ToMergeFile.empty() || match.Profitable) { + + TotalMerges++; + matcher->remove_candidate(F2); + + FM.updateCallGraph(Result, 
AlwaysPreserved, Options); + + // resolve phinode + resolvePHI(*(Result.getMergedFunction())); + + if (ReuseMergedFunctions) { + matcher->add_candidate( + Result.getMergedFunction(), + EstimateFunctionSize(Result.getMergedFunction(), + GTTI(*Result.getMergedFunction()))); + } + break; + } + Result.getMergedFunction()->eraseFromParent(); + + } + } + if (MergingTrialsCount >= ExplorationThreshold) + break; + } + } + + return true; +} + +PreservedAnalyses FunctionMergingPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionMerging FM; + if (Debug) { + errs() << "ExplorationThreshold: " << ExplorationThreshold + << "\n"; // feisen + errs() << "RankingThreshold: " << RankingThreshold << "\n"; + errs() << "MergingOverheadThreshold" << MergingOverheadThreshold << "\n"; + errs() << "MaxParamScore: " << MaxParamScore << "\n"; + errs() << "IdenticalType: " << IdenticalType << "\n"; + errs() << "EnableUnifiedReturnType: " << EnableUnifiedReturnType << "\n"; + errs() << "EnableOperandReordering: " << EnableOperandReordering << "\n"; + errs() << "HasWholeProgram" << "HasWholeProgram" << "\n"; + errs() << "run \n"; // feisen + } + + // skip some modules + if (M.getName().find("/gold") != std::string::npos || + M.getName().find("/binutils") != std::string::npos || + M.getName().find("kernel-source") != std::string::npos || + (M.getName().find("arch/") != std::string::npos && + M.getName().str().compare(0, 5, "arch/") == 0) || + (M.getName().find("init/") != std::string::npos && + M.getName().str().compare(0, 5, "init/") == 0) || + (M.getName().find("kernel/") != std::string::npos && + M.getName().str().compare(0, 7, "kernel/") == 0)) { + return PreservedAnalyses::all(); + } + + if (!FM.runImpl(M)) { //, GTTI)) + return PreservedAnalyses::all(); + } + resolvePHI_fm(M, AM); + return PreservedAnalyses::none(); +} + +static std::string GetValueName(const Value *V) { + if (V) { + std::string name; + raw_string_ostream namestream(name); + V->printAsOperand(namestream, false); + 
return namestream.str(); + } + return "[null]"; +} + +/// Create a cast instruction if needed to cast V to type DstType. We treat +/// pointer and integer types of the same bitwidth as equivalent, so this can be +/// used to cast them to each other where needed. The function returns the Value +/// itself if no cast is needed, or a new CastInst instance inserted before +/// InsertBefore. The integer type equivalent to pointers must be passed as +/// IntPtrType (get it from DataLayout). This is guaranteed to generate no-op +/// casts, otherwise it will assert. +// Value *FunctionMerger::createCastIfNeeded(Value *V, Type *DstType, +// IRBuilder<> &Builder, const FunctionMergingOptions &Options) { +Value *createCastIfNeeded(Value *V, Type *DstType, IRBuilder<> &Builder, + Type *IntPtrTy, + const FunctionMergingOptions &Options) { + + if (V->getType() == DstType || Options.IdenticalTypesOnly) + return V; + + Value *Result; + Type *OrigType = V->getType(); + + if (OrigType->isStructTy()) { + assert(DstType->isStructTy()); + assert(OrigType->getStructNumElements() == DstType->getStructNumElements()); + + Result = UndefValue::get(DstType); + for (unsigned int I = 0, E = OrigType->getStructNumElements(); I < E; ++I) { + Value *ExtractedValue = + Builder.CreateExtractValue(V, ArrayRef(I)); + Value *Element = + createCastIfNeeded(ExtractedValue, DstType->getStructElementType(I), + Builder, IntPtrTy, Options); + Result = + Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DstType->isStructTy()); + + if (OrigType->isPointerTy() && + (DstType->isIntegerTy() || DstType->isPointerTy())) { + Result = Builder.CreatePointerCast(V, DstType, "merge_cast"); + } else if (OrigType->isIntegerTy() && DstType->isPointerTy() && + OrigType == IntPtrTy) { + // Int -> Ptr + Result = Builder.CreateCast(CastInst::IntToPtr, V, DstType, "merge_cast"); + } else { + llvm_unreachable("Can only cast int -> ptr or ptr -> (ptr or int)"); + } + + return Result; +} 
+ +void FunctionMerger::CodeGenerator::removeRedundantInstructions( + std::vector &WorkInst, DominatorTree &DT) { + std::set SkipList; + + std::map> UpdateList; + + for (Instruction *I1 : WorkInst) { + if (SkipList.find(I1) != SkipList.end()) + continue; + for (Instruction *I2 : WorkInst) { + if (I1 == I2) + continue; + if (SkipList.find(I2) != SkipList.end()) + continue; + assert(I1->getNumOperands() == I2->getNumOperands() && + "Should have the same num of operands!"); + bool AllEqual = true; + for (unsigned i = 0; i < I1->getNumOperands(); ++i) { + AllEqual = AllEqual && (I1->getOperand(i) == I2->getOperand(i)); + } + + if (AllEqual && DT.dominates(I1, I2)) { + UpdateList[I1].push_back(I2); + SkipList.insert(I2); + SkipList.insert(I1); + } + } + } + + for (auto &kv : UpdateList) { + for (auto *I : kv.second) { + erase(I); + I->replaceAllUsesWith(kv.first); + I->eraseFromParent(); + } + } +} + +//////////////////////////////////// SALSSA //////////////////////////////// + +static void postProcessFunction(Function &F) { + legacy::FunctionPassManager FPM(F.getParent()); + + FPM.add(createCFGSimplificationPass()); + FPM.doInitialization(); + FPM.run(F); + FPM.doFinalization(); +} + +template +static void CodeGen(BlockListType &Blocks1, BlockListType &Blocks2, + BasicBlock *EntryBB1, BasicBlock *EntryBB2, + Function *MergedFunc, Value *IsFunc1, BasicBlock *PreBB, + AlignedCode &AlignedSeq, ValueToValueMapTy &VMap, + std::unordered_map &BlocksF1, + std::unordered_map &BlocksF2, + std::unordered_map &MaterialNodes) { + + auto CloneInst = [](IRBuilder<> &Builder, Function *MF, + Instruction *I) -> Instruction * { + Instruction *NewI = nullptr; + if (I->getOpcode() == Instruction::Ret) { + if (MF->getReturnType()->isVoidTy()) { + NewI = Builder.CreateRetVoid(); + } else { + NewI = Builder.CreateRet(UndefValue::get(MF->getReturnType())); + } + } else { + NewI = I->clone(); + for (unsigned i = 0; i < NewI->getNumOperands(); i++) { + if (!isa(I->getOperand(i))) + 
NewI->setOperand(i, nullptr); + } + Builder.Insert(NewI); + } + + SmallVector, 8> MDs; + NewI->getAllMetadata(MDs); + for (std::pair MDPair : MDs) { + NewI->setMetadata(MDPair.first, nullptr); + } + return NewI; + }; + + for (auto &Entry : AlignedSeq) { + if (Entry.match()) { + + auto *I1 = dyn_cast(Entry.get(0)); + auto *I2 = dyn_cast(Entry.get(1)); + + std::string BBName = + (I1 == nullptr) ? "m.label.bb" + : (I1->isTerminator() ? "m.term.bb" : "m.inst.bb"); + + BasicBlock *MergedBB = + BasicBlock::Create(MergedFunc->getContext(), BBName, MergedFunc); + + MaterialNodes[Entry.get(0)] = MergedBB; + MaterialNodes[Entry.get(1)] = MergedBB; + + if (I1 != nullptr && I2 != nullptr) { + IRBuilder<> Builder(MergedBB); + Instruction *NewI = CloneInst(Builder, MergedFunc, I1); + + VMap[I1] = NewI; + VMap[I2] = NewI; + BlocksF1[MergedBB] = I1->getParent(); + BlocksF2[MergedBB] = I2->getParent(); + } else { + assert(isa(Entry.get(0)) && isa(Entry.get(1)) && + "Both nodes must be basic blocks!"); + auto *BB1 = dyn_cast(Entry.get(0)); + auto *BB2 = dyn_cast(Entry.get(1)); + + VMap[BB1] = MergedBB; + VMap[BB2] = MergedBB; + BlocksF1[MergedBB] = BB1; + BlocksF2[MergedBB] = BB2; + + // IMPORTANT: make sure any use in a blockaddress constant + // operation is updated correctly + for (User *U : BB1->users()) { + if (auto *BA = dyn_cast(U)) { + VMap[BA] = BlockAddress::get(MergedFunc, MergedBB); + } + } + for (User *U : BB2->users()) { + if (auto *BA = dyn_cast(U)) { + VMap[BA] = BlockAddress::get(MergedFunc, MergedBB); + } + } + + IRBuilder<> Builder(MergedBB); + for (Instruction &I : *BB1) { + if (isa(&I)) { + VMap[&I] = Builder.CreatePHI(I.getType(), 0); + } + } + for (Instruction &I : *BB2) { + if (isa(&I)) { + VMap[&I] = Builder.CreatePHI(I.getType(), 0); + } + } + } + } + } + + auto ChainBlocks = [](BasicBlock *SrcBB, BasicBlock *TargetBB, + Value *IsFunc1) { + IRBuilder<> Builder(SrcBB); + if (SrcBB->getTerminator() == nullptr) { + Builder.CreateBr(TargetBB); + } else { + auto 
*Br = dyn_cast(SrcBB->getTerminator()); + assert(Br && Br->isUnconditional() && + "Branch should be unconditional at this point!"); + BasicBlock *SuccBB = Br->getSuccessor(0); + Br->eraseFromParent(); + Builder.CreateCondBr(IsFunc1, SuccBB, TargetBB); + } + }; + + auto ProcessEachFunction_NonSeq = + [&](int FuncIdx, std::unordered_map &BlocksFX, + Value *IsFunc1) { + BasicBlock *LastMergedBB = nullptr; + BasicBlock *NewBB = nullptr; + + for (auto &Entry : AlignedSeq) { + Value *V = Entry.get(FuncIdx); + if (V == nullptr) + continue; + + if (BasicBlock *BB = dyn_cast(V)) { + LastMergedBB = nullptr; + NewBB = nullptr; + if (auto It = MaterialNodes.find(BB); It != MaterialNodes.end()) { + LastMergedBB = It->second; + } else { + std::string BBName = std::string("src.bb"); + NewBB = BasicBlock::Create(MergedFunc->getContext(), BBName, + MergedFunc); + VMap[BB] = NewBB; + BlocksFX[NewBB] = BB; + + for (User *U : BB->users()) { + if (auto *BA = dyn_cast(U)) { + VMap[BA] = BlockAddress::get(MergedFunc, NewBB); + } + } + + IRBuilder<> Builder(NewBB); + for (Instruction &I : *BB) { + if (isa(&I)) { + VMap[&I] = Builder.CreatePHI(I.getType(), 0); + } + } + } + } else if (Instruction *I = dyn_cast(V)) { + if (isa(I)) + continue; + if (isa(I)) + continue; + + if (auto It = MaterialNodes.find(I); It != MaterialNodes.end()) { + BasicBlock *NodeBB = It->second; + if (LastMergedBB) { + ChainBlocks(LastMergedBB, NodeBB, IsFunc1); + } else { + IRBuilder<> Builder(NewBB); + Builder.CreateBr(NodeBB); + } + // end keep track + LastMergedBB = NodeBB; + } else { + if (LastMergedBB) { + std::string BBName = std::string("split.bb"); + NewBB = BasicBlock::Create(MergedFunc->getContext(), BBName, + MergedFunc); + ChainBlocks(LastMergedBB, NewBB, IsFunc1); + BlocksFX[NewBB] = BB; + } + LastMergedBB = nullptr; + + IRBuilder<> Builder(NewBB); + Instruction *NewI = CloneInst(Builder, MergedFunc, I); + VMap[I] = NewI; + } + } else { + errs() << "Should never get here!\n"; + } + } + }; + + 
ProcessEachFunction_NonSeq(0, BlocksF1, IsFunc1); + ProcessEachFunction_NonSeq(1, BlocksF2, IsFunc1); + + auto *BB1 = dyn_cast(VMap[EntryBB1]); + auto *BB2 = dyn_cast(VMap[EntryBB2]); + + BlocksF1[PreBB] = BB1; + BlocksF2[PreBB] = BB2; + + if (BB1 == BB2) { + IRBuilder<> Builder(PreBB); + Builder.CreateBr(BB1); + } else { + IRBuilder<> Builder(PreBB); + Builder.CreateCondBr(IsFunc1, BB1, BB2); + } +} + +bool FunctionMerger::SALSSACodeGen::generate( + AlignedCode &AlignedSeq, ValueToValueMapTy &VMap, + const FunctionMergingOptions &Options) { + + LLVMContext &Context = CodeGenerator::getContext(); + Function *MergedFunc = CodeGenerator::getMergedFunction(); + Value *IsFunc1 = CodeGenerator::getFunctionIdentifier(); + Type *ReturnType = CodeGenerator::getMergedReturnType(); + bool RequiresUnifiedReturn = CodeGenerator::getRequiresUnifiedReturn(); + BasicBlock *EntryBB1 = CodeGenerator::getEntryBlock1(); + BasicBlock *EntryBB2 = CodeGenerator::getEntryBlock2(); + BasicBlock *PreBB = CodeGenerator::getPreBlock(); + + Type *RetType1 = CodeGenerator::getReturnType1(); + Type *RetType2 = CodeGenerator::getReturnType2(); + + Type *IntPtrTy = CodeGenerator::getIntPtrType(); + + std::vector &Blocks1 = CodeGenerator::getBlocks1(); + std::vector &Blocks2 = CodeGenerator::getBlocks2(); + + std::list LinearOffendingInsts; + std::set OffendingInsts; + std::map> + CoalescingCandidates; + + std::vector ListSelects; + + std::vector Allocas; + + Value *RetUnifiedAddr = nullptr; + Value *RetAddr1 = nullptr; + Value *RetAddr2 = nullptr; + + // maps new basic blocks in the merged function to their original + // correspondents + std::unordered_map BlocksF1; + std::unordered_map BlocksF2; + std::unordered_map MaterialNodes; + + CodeGen(Blocks1, Blocks2, EntryBB1, EntryBB2, MergedFunc, IsFunc1, PreBB, + AlignedSeq, VMap, BlocksF1, BlocksF2, MaterialNodes); + + if (RequiresUnifiedReturn) { + IRBuilder<> Builder(PreBB); + RetUnifiedAddr = Builder.CreateAlloca(ReturnType); + 
CodeGenerator::insert(dyn_cast(RetUnifiedAddr)); + + RetAddr1 = Builder.CreateAlloca(RetType1); + RetAddr2 = Builder.CreateAlloca(RetType2); + CodeGenerator::insert(dyn_cast(RetAddr1)); + CodeGenerator::insert(dyn_cast(RetAddr2)); + } + + std::set XorBrConds; + // assigning label operands + + for (auto &Entry : AlignedSeq) { + Instruction *I1 = nullptr; + Instruction *I2 = nullptr; + + if (Entry.get(0) != nullptr) + I1 = dyn_cast(Entry.get(0)); + if (Entry.get(1) != nullptr) + I2 = dyn_cast(Entry.get(1)); + + // Skip non-instructions + if (I1 == nullptr && I2 == nullptr) + continue; + + if (Entry.match()) { + + Instruction *I = I1; + if (I1->getOpcode() == Instruction::Ret) { + I = (I1->getNumOperands() >= I2->getNumOperands()) ? I1 : I2; + } else { + assert(I1->getNumOperands() == I2->getNumOperands() && + "Num of Operands SHOULD be EQUAL\n"); + } + + auto *NewI = dyn_cast(VMap[I]); + + bool Handled = false; + + if (!Handled) { + for (unsigned i = 0; i < I->getNumOperands(); i++) { + + Value *F1V = nullptr; + Value *V1 = nullptr; + if (i < I1->getNumOperands()) { + F1V = I1->getOperand(i); + V1 = MapValue(F1V, VMap); + if (V1 == nullptr) { + if (Debug) + errs() << "ERROR: Null value mapped: V1 = " + "MapValue(I1->getOperand(i), " + "VMap);\n"; + return false; + } + } else { + V1 = UndefValue::get(I2->getOperand(i)->getType()); + } + + Value *F2V = nullptr; + Value *V2 = nullptr; + if (i < I2->getNumOperands()) { + F2V = I2->getOperand(i); + V2 = MapValue(F2V, VMap); + // assert(V2!=nullptr && "Mapped value should NOT be NULL!"); + + if (V2 == nullptr) { + if (Debug) + errs() << "ERROR: Null value mapped: V2 = " + "MapValue(I2->getOperand(i), " + "VMap);\n"; + + return false; + } + + } else { + V2 = UndefValue::get(I1->getOperand(i)->getType()); + } + + assert(V1 != nullptr && "Value should NOT be null!"); + assert(V2 != nullptr && "Value should NOT be null!"); + + Value *V = V1; // first assume that V1==V2 + + // handling just label operands for now + if (!isa(V)) 
+ continue; + + auto *F1BB = dyn_cast(F1V); + auto *F2BB = dyn_cast(F2V); + + if (V1 != V2) { + auto *BB1 = dyn_cast(V1); + auto *BB2 = dyn_cast(V2); + + BasicBlock *SelectBB = + BasicBlock::Create(Context, "bb.select", MergedFunc); + IRBuilder<> BuilderBB(SelectBB); + + BlocksF1[SelectBB] = I1->getParent(); + BlocksF2[SelectBB] = I2->getParent(); + + BuilderBB.CreateCondBr(IsFunc1, BB1, BB2); + V = SelectBB; + } + + if (F1BB->isLandingPad() || F2BB->isLandingPad()) { + LandingPadInst *LP1 = F1BB->getLandingPadInst(); + LandingPadInst *LP2 = F2BB->getLandingPadInst(); + assert((LP1 != nullptr && LP2 != nullptr) && + "Should be both as per the BasicBlock match!"); + (void)LP2; + + BasicBlock *LPadBB = + BasicBlock::Create(Context, "lpad.bb", MergedFunc); + IRBuilder<> BuilderBB(LPadBB); + + Instruction *NewLP = LP1->clone(); + BuilderBB.Insert(NewLP); + + BuilderBB.CreateBr(dyn_cast(V)); + + BlocksF1[LPadBB] = I1->getParent(); + BlocksF2[LPadBB] = I2->getParent(); + + VMap[F1BB->getLandingPadInst()] = NewLP; + VMap[F2BB->getLandingPadInst()] = NewLP; + + V = LPadBB; + } + NewI->setOperand(i, V); + } + } + + } else { // if(entry.match())-else + + auto AssignLabelOperands = + [&](Instruction *I, + std::unordered_map &BlocksReMap) + -> bool { + auto *NewI = dyn_cast(VMap[I]); + for (unsigned i = 0; i < I->getNumOperands(); i++) { + // handling just label operands for now + if (!isa(I->getOperand(i))) + continue; + auto *FXBB = dyn_cast(I->getOperand(i)); + + Value *V = MapValue(FXBB, VMap); + if (V == nullptr) + return false; // ErrorResponse; + + if (FXBB->isLandingPad()) { + + LandingPadInst *LP = FXBB->getLandingPadInst(); + assert(LP != nullptr && "Should have a landingpad inst!"); + + BasicBlock *LPadBB = + BasicBlock::Create(Context, "lpad.bb", MergedFunc); + IRBuilder<> BuilderBB(LPadBB); + + Instruction *NewLP = LP->clone(); + BuilderBB.Insert(NewLP); + VMap[LP] = NewLP; + BlocksReMap[LPadBB] = I->getParent(); // FXBB; + + BuilderBB.CreateBr(dyn_cast(V)); + + V 
= LPadBB; + } + + NewI->setOperand(i, V); + } + return true; + }; + + if (I1 != nullptr && !AssignLabelOperands(I1, BlocksF1)) { + if (Debug) + errs() << "ERROR: Value should NOT be null\n"; + + return false; + } + if (I2 != nullptr && !AssignLabelOperands(I2, BlocksF2)) { + if (Debug) + errs() << "ERROR: Value should NOT be null\n"; + + return false; + } + } + } + + auto MergeValues = [&](Value *V1, Value *V2, + Instruction *InsertPt) -> Value * { + if (V1 == V2) + return V1; + + if (V1 == ConstantInt::getTrue(Context) && + V2 == ConstantInt::getFalse(Context)) + return IsFunc1; + + if (V1 == ConstantInt::getFalse(Context) && + V2 == ConstantInt::getTrue(Context)) { + IRBuilder<> Builder(InsertPt); + return Builder.CreateNot(IsFunc1); + } + + auto *IV1 = dyn_cast(V1); + auto *IV2 = dyn_cast(V2); + + if (IV1 && IV2) { + if (BlocksF2.find(IV1->getParent()) == BlocksF2.end() && + BlocksF1.find(IV2->getParent()) == BlocksF1.end()) { + CoalescingCandidates[IV1][IV2]++; + CoalescingCandidates[IV2][IV1]++; + } + } + + IRBuilder<> Builder(InsertPt); + Instruction *Sel = (Instruction *)Builder.CreateSelect(IsFunc1, V1, V2); + ListSelects.push_back(dyn_cast(Sel)); + return Sel; + }; + + auto AssignOperands = [&](Instruction *I, bool IsFuncId1) -> bool { + auto *NewI = dyn_cast(VMap[I]); + IRBuilder<> Builder(NewI); + + if (I->getOpcode() == Instruction::Ret && RequiresUnifiedReturn) { + Value *V = MapValue(I->getOperand(0), VMap); + if (V == nullptr) { + return false; // ErrorResponse; + } + if (V->getType() != ReturnType) { + Value *Addr = Builder.CreateAlloca(V->getType()); + Builder.CreateStore(V, Addr); + Value *CastedAddr = + Builder.CreatePointerCast(Addr, RetUnifiedAddr->getType()); + V = Builder.CreateLoad(ReturnType, CastedAddr); + } + NewI->setOperand(0, V); + } else { + for (unsigned i = 0; i < I->getNumOperands(); i++) { + if (isa(I->getOperand(i))) + continue; + + Value *V = MapValue(I->getOperand(i), VMap); + if (V == nullptr) { + return false; // 
ErrorResponse; + } + NewI->setOperand(i, V); + } + } + + return true; + }; + + for (auto &Entry : AlignedSeq) { + Instruction *I1 = nullptr; + Instruction *I2 = nullptr; + + if (Entry.get(0) != nullptr) + I1 = dyn_cast(Entry.get(0)); + if (Entry.get(1) != nullptr) + I2 = dyn_cast(Entry.get(1)); + + if (I1 != nullptr && I2 != nullptr) { + + Instruction *I = I1; + if (I1->getOpcode() == Instruction::Ret) { + I = (I1->getNumOperands() >= I2->getNumOperands()) ? I1 : I2; + } else { + assert(I1->getNumOperands() == I2->getNumOperands() && + "Num of Operands SHOULD be EQUAL\n"); + } + + auto *NewI = dyn_cast(VMap[I]); + + IRBuilder<> Builder(NewI); + + if (EnableOperandReordering && isa(NewI) && + I->isCommutative()) { + + auto *BO1 = dyn_cast(I1); + auto *BO2 = dyn_cast(I2); + Value *VL1 = MapValue(BO1->getOperand(0), VMap); + Value *VL2 = MapValue(BO2->getOperand(0), VMap); + Value *VR1 = MapValue(BO1->getOperand(1), VMap); + Value *VR2 = MapValue(BO2->getOperand(1), VMap); + if (VL1 == VR2 && VL2 != VR2) { + std::swap(VL2, VR2); + // CountOpReorder++; + } else if (VL2 == VR1 && VL1 != VR1) { + std::swap(VL1, VR1); + } + + std::vector> Vs; + Vs.emplace_back(VL1, VL2); + Vs.emplace_back(VR1, VR2); + + for (unsigned i = 0; i < Vs.size(); i++) { + Value *V1 = Vs[i].first; + Value *V2 = Vs[i].second; + + Value *V = MergeValues(V1, V2, NewI); + if (V == nullptr) { + if (Debug) { + errs() << "Could Not select:\n"; + errs() << "ERROR: Value should NOT be null\n"; + } + // MergedFunc->eraseFromParent(); + return false; // ErrorResponse; + } + + Value *CastedV = createCastIfNeeded(V, NewI->getOperand(i)->getType(), + Builder, IntPtrTy); + NewI->setOperand(i, CastedV); + } + } else { + for (unsigned i = 0; i < I->getNumOperands(); i++) { + if (isa(I->getOperand(i))) + continue; + + Value *V1 = nullptr; + if (i < I1->getNumOperands()) { + V1 = MapValue(I1->getOperand(i), VMap); + if (V1 == nullptr) { + if (Debug) + errs() << "ERROR: Null value mapped: V1 = " + 
"MapValue(I1->getOperand(i), " + "VMap);\n"; + return false; + } + } else { + V1 = UndefValue::get(I2->getOperand(i)->getType()); + } + + Value *V2 = nullptr; + if (i < I2->getNumOperands()) { + V2 = MapValue(I2->getOperand(i), VMap); + + if (V2 == nullptr) { + if (Debug) + errs() << "ERROR: Null value mapped: V2 = " + "MapValue(I2->getOperand(i), " + "VMap);\n"; + + return false; + } + + } else { + V2 = UndefValue::get(I1->getOperand(i)->getType()); + } + + assert(V1 != nullptr && "Value should NOT be null!"); + assert(V2 != nullptr && "Value should NOT be null!"); + + Value *V = MergeValues(V1, V2, NewI); + if (V == nullptr) { + if (Debug) { + errs() << "Could Not select:\n"; + errs() << "ERROR: Value should NOT be null\n"; + } + + return false; // ErrorResponse; + } + + NewI->setOperand(i, V); + + } // end for operands + } + } // end if isomorphic + else { + if (I1 != nullptr && !AssignOperands(I1, true)) { + if (Debug) + errs() << "ERROR: Value should NOT be null\n"; + + return false; + } + if (I2 != nullptr && !AssignOperands(I2, false)) { + if (Debug) + errs() << "ERROR: Value should NOT be null\n"; + + return false; + } + } // end 'if-else' non-isomorphic + + } // end for nodes + if (Debug) + errs() << "NumSelects: " << ListSelects.size() << "\n"; + if (ListSelects.size() > MaxNumSelection) { + if (Debug) + errs() << "Bailing out: Operand selection threshold\n"; + + return false; + } + + auto AssignPHIOperandsInBlock = + [&](BasicBlock *BB, + std::unordered_map &BlocksReMap) -> bool { + for (Instruction &I : *BB) { + if (auto *PHI = dyn_cast(&I)) { + auto *NewPHI = dyn_cast(VMap[PHI]); + + std::set FoundIndices; + + for (auto It = pred_begin(NewPHI->getParent()), + E = pred_end(NewPHI->getParent()); + It != E; It++) { + + BasicBlock *NewPredBB = *It; + + Value *V = nullptr; + + if (BlocksReMap.find(NewPredBB) != BlocksReMap.end()) { + int Index = PHI->getBasicBlockIndex(BlocksReMap[NewPredBB]); + if (Index >= 0) { + V = MapValue(PHI->getIncomingValue(Index), 
VMap); + FoundIndices.insert(Index); + } + } + + if (V == nullptr) { + V = UndefValue::get(NewPHI->getType()); + } + + NewPHI->addIncoming(V, NewPredBB); + } + if (FoundIndices.size() != PHI->getNumIncomingValues()) { + if (Debug) { + PHI->print(errs()); + errs() << "\n"; + + errs() << "feisen: "; + errs() << "FoundIndices.size(): " << FoundIndices.size() << " \n"; + errs() << "PHI->getNumIncomingValues()" + << PHI->getNumIncomingValues() << " \n"; + } + return false; + } + } + } + return true; + }; + + for (BasicBlock *BB1 : Blocks1) { + if (!AssignPHIOperandsInBlock(BB1, BlocksF1)) { + if (Debug) + errs() << "ERROR: PHI assignment\n"; + + return false; + } + } + for (BasicBlock *BB2 : Blocks2) { + if (!AssignPHIOperandsInBlock(BB2, BlocksF2)) { + if (Debug) + errs() << "ERROR: PHI assignment\n"; + return false; + } + } + + // Replace select statements by merged PHIs + + // Collect candidate pairs of PHI Nodes + SmallSet, 16> CandPHI; + for (Instruction *I : ListSelects) { + SelectInst *SI = dyn_cast(I); + assert(SI != nullptr); + + PHINode *PT = dyn_cast(SI->getTrueValue()); + PHINode *PF = dyn_cast(SI->getFalseValue()); + + if (PT == nullptr || PF == nullptr) + continue; + + // Only pair PHI Nodes in the same block + if (PT->getParent() != PF->getParent()) + continue; + + CandPHI.insert({PT, PF}); + } + + SmallSet RemovedPHIs; + for (auto [PT, PF] : CandPHI) { + if ((RemovedPHIs.count(PT) > 0) || (RemovedPHIs.count(PF) > 0)) + continue; + // Merge PT and PF if: + // 1) their defined incoming values do not overlap + // 2) their uses are only select statements on IsFunc1 + bool valid = true; + SmallVector CandSel; + + // Are PHIs mergeable? 
+ for (unsigned i = 0; i < PT->getNumIncomingValues() && valid; ++i) { + // if PT incoming value is Undef, this edge pair is mergeable + Value *VT = PT->getIncomingValue(i); + if (dyn_cast(VT) != nullptr) + continue; + + // if the PF incoming value for the same block is Undef, + // this edge pair is mergeable + BasicBlock *PredBB = PT->getIncomingBlock(i); + if (PF->getBasicBlockIndex(PredBB) < 0) { + errs() << "PHI ERROR\n"; + } + Value *VF = PF->getIncomingValueForBlock(PredBB); + if (dyn_cast(VF) != nullptr) + continue; + + // If the two incoming values are the same, then we can merge them + if (VT == VF) + continue; + + valid = false; + } + + if (!valid) + continue; + + // Are PHIs only used together in select statements? + for (auto *UI : PT->users()) { + SelectInst *SI = dyn_cast(UI); + if (SI == nullptr) { + valid = false; + break; + } + + if ((SI->getTrueValue() != PT) || (SI->getFalseValue() != PF)) { + valid = false; + break; + } + + if (SI->getCondition() != IsFunc1) { + valid = false; + break; + } + CandSel.push_back(SI); + } + + if (!valid) + continue; + + // Do the actual PHI merging using PT + for (unsigned i = 0; i < PT->getNumIncomingValues() && valid; ++i) { + // If edge is set, use it + if (dyn_cast(PT->getIncomingValue(i)) == nullptr) + continue; + + // If edge not set, copy it from PF + BasicBlock *PredBB = PT->getIncomingBlock(i); + PT->setIncomingValue(i, PF->getIncomingValueForBlock(PredBB)); + } + + PF->replaceAllUsesWith(PT); + PF->eraseFromParent(); + RemovedPHIs.insert(PF); + + // Replace all uses of the select statements with PT + for (SelectInst *SI : CandSel) { + SI->replaceAllUsesWith(PT); + SI->eraseFromParent(); + } + } + + DominatorTree DT(*MergedFunc); + + for (Instruction &I : instructions(MergedFunc)) { + if (auto *PHI = dyn_cast(&I)) { + for (unsigned i = 0; i < PHI->getNumIncomingValues(); i++) { + BasicBlock *BB = PHI->getIncomingBlock(i); + if (BB == nullptr) + errs() << "Null incoming block\n"; + Value *V = 
PHI->getIncomingValue(i); + if (V == nullptr) + errs() << "Null incoming value\n"; + if (auto *IV = dyn_cast(V)) { + if (BB->getTerminator() == nullptr) { + if (Debug) + errs() << "ERROR: Null terminator\n"; + return false; + } + if (!DT.dominates(IV, BB->getTerminator())) { + if (OffendingInsts.count(IV) == 0) { + OffendingInsts.insert(IV); + LinearOffendingInsts.push_back(IV); + } + } + } + } + } else { + for (unsigned i = 0; i < I.getNumOperands(); i++) { + if (I.getOperand(i) == nullptr) { + if (Debug) + errs() << "ERROR: Null operand\n"; + return false; + } + if (auto *IV = dyn_cast(I.getOperand(i))) { + if (!DT.dominates(IV, &I)) { + if (OffendingInsts.count(IV) == 0) { + OffendingInsts.insert(IV); + LinearOffendingInsts.push_back(IV); + } + } + } + } + } + } + + for (BranchInst *NewBr : XorBrConds) { + IRBuilder<> Builder(NewBr); + Value *XorCond = Builder.CreateXor(NewBr->getCondition(), IsFunc1); + NewBr->setCondition(XorCond); + } + + auto StoreInstIntoAddr = [](Instruction *IV, Value *Addr) { + IRBuilder<> Builder(IV->getParent()); + if (IV->isTerminator()) { + BasicBlock *SrcBB = IV->getParent(); + if (auto *II = dyn_cast(IV)) { + BasicBlock *DestBB = II->getNormalDest(); + + Builder.SetInsertPoint(&*DestBB->getFirstInsertionPt()); + // create PHI + PHINode *PHI = Builder.CreatePHI(IV->getType(), 0); + for (auto PredIt = pred_begin(DestBB), PredE = pred_end(DestBB); + PredIt != PredE; PredIt++) { + BasicBlock *PredBB = *PredIt; + if (PredBB == SrcBB) { + PHI->addIncoming(IV, PredBB); + } else { + PHI->addIncoming(UndefValue::get(IV->getType()), PredBB); + } + } + Builder.CreateStore(PHI, Addr); + } else { + for (auto SuccIt = succ_begin(SrcBB), SuccE = succ_end(SrcBB); + SuccIt != SuccE; SuccIt++) { + BasicBlock *DestBB = *SuccIt; + + Builder.SetInsertPoint(&*DestBB->getFirstInsertionPt()); + // create PHI + PHINode *PHI = Builder.CreatePHI(IV->getType(), 0); + for (auto PredIt = pred_begin(DestBB), PredE = pred_end(DestBB); + PredIt != PredE; PredIt++) 
{ + BasicBlock *PredBB = *PredIt; + if (PredBB == SrcBB) { + PHI->addIncoming(IV, PredBB); + } else { + PHI->addIncoming(UndefValue::get(IV->getType()), PredBB); + } + } + Builder.CreateStore(PHI, Addr); + } + } + } else { + Instruction *LastI = nullptr; + Instruction *InsertPt = nullptr; + for (Instruction &I : *IV->getParent()) { + InsertPt = &I; + if (LastI == IV) + break; + LastI = &I; + } + if (isa(InsertPt) || isa(InsertPt)) { + Builder.SetInsertPoint(&*IV->getParent()->getFirstInsertionPt()); + } else + Builder.SetInsertPoint(InsertPt); + + Builder.CreateStore(IV, Addr); + } + }; + + auto MemfyInst = [&](std::set &InstSet) -> AllocaInst * { + if (InstSet.empty()) + return nullptr; + IRBuilder<> Builder(&*PreBB->getFirstInsertionPt()); + AllocaInst *Addr = Builder.CreateAlloca((*InstSet.begin())->getType()); + Type *Ty = Addr->getAllocatedType(); + + for (Instruction *I : InstSet) { + for (auto UIt = I->use_begin(), E = I->use_end(); UIt != E;) { + Use &UI = *UIt; + UIt++; + + auto *User = cast(UI.getUser()); + + if (auto *PHI = dyn_cast(User)) { + auto InsertionPt = + PHI->getIncomingBlock(UI.getOperandNo())->getTerminator(); + if (InsertionPt == I) + continue; + IRBuilder<> Builder(InsertionPt); + UI.set(Builder.CreateLoad(Ty, Addr)); + } else { + IRBuilder<> Builder(User); + UI.set(Builder.CreateLoad(Ty, Addr)); + } + } + } + + for (Instruction *I : InstSet) + StoreInstIntoAddr(I, Addr); + + return Addr; + }; + + auto isCoalescingProfitable = [&](Instruction *I1, Instruction *I2) -> bool { + std::set BBSet1; + std::set UnionBB; + for (User *U : I1->users()) { + if (auto *UI = dyn_cast(U)) { + BasicBlock *BB1 = UI->getParent(); + BBSet1.insert(BB1); + UnionBB.insert(BB1); + } + } + + unsigned Intersection = 0; + for (User *U : I2->users()) { + if (auto *UI = dyn_cast(U)) { + BasicBlock *BB2 = UI->getParent(); + UnionBB.insert(BB2); + if (BBSet1.find(BB2) != BBSet1.end()) + Intersection++; + } + } + + const float Threshold = 0.7; + return 
(float(Intersection) / float(UnionBB.size()) > Threshold); + }; + + auto OptimizeCoalescing = + [&](Instruction *I, std::set &InstSet, + std::map> + &CoalescingCandidates, + std::set &Visited) { + Instruction *OtherI = nullptr; + unsigned Score = 0; + if (CoalescingCandidates.find(I) != CoalescingCandidates.end()) { + for (auto &Pair : CoalescingCandidates[I]) { + if (Pair.second > Score && + Visited.find(Pair.first) == Visited.end()) { + if (isCoalescingProfitable(I, Pair.first)) { + OtherI = Pair.first; + Score = Pair.second; + } + } + } + } + if (OtherI) { + InstSet.insert(OtherI); + } + }; + + if (MergedFunc != nullptr) { + if (((float)OffendingInsts.size()) / ((float)AlignedSeq.size()) > 4.5) { + if (Debug) + errs() << "Bailing out\n"; + + return false; + } + std::set Visited; + for (Instruction *I : LinearOffendingInsts) { + if (Visited.find(I) != Visited.end()) + continue; + + std::set InstSet; + InstSet.insert(I); + + // Create a coalescing group in InstSet + if (EnableSALSSACoalescing) + OptimizeCoalescing(I, InstSet, CoalescingCandidates, Visited); + + for (Instruction *OtherI : InstSet) + Visited.insert(OtherI); + + AllocaInst *Addr = MemfyInst(InstSet); + if (Addr) + Allocas.push_back(Addr); + } + + DominatorTree DT(*MergedFunc); + PromoteMemToReg(Allocas, DT, nullptr); + + if (verifyFunction(*MergedFunc)) { + + return false; + } + + postProcessFunction(*MergedFunc); + } + + return MergedFunc != nullptr; +} diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index b850591b4aa6..e4bed874bb8e 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -325,6 +325,12 @@ ModulePass *llvm::createMergeFunctionsPass() { PreservedAnalyses MergeFunctionsPass::run(Module &M, ModuleAnalysisManager &AM) { MergeFunctions MF; +#ifdef ENABLE_CODESIZE_OPT + if(M.getName().find("/gold")!=std::string::npos + || M.getName().find("/binutils")!=std::string::npos){ + return 
PreservedAnalyses::all(); + } +#endif if (!MF.runOnModule(M)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index de5833f60adc..a144027141f3 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -222,6 +222,12 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( (hasUnrollTransformation(L) != TM_ForcedByUser && llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, PGSOQueryType::IRPass)); + + //for code size +#ifdef ENABLE_CODESIZE_OPT + if(EnableCodeSize) OptForSize = true; +#endif + if (OptForSize) { UP.Threshold = UP.OptSizeThreshold; UP.PartialThreshold = UP.PartialOptSizeThreshold; @@ -403,6 +409,11 @@ static Optional analyzeLoopUnrollCost( RootI.getFunction()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; +#ifdef ENABLE_CODESIZE_OPT + // ============ code size + if(EnableCodeSize) CostKind = TargetTransformInfo::TCK_CodeSize; + // ============ code size +#endif for (;; --Iteration) { do { Instruction *I = CostWorklist.pop_back_val(); @@ -486,6 +497,13 @@ static Optional analyzeLoopUnrollCost( TargetTransformInfo::TargetCostKind CostKind = L->getHeader()->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; + +#ifdef ENABLE_CODESIZE_OPT + // ============ code size + if(EnableCodeSize) CostKind = TargetTransformInfo::TCK_CodeSize; + // ============ code size +#endif + // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. 
// Since the same load will take different values on different iterations, @@ -1172,6 +1190,12 @@ static LoopUnrollResult tryToUnrollLoop( return LoopUnrollResult::Unmodified; bool OptForSize = L->getHeader()->getParent()->hasOptSize(); + +#ifdef ENABLE_CODESIZE_OPT + //for code size + if(EnableCodeSize) OptForSize = true; +#endif + unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; -- Gitee From 9645823594e18e6ebdcfe2edbd4bdd0bfd27d99e Mon Sep 17 00:00:00 2001 From: Orange Summer Date: Sat, 27 Apr 2024 15:52:19 +0800 Subject: [PATCH 2/2] feat: FunctionMerging Pass Test --- .../FunctionMerging/address-spaces.ll | 34 +++ .../test/Transforms/FunctionMerging/alloca.ll | 61 ++++++ .../Transforms/FunctionMerging/functions.ll | 27 +++ .../FunctionMerging/gep-base-type.ll | 45 ++++ .../merge-block-address-other-function.ll | 49 +++++ .../FunctionMerging/merge-example-1.ll | 57 +++++ .../FunctionMerging/merge-example-2.ll | 90 ++++++++ .../FunctionMerging/merge-example-3.ll | 104 +++++++++ .../FunctionMerging/merge-example-4.ll | 105 +++++++++ .../FunctionMerging/merge-example-5.ll | 64 ++++++ .../FunctionMerging/merge-example-6.ll | 206 ++++++++++++++++++ .../FunctionMerging/mismatching-attr-crash.ll | 21 ++ ...no-merge-block-address-different-labels.ll | 96 ++++++++ .../no-merge-block-address-other-function.ll | 61 ++++++ .../no-merge-const-ptr-and-int.ll | 19 ++ .../no-merge-ptr-different-sizes.ll | 24 ++ .../no-merge-ptr-int-different-values.ll | 23 ++ .../FunctionMerging/phi-check-blocks.ll | 50 +++++ .../Transforms/FunctionMerging/tailcall.ll | 23 ++ .../Transforms/FunctionMerging/too-small.ll | 16 ++ .../FunctionMerging/vector-GEP-crash.ll | 12 + .../Transforms/FunctionMerging/weak-small.ll | 16 ++ 22 files changed, 1203 insertions(+) create mode 100644 llvm/test/Transforms/FunctionMerging/address-spaces.ll create mode 100644 llvm/test/Transforms/FunctionMerging/alloca.ll create mode 100644 llvm/test/Transforms/FunctionMerging/functions.ll create 
mode 100644 llvm/test/Transforms/FunctionMerging/gep-base-type.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-block-address-other-function.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-example-1.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-example-2.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-example-3.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-example-4.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-example-5.ll create mode 100644 llvm/test/Transforms/FunctionMerging/merge-example-6.ll create mode 100644 llvm/test/Transforms/FunctionMerging/mismatching-attr-crash.ll create mode 100644 llvm/test/Transforms/FunctionMerging/no-merge-block-address-different-labels.ll create mode 100644 llvm/test/Transforms/FunctionMerging/no-merge-block-address-other-function.ll create mode 100644 llvm/test/Transforms/FunctionMerging/no-merge-const-ptr-and-int.ll create mode 100644 llvm/test/Transforms/FunctionMerging/no-merge-ptr-different-sizes.ll create mode 100644 llvm/test/Transforms/FunctionMerging/no-merge-ptr-int-different-values.ll create mode 100644 llvm/test/Transforms/FunctionMerging/phi-check-blocks.ll create mode 100644 llvm/test/Transforms/FunctionMerging/tailcall.ll create mode 100644 llvm/test/Transforms/FunctionMerging/too-small.ll create mode 100644 llvm/test/Transforms/FunctionMerging/vector-GEP-crash.ll create mode 100644 llvm/test/Transforms/FunctionMerging/weak-small.ll diff --git a/llvm/test/Transforms/FunctionMerging/address-spaces.ll b/llvm/test/Transforms/FunctionMerging/address-spaces.ll new file mode 100644 index 000000000000..0d239b6cfcd3 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/address-spaces.ll @@ -0,0 +1,34 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +target datalayout = "p:32:32:32-p1:32:32:32-p2:16:16:16" + +declare void @foo(i32) nounwind + +; None of these functions should be merged + +define i32 
@store_as0(i32* %x) { +; CHECK-LABEL: @store_as0( +; CHECK: call void @foo( + %gep = getelementptr i32, i32* %x, i32 4 + %y = load i32, i32* %gep + call void @foo(i32 %y) nounwind + ret i32 %y +} + +define i32 @store_as1(i32 addrspace(1)* %x) { +; CHECK-LABEL: @store_as1( +; CHECK: call void @foo( + %gep = getelementptr i32, i32 addrspace(1)* %x, i32 4 + %y = load i32, i32 addrspace(1)* %gep + call void @foo(i32 %y) nounwind + ret i32 %y +} + +define i32 @store_as2(i32 addrspace(2)* %x) { +; CHECK-LABEL: @store_as2( +; CHECK: call void @foo( + %gep = getelementptr i32, i32 addrspace(2)* %x, i32 4 + %y = load i32, i32 addrspace(2)* %gep + call void @foo(i32 %y) nounwind + ret i32 %y +} diff --git a/llvm/test/Transforms/FunctionMerging/alloca.ll b/llvm/test/Transforms/FunctionMerging/alloca.ll new file mode 100644 index 000000000000..5f5ad5d9abd7 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/alloca.ll @@ -0,0 +1,61 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; Make sure that two different allocas are not treated as equal. + +target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" + +%kv1 = type { i32, i32 } +%kv2 = type { i8 } +%kv3 = type { i64, i64 } + +; Size difference. + +; CHECK-LABEL: define void @size1 +; CHECK-NOT: call void @ +define void @size1(i8 *%f) { + %v = alloca %kv1, align 8 + %f_2 = bitcast i8* %f to void (%kv1 *)* + call void %f_2(%kv1 * %v) + call void %f_2(%kv1 * %v) + call void %f_2(%kv1 * %v) + call void %f_2(%kv1 * %v) + ret void +} + +; CHECK-LABEL: define void @size2 +; CHECK-NOT: call void @ +define void @size2(i8 *%f) { + %v = alloca %kv2, align 8 + %f_2 = bitcast i8* %f to void (%kv2 *)* + call void %f_2(%kv2 * %v) + call void %f_2(%kv2 * %v) + call void %f_2(%kv2 * %v) + call void %f_2(%kv2 * %v) + ret void +} + +; Alignment difference. 
+ +; CHECK-LABEL: define void @align1 +; CHECK-NOT: call void @ +define void @align1(i8 *%f) { + %v = alloca %kv3, align 8 + %f_2 = bitcast i8* %f to void (%kv3 *)* + call void %f_2(%kv3 * %v) + call void %f_2(%kv3 * %v) + call void %f_2(%kv3 * %v) + call void %f_2(%kv3 * %v) + ret void +} + +; CHECK-LABEL: define void @align2 +; CHECK-NOT: call void @ +define void @align2(i8 *%f) { + %v = alloca %kv3, align 16 + %f_2 = bitcast i8* %f to void (%kv3 *)* + call void %f_2(%kv3 * %v) + call void %f_2(%kv3 * %v) + call void %f_2(%kv3 * %v) + call void %f_2(%kv3 * %v) + ret void +} diff --git a/llvm/test/Transforms/FunctionMerging/functions.ll b/llvm/test/Transforms/FunctionMerging/functions.ll new file mode 100644 index 000000000000..49e765ae5f2e --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/functions.ll @@ -0,0 +1,27 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; Be sure we don't merge cross-referenced functions of same type. + +; CHECK-LABEL: @left +; CHECK-LABEL: entry-block +; CHECK-LABEL: call void @right(i64 %p) +define void @left(i64 %p) { +entry-block: + call void @right(i64 %p) + call void @right(i64 %p) + call void @right(i64 %p) + call void @right(i64 %p) + ret void +} + +; CHECK-LABEL: @right +; CHECK-LABEL: entry-block +; CHECK-LABEL: call void @left(i64 %p) +define void @right(i64 %p) { +entry-block: + call void @left(i64 %p) + call void @left(i64 %p) + call void @left(i64 %p) + call void @left(i64 %p) + ret void +} \ No newline at end of file diff --git a/llvm/test/Transforms/FunctionMerging/gep-base-type.ll b/llvm/test/Transforms/FunctionMerging/gep-base-type.ll new file mode 100644 index 000000000000..e5e7f9c7ba7b --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/gep-base-type.ll @@ -0,0 +1,45 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + 
+; These should not be merged, the type of the GEP pointer argument does not have +; the same stride. + +%"struct1" = type <{ i8*, i32, [4 x i8] }> +%"struct2" = type { i8*, { i64, i64 } } + +define internal %struct2* @Ffunc(%struct2* %P, i64 %i) { +; CHECK-LABEL: @Ffunc( +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: ret + %1 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i + %2 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i + %3 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i + %4 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i + %5 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i + %6 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i + ret %struct2* %6 +} + + +define internal %struct1* @Gfunc(%struct1* %P, i64 %i) { +; CHECK-LABEL: @Gfunc( +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: getelementptr +; CHECK-NEXT: ret + %1 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i + %2 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i + %3 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i + %4 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i + %5 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i + %6 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i + ret %struct1* %6 +} diff --git a/llvm/test/Transforms/FunctionMerging/merge-block-address-other-function.ll b/llvm/test/Transforms/FunctionMerging/merge-block-address-other-function.ll new file mode 100644 index 000000000000..3c5d650bb5df --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-block-address-other-function.ll @@ -0,0 +1,49 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +target 
datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @_Z1fi(i32 %i) #0 { +entry: + %retval = alloca i32, align 4 + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 3, i32* %retval + br label %return + +if.end: + %1 = load i32, i32* %i.addr, align 4 + %cmp1 = icmp eq i32 %1, 3 + br i1 %cmp1, label %if.then.2, label %if.end.3 + +if.then.2: + store i32 56, i32* %retval + br label %return + +if.end.3: + store i32 0, i32* %retval + br label %return + +return: + %2 = load i32, i32* %retval + ret i32 %2 +} + + +define internal i8* @Afunc(i32* %P) { + store i32 1, i32* %P + store i32 3, i32* %P + ret i8* blockaddress(@_Z1fi, %if.then.2) +} + +define internal i8* @Bfunc(i32* %P) { +; CHECK-NOT: @Bfunc + store i32 1, i32* %P + store i32 3, i32* %P + ret i8* blockaddress(@_Z1fi, %if.then.2) +} diff --git a/llvm/test/Transforms/FunctionMerging/merge-example-1.ll b/llvm/test/Transforms/FunctionMerging/merge-example-1.ll new file mode 100644 index 000000000000..b1db0e59b1a1 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-example-1.ll @@ -0,0 +1,57 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +; A linked list type and simple payload +%LL = type { %S, %LL* } +%S = type { void (%S*, i32)* } + +; Table refers to itself via GEP +@Table = internal global [3 x %LL] [%LL { %S { void (%S*, i32)* @B }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }], align 16 + +; The body of this is irrelevant; it is long so that mergefunc doesn't skip it as a small function. 
+define internal void @A(%S* %self, i32 %a) { +; CHECK: tail call +; CHECK: ret void + %1 = add i32 %a, 32 + %2 = add i32 %1, 32 + %3 = add i32 %2, 32 + %4 = add i32 %3, 32 + %5 = add i32 %4, 32 + %6 = add i32 %5, 32 + %7 = add i32 %6, 32 + %8 = add i32 %7, 32 + %9 = add i32 %8, 32 + %10 = add i32 %9, 32 + %11 = add i32 %10, 32 + ret void +} + +define internal void @B(%S* %self, i32 %a) { +; CHECK: tail call +; CHECK: ret void + %1 = add i32 %a, 32 + %2 = add i32 %1, 32 + %3 = add i32 %2, 32 + %4 = add i32 %3, 32 + %5 = add i32 %4, 32 + %6 = add i32 %5, 32 + %7 = add i32 %6, 32 + %8 = add i32 %7, 32 + %9 = add i32 %8, 32 + %10 = add i32 %9, 32 + %11 = add i32 %10, 32 + ret void +} + +; CHECK: entry: +; CHECK: %3 = add i32 %2, 32 +; CHECK: %4 = add i32 %3, 32 +; CHECK: %5 = add i32 %4, 32 +; CHECK: %6 = add i32 %5, 32 +; CHECK: %7 = add i32 %6, 32 +; CHECK: %8 = add i32 %7, 32 +; CHECK: %9 = add i32 %8, 32 +; CHECK: %10 = add i32 %9, 32 +; CHECK: %11 = add i32 %10, 32 +; CHECK: %12 = add i32 %11, 32 +; CHECK: %13 = add i32 %12, 32 +; CHECK: ret void diff --git a/llvm/test/Transforms/FunctionMerging/merge-example-2.ll b/llvm/test/Transforms/FunctionMerging/merge-example-2.ll new file mode 100644 index 000000000000..77c40396da46 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-example-2.ll @@ -0,0 +1,90 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @print_hello(i32 noundef %0) #0 { +; CHECK-LABEL: print_hello +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + store i32 0, i32* %3, align 4 + br label %4 + +4: ; preds = %10, %1 + %5 = load i32, i32* %3, align 4 + %6 = load i32, i32* %2, align 4 + %7 = icmp slt i32 %5, %6 + 
br i1 %7, label %8, label %13 + +8: ; preds = %4 + %9 = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0)) + br label %10 + +10: ; preds = %8 + %11 = load i32, i32* %3, align 4 + %12 = add nsw i32 %11, 1 + store i32 %12, i32* %3, align 4 + br label %4, !llvm.loop !6 + +13: ; preds = %4 + %14 = load i32, i32* %2, align 4 + ret i32 %14 +} + +declare i32 @printf(i8* noundef, ...) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @print_fib(i32 noundef %0) #0 { +; CHECK-LABEL: print_fib +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + store i32 0, i32* %3, align 4 + br label %4 + +4: ; preds = %12, %1 + %5 = load i32, i32* %3, align 4 + %6 = load i32, i32* %2, align 4 + %7 = icmp slt i32 %5, %6 + br i1 %7, label %8, label %15 + +8: ; preds = %4 + %9 = load i32, i32* %3, align 4 + %10 = call i32 @fib(i32 noundef %9) + %11 = call i32 (i8*, ...) 
@printf(i8* noundef getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i32 noundef %10) + br label %12 + +12: ; preds = %8 + %13 = load i32, i32* %3, align 4 + %14 = add nsw i32 %13, 1 + store i32 %14, i32* %3, align 4 + br label %4, !llvm.loop !8 + +15: ; preds = %4 + %16 = load i32, i32* %2, align 4 + ret i32 %16 +} + +declare i32 @fib(i32 noundef) #1 + +attributes #0 = { noinline nounwind optnone ssp uwtable "darwin-stkchk-strong-link" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #1 = { "darwin-stkchk-strong-link" "frame-pointer"="all" "no-trapping-math"="true" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 3]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"PIC Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{!"Apple clang version 14.0.3 (clang-1403.0.22.14.1)"} +!6 = distinct !{!6, !7} +!7 = !{!"llvm.loop.mustprogress"} +!8 = distinct !{!8, !7} diff --git a/llvm/test/Transforms/FunctionMerging/merge-example-3.ll b/llvm/test/Transforms/FunctionMerging/merge-example-3.ll new file mode 100644 index 000000000000..4ce3d4469d3f --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-example-3.ll @@ -0,0 +1,104 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @Sum1(i32 noundef %0) #0 { +; CHECK-LABEL: Sum1 +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca i32, align 4 + 
%3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + store i32 0, ptr %3, align 4 + store i32 0, ptr %4, align 4 + br label %5 + +5: ; preds = %13, %1 + %6 = load i32, ptr %4, align 4 + %7 = load i32, ptr %2, align 4 + %8 = icmp sle i32 %6, %7 + br i1 %8, label %9, label %16 + +9: ; preds = %5 + %10 = load i32, ptr %4, align 4 + %11 = load i32, ptr %3, align 4 + %12 = add nsw i32 %11, %10 + store i32 %12, ptr %3, align 4 + br label %13 + +13: ; preds = %9 + %14 = load i32, ptr %4, align 4 + %15 = add nsw i32 %14, 1 + store i32 %15, ptr %4, align 4 + br label %5, !llvm.loop !5 + +16: ; preds = %5 + %17 = load i32, ptr %3, align 4 + ret i32 %17 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @Sum2(i32 noundef %0) #0 { +; CHECK-LABEL: Sum2 +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + store i32 0, ptr %3, align 4 + store i32 0, ptr %4, align 4 + br label %5 + +5: ; preds = %13, %1 + %6 = load i32, ptr %4, align 4 + %7 = load i32, ptr %2, align 4 + %8 = icmp sle i32 %6, %7 + br i1 %8, label %9, label %16 + +9: ; preds = %5 + %10 = load i32, ptr %4, align 4 + %11 = load i32, ptr %3, align 4 + %12 = add nsw i32 %11, %10 + store i32 %12, ptr %3, align 4 + br label %13 + +13: ; preds = %9 + %14 = load i32, ptr %4, align 4 + %15 = add nsw i32 %14, 1 + store i32 %15, ptr %4, align 4 + br label %5, !llvm.loop !7 + +16: ; preds = %5 + %17 = load i32, ptr %3, align 4 + ret i32 %17 +} + +; CHECK: define internal i32 @_m_f_0 +; CHECK: entry: +; CHECK: alloca i32, align 4 +; CHECK: alloca i32, align 4 +; CHECK: alloca i32, align 4 +; CHECK: store +; CHECK: store +; CHECK: store + +; CHECK: m.label.bb +; CHECK: load +; CHECK: load +; CHECK: icmp +; CHECK: br + +attributes #0 = { noinline nounwind optnone ssp uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 2} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{!"Homebrew clang version 17.0.6"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} +!7 = distinct !{!7, !6} diff --git a/llvm/test/Transforms/FunctionMerging/merge-example-4.ll b/llvm/test/Transforms/FunctionMerging/merge-example-4.ll new file mode 100644 index 000000000000..e73e07388a33 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-example-4.ll @@ -0,0 +1,105 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @PositiveSum(i32 noundef %0) #0 { +; CHECK-LABEL: PositiveSum +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + store i32 0, ptr %3, align 4 + store i32 0, ptr %4, align 4 + br label %5 + +5: ; preds = %13, %1 + %6 = load i32, ptr %4, align 4 + %7 = load i32, ptr %2, align 4 + %8 = icmp sle i32 %6, %7 + br i1 %8, label %9, label %16 + +9: ; preds = %5 + %10 = load i32, ptr %4, align 4 + %11 = load i32, ptr %3, align 4 + %12 = add nsw i32 %11, %10 + store i32 %12, ptr %3, align 4 + br label %13 + +13: ; preds = %9 + %14 = load i32, ptr %4, align 4 + %15 = add nsw i32 %14, 1 + store i32 %15, ptr %4, align 4 + br label %5, !llvm.loop !5 + +16: ; preds = %5 + %17 = load i32, ptr %3, align 4 + ret i32 %17 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @NegativeSum(i32 noundef %0) #0 { +; CHECK-LABEL: NegativeSum +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 
= alloca i32, align 4 + store i32 %0, ptr %2, align 4 + store i32 0, ptr %3, align 4 + %5 = load i32, ptr %2, align 4 + store i32 %5, ptr %4, align 4 + br label %6 + +6: ; preds = %13, %1 + %7 = load i32, ptr %4, align 4 + %8 = icmp slt i32 %7, 0 + br i1 %8, label %9, label %16 + +9: ; preds = %6 + %10 = load i32, ptr %4, align 4 + %11 = load i32, ptr %3, align 4 + %12 = add nsw i32 %11, %10 + store i32 %12, ptr %3, align 4 + br label %13 + +13: ; preds = %9 + %14 = load i32, ptr %4, align 4 + %15 = add nsw i32 %14, 1 + store i32 %15, ptr %4, align 4 + br label %6, !llvm.loop !7 + +16: ; preds = %6 + %17 = load i32, ptr %3, align 4 + ret i32 %17 +} + +; CHECK-LABEL: define internal i32 @_m_f_0 +; CHECK-NEXT: entry: +; CHECK: alloca i32, align 4 +; CHECK: alloca i32, align 4 +; CHECK: alloca i32, align 4 +; CHECK: store +; CHECK: store +; CHECK: load +; CHECK: select +; CHECK: select +; CHECK: store +; CHECK: br + +; CHECK: m.label.bb + + +attributes #0 = { noinline nounwind optnone ssp uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 2} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{!"Homebrew clang version 17.0.6"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} +!7 = distinct !{!7, !6} diff --git a/llvm/test/Transforms/FunctionMerging/merge-example-5.ll b/llvm/test/Transforms/FunctionMerging/merge-example-5.ll new file mode 100644 index 000000000000..a24254aad16c --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-example-5.ll @@ -0,0 +1,64 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +; Function Attrs: noinline nounwind optnone 
ssp uwtable +define i32 @IntType(ptr noundef %0) #0 { +; CHECK-LABEL: IntType +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + %4 = load i32, ptr %3, align 4 + %5 = add nsw i32 %4, 1 + store i32 %5, ptr %3, align 4 + %6 = load ptr, ptr %2, align 8 + %7 = load i32, ptr %6, align 4 + %8 = add nsw i32 %7, 2 + store i32 %8, ptr %6, align 4 + %9 = load ptr, ptr %2, align 8 + %10 = load i32, ptr %9, align 4 + %11 = add nsw i32 %10, 3 + store i32 %11, ptr %9, align 4 + %12 = load ptr, ptr %2, align 8 + %13 = load i32, ptr %12, align 4 + ret i32 %13 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable +define void @VoidType(ptr noundef %0) #0 { +; CHECK-LABEL: VoidType +; CHECK: tail call i32 @_m_f_0 +; CHECK: ret + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + %4 = load i32, ptr %3, align 4 + %5 = add nsw i32 %4, 1 + store i32 %5, ptr %3, align 4 + %6 = load ptr, ptr %2, align 8 + %7 = load i32, ptr %6, align 4 + %8 = add nsw i32 %7, 2 + store i32 %8, ptr %6, align 4 + %9 = load ptr, ptr %2, align 8 + %10 = load i32, ptr %9, align 4 + %11 = add nsw i32 %10, 3 + store i32 %11, ptr %9, align 4 + ret void +} + +; CHECK-LABEL: define internal i32 @_m_f_0 +; CHECK-NEXT: entry: +; CHECK: split.bb +; CHECK-COUNT-2: load +; CHECK: br label %m.term.bb + +attributes #0 = { noinline nounwind optnone ssp uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 2} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{!"Homebrew clang version 17.0.6"} diff --git 
a/llvm/test/Transforms/FunctionMerging/merge-example-6.ll b/llvm/test/Transforms/FunctionMerging/merge-example-6.ll new file mode 100644 index 000000000000..d05d7b411b2a --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/merge-example-6.ll @@ -0,0 +1,206 @@ +; RUN: opt -passes=func-merging -func-merging-threshold=0 -S < %s | FileCheck %s + +; Function Attrs: noinline nounwind optnone ssp uwtable +define void @ArrayAdd(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 { +; CHECK-LABEL: ArrayAdd +; CHECK: call void @_m_f_2 +; CHECK: ret + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i32 %2, ptr %6, align 4 + store i32 0, ptr %7, align 4 + br label %8 + +8: ; preds = %24, %3 + %9 = load i32, ptr %7, align 4 + %10 = load i32, ptr %6, align 4 + %11 = icmp slt i32 %9, %10 + br i1 %11, label %12, label %27 + +12: ; preds = %8 + %13 = load ptr, ptr %5, align 8 + %14 = load i32, ptr %7, align 4 + %15 = sext i32 %14 to i64 + %16 = getelementptr inbounds i32, ptr %13, i64 %15 + %17 = load i32, ptr %16, align 4 + %18 = load ptr, ptr %4, align 8 + %19 = load i32, ptr %7, align 4 + %20 = sext i32 %19 to i64 + %21 = getelementptr inbounds i32, ptr %18, i64 %20 + %22 = load i32, ptr %21, align 4 + %23 = add nsw i32 %22, %17 + store i32 %23, ptr %21, align 4 + br label %24 + +24: ; preds = %12 + %25 = load i32, ptr %7, align 4 + %26 = add nsw i32 %25, 1 + store i32 %26, ptr %7, align 4 + br label %8, !llvm.loop !5 + +27: ; preds = %8 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable +define void @ArraySub(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 { +; CHECK-LABEL: ArraySub +; CHECK: call void @_m_f_2 +; CHECK: ret + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i32 %2, ptr %6, align 
4 + store i32 0, ptr %7, align 4 + br label %8 + +8: ; preds = %24, %3 + %9 = load i32, ptr %7, align 4 + %10 = load i32, ptr %6, align 4 + %11 = icmp slt i32 %9, %10 + br i1 %11, label %12, label %27 + +12: ; preds = %8 + %13 = load ptr, ptr %5, align 8 + %14 = load i32, ptr %7, align 4 + %15 = sext i32 %14 to i64 + %16 = getelementptr inbounds i32, ptr %13, i64 %15 + %17 = load i32, ptr %16, align 4 + %18 = load ptr, ptr %4, align 8 + %19 = load i32, ptr %7, align 4 + %20 = sext i32 %19 to i64 + %21 = getelementptr inbounds i32, ptr %18, i64 %20 + %22 = load i32, ptr %21, align 4 + %23 = sub nsw i32 %22, %17 + store i32 %23, ptr %21, align 4 + br label %24 + +24: ; preds = %12 + %25 = load i32, ptr %7, align 4 + %26 = add nsw i32 %25, 1 + store i32 %26, ptr %7, align 4 + br label %8, !llvm.loop !7 + +27: ; preds = %8 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable +define void @ArrayMul(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 { +; CHECK-LABEL: ArrayMul +; CHECK: call void @_m_f_2 +; CHECK: ret + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i32 %2, ptr %6, align 4 + store i32 0, ptr %7, align 4 + br label %8 + +8: ; preds = %24, %3 + %9 = load i32, ptr %7, align 4 + %10 = load i32, ptr %6, align 4 + %11 = icmp slt i32 %9, %10 + br i1 %11, label %12, label %27 + +12: ; preds = %8 + %13 = load ptr, ptr %5, align 8 + %14 = load i32, ptr %7, align 4 + %15 = sext i32 %14 to i64 + %16 = getelementptr inbounds i32, ptr %13, i64 %15 + %17 = load i32, ptr %16, align 4 + %18 = load ptr, ptr %4, align 8 + %19 = load i32, ptr %7, align 4 + %20 = sext i32 %19 to i64 + %21 = getelementptr inbounds i32, ptr %18, i64 %20 + %22 = load i32, ptr %21, align 4 + %23 = mul nsw i32 %22, %17 + store i32 %23, ptr %21, align 4 + br label %24 + +24: ; preds = %12 + %25 = load i32, ptr %7, align 4 + %26 = add nsw i32 %25, 1 + 
store i32 %26, ptr %7, align 4 + br label %8, !llvm.loop !8 + +27: ; preds = %8 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable +define void @ArrayDiv(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 { +; CHECK-LABEL: ArrayDiv +; CHECK: call void @_m_f_2 +; CHECK: ret + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i32 %2, ptr %6, align 4 + store i32 0, ptr %7, align 4 + br label %8 + +8: ; preds = %24, %3 + %9 = load i32, ptr %7, align 4 + %10 = load i32, ptr %6, align 4 + %11 = icmp slt i32 %9, %10 + br i1 %11, label %12, label %27 + +12: ; preds = %8 + %13 = load ptr, ptr %5, align 8 + %14 = load i32, ptr %7, align 4 + %15 = sext i32 %14 to i64 + %16 = getelementptr inbounds i32, ptr %13, i64 %15 + %17 = load i32, ptr %16, align 4 + %18 = load ptr, ptr %4, align 8 + %19 = load i32, ptr %7, align 4 + %20 = sext i32 %19 to i64 + %21 = getelementptr inbounds i32, ptr %18, i64 %20 + %22 = load i32, ptr %21, align 4 + %23 = sdiv i32 %22, %17 + store i32 %23, ptr %21, align 4 + br label %24 + +24: ; preds = %12 + %25 = load i32, ptr %7, align 4 + %26 = add nsw i32 %25, 1 + store i32 %26, ptr %7, align 4 + br label %8, !llvm.loop !9 + +27: ; preds = %8 + ret void +} + +; CHECK-LABEL: define internal void @_m_f_2 +; CHECK-NEXT: entry: +; CHECK-COUNT-9: select +; CHECK-LABEL: m.label.bb + +attributes #0 = { noinline nounwind optnone ssp uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 2} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{!"Homebrew 
clang version 17.0.6"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} +!7 = distinct !{!7, !6} +!8 = distinct !{!8, !6} +!9 = distinct !{!9, !6} diff --git a/llvm/test/Transforms/FunctionMerging/mismatching-attr-crash.ll b/llvm/test/Transforms/FunctionMerging/mismatching-attr-crash.ll new file mode 100644 index 000000000000..5a2520d53abb --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/mismatching-attr-crash.ll @@ -0,0 +1,21 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; CHECK-LABEL: define void @foo +; CHECK: call void %bc +define void @foo(i8* byval(i8) %a0, i8* swiftself %a4) { +entry: + %bc = bitcast i8* %a0 to void (i8*, i8*)* + call void %bc(i8* byval(i8) %a0, i8* swiftself %a4) + ret void +} + +; CHECK-LABEL: define void @bar +; CHECK: call void %bc +define void @bar(i8* byval(i8) %a0, i8** swifterror %a4) { +entry: + %bc = bitcast i8* %a0 to void (i8*, i8**)* + call void %bc(i8* byval(i8) %a0, i8** swifterror %a4) + ret void +} + + diff --git a/llvm/test/Transforms/FunctionMerging/no-merge-block-address-different-labels.ll b/llvm/test/Transforms/FunctionMerging/no-merge-block-address-different-labels.ll new file mode 100644 index 000000000000..3623461e7f90 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/no-merge-block-address-different-labels.ll @@ -0,0 +1,96 @@ +; RUN: opt -S -passes=func-merging < %s | FileCheck %s + +; There is a slight difference in these two functions, in that the label values +; are switched. They are thus not mergeable. This tests that block addresses +; referring to blocks within each respective compared function are correctly +; ordered.
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @_Z1fi(i32 %i) #0 { +; CHECK-LABEL: define i32 @_Z1fi +; CHECK-NEXT: entry: +; CHECK-NEXT: alloca +entry: + %i.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %l = alloca i8*, align 8 + store i32 %i, i32* %i.addr, align 4 + store i32 0, i32* %ret, align 4 +; Right here, this is val_0, and later the if might assign val_1 + store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8 + %0 = load i32, i32* %i.addr, align 4 + %and = and i32 %0, 256 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8 + br label %if.end + +if.end: + %1 = load i8*, i8** %l, align 8 + br label %indirectgoto + +val_0: + store i32 12, i32* %ret, align 4 + br label %end + +val_1: + store i32 42, i32* %ret, align 4 + br label %end + +end: + %2 = load i32, i32* %ret, align 4 + ret i32 %2 + +indirectgoto: + %indirect.goto.dest = phi i8* [ %1, %if.end ] + indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1] +} + +; Function Attrs: nounwind uwtable +define i32 @_Z1gi(i32 %i) #0 { +; CHECK-LABEL: define i32 @_Z1gi +; CHECK-NEXT: entry: +; CHECK-NEXT: alloca +entry: + %i.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %l = alloca i8*, align 8 + store i32 %i, i32* %i.addr, align 4 + store i32 0, i32* %ret, align 4 +; This time, we store val_1 initially, and later the if might assign val_0 + store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8 + %0 = load i32, i32* %i.addr, align 4 + %and = and i32 %0, 256 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8 + br label %if.end + +if.end: + %1 = load i8*, i8** %l, align 8 + br label %indirectgoto + +val_0: + store i32 12, i32* %ret, align 4 + br label %end + +val_1: + store i32 42, i32* 
%ret, align 4 + br label %end + +end: + %2 = load i32, i32* %ret, align 4 + ret i32 %2 + +indirectgoto: + %indirect.goto.dest = phi i8* [ %1, %if.end ] + indirectbr i8* %indirect.goto.dest, [label %val_1, label %val_0] +} + diff --git a/llvm/test/Transforms/FunctionMerging/no-merge-block-address-other-function.ll b/llvm/test/Transforms/FunctionMerging/no-merge-block-address-other-function.ll new file mode 100644 index 000000000000..b16927a7797f --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/no-merge-block-address-other-function.ll @@ -0,0 +1,61 @@ +; RUN: opt -S -passes=func-merging < %s | FileCheck %s + +; We should not merge these two functions, because the blocks are different. +; This tests the handling of block addresses from different functions. +; ModuleID = '<stdin>' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + + +define internal i8* @Afunc(i32* %P) { +; CHECK-LABEL: @Afunc +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 1, i32* %P + store i32 3, i32* %P + ret i8* blockaddress(@_Z1fi, %if.then) +} + +define internal i8* @Bfunc(i32* %P) { +; CHECK-LABEL: @Bfunc +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 1, i32* %P + store i32 3, i32* %P + ret i8* blockaddress(@_Z1fi, %if.then.2) +} + + +; Function Attrs: nounwind uwtable +define i32 @_Z1fi(i32 %i) #0 { +entry: + %retval = alloca i32, align 4 + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 3, i32* %retval + br label %return + +if.end: + %1 = load i32, i32* %i.addr, align 4 + %cmp1 = icmp eq i32 %1, 3 + br i1 %cmp1, label %if.then.2, label %if.end.3 + +if.then.2: + store i32 56, i32* %retval + br label %return + +if.end.3: + store i32 0, i32* %retval + br label %return + +return: + %2 = load i32, i32* %retval + ret i32 %2 +} diff --git
a/llvm/test/Transforms/FunctionMerging/no-merge-const-ptr-and-int.ll b/llvm/test/Transforms/FunctionMerging/no-merge-const-ptr-and-int.ll new file mode 100644 index 000000000000..3d85eeb7075d --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/no-merge-const-ptr-and-int.ll @@ -0,0 +1,19 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Afunc and Bfunc differ in that one returns i64, the other a pointer. +; These should not be merged. +define internal i64 @Afunc(i32* %P, i32* %Q) { +; CHECK-LABEL: define internal i64 @Afunc + store i32 4, i32* %P + store i32 6, i32* %Q + ret i64 0 +} + +define internal i64* @Bfunc(i32* %P, i32* %Q) { +; CHECK-LABEL: define internal i64* @Bfunc + store i32 4, i32* %P + store i32 6, i32* %Q + ret i64* null +} + diff --git a/llvm/test/Transforms/FunctionMerging/no-merge-ptr-different-sizes.ll b/llvm/test/Transforms/FunctionMerging/no-merge-ptr-different-sizes.ll new file mode 100644 index 000000000000..f593e098dc2e --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/no-merge-ptr-different-sizes.ll @@ -0,0 +1,24 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; These should not be merged, as the datalayout says a pointer is 64 bits. No +; sext/zext is specified, so these functions could lower differently. 
+define internal i32 @Ffunc(i32* %P, i32* %Q) { +; CHECK-LABEL: define internal i32 @Ffunc +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 1, i32* %P + store i32 3, i32* %Q + ret i32 0 +} + +define internal i64* @Gfunc(i32* %P, i32* %Q) { +; CHECK-LABEL: define internal i64* @Gfunc +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 1, i32* %P + store i32 3, i32* %Q + ret i64* null +} diff --git a/llvm/test/Transforms/FunctionMerging/no-merge-ptr-int-different-values.ll b/llvm/test/Transforms/FunctionMerging/no-merge-ptr-int-different-values.ll new file mode 100644 index 000000000000..0b0434b7c912 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/no-merge-ptr-int-different-values.ll @@ -0,0 +1,23 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; These should not be merged, as 1 != 0. +define internal i64 @Ifunc(i32* %P, i32* %Q) { +; CHECK-LABEL: define internal i64 @Ifunc +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 10, i32* %P + store i32 10, i32* %Q + ret i64 1 +} + +define internal i64* @Jfunc(i32* %P, i32* %Q) { +; CHECK-LABEL: define internal i64* @Jfunc +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 10, i32* %P + store i32 10, i32* %Q + ret i64* null +} diff --git a/llvm/test/Transforms/FunctionMerging/phi-check-blocks.ll b/llvm/test/Transforms/FunctionMerging/phi-check-blocks.ll new file mode 100644 index 000000000000..483bdffca491 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/phi-check-blocks.ll @@ -0,0 +1,50 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; Ensure that we do not merge functions that are identical with the +; exception of the order of the incoming blocks to a phi. 
+ +; CHECK-LABEL: define linkonce_odr hidden i1 @first(i2 %0) +define linkonce_odr hidden i1 @first(i2 %0) { +entry: +; CHECK: switch i2 + switch i2 %0, label %default [ + i2 0, label %L1 + i2 1, label %L2 + i2 -2, label %L3 + ] +default: + unreachable +L1: + br label %done +L2: + br label %done +L3: + br label %done +done: + %result = phi i1 [ true, %L1 ], [ false, %L2 ], [ false, %L3 ] +; CHECK: ret i1 + ret i1 %result +} + +; CHECK-LABEL: define linkonce_odr hidden i1 @second(i2 %0) +define linkonce_odr hidden i1 @second(i2 %0) { +entry: +; CHECK: switch i2 + switch i2 %0, label %default [ + i2 0, label %L1 + i2 1, label %L2 + i2 -2, label %L3 + ] +default: + unreachable +L1: + br label %done +L2: + br label %done +L3: + br label %done +done: + %result = phi i1 [ true, %L3 ], [ false, %L2 ], [ false, %L1 ] +; CHECK: ret i1 + ret i1 %result +} diff --git a/llvm/test/Transforms/FunctionMerging/tailcall.ll b/llvm/test/Transforms/FunctionMerging/tailcall.ll new file mode 100644 index 000000000000..953b3e72f7b1 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/tailcall.ll @@ -0,0 +1,23 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; Do not treat call and musttail call as the same thing. 
+ +declare void @dummy() + +; CHECK-LABEL: define{{.*}}@foo +; CHECK: call {{.*}}@dummy +; CHECK: musttail {{.*}}@dummy +define void @foo() { + call void @dummy() + musttail call void @dummy() + ret void +} + +; CHECK-LABEL: define{{.*}}@bar +; CHECK: call {{.*}}@dummy +; CHECK: call {{.*}}@dummy +define void @bar() { + call void @dummy() + call void @dummy() + ret void +} diff --git a/llvm/test/Transforms/FunctionMerging/too-small.ll b/llvm/test/Transforms/FunctionMerging/too-small.ll new file mode 100644 index 000000000000..f8562c444898 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/too-small.ll @@ -0,0 +1,16 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; Too small for merging to be profitable + +define void @foo(i32 %x) { +; CHECK-LABEL: @foo( +; CHECK-NOT: call + ret void +} + +define void @bar(i32 %x) { +; CHECK-LABEL: @bar( +; CHECK-NOT: call + ret void +} + diff --git a/llvm/test/Transforms/FunctionMerging/vector-GEP-crash.ll b/llvm/test/Transforms/FunctionMerging/vector-GEP-crash.ll new file mode 100644 index 000000000000..ed930e26a82b --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/vector-GEP-crash.ll @@ -0,0 +1,12 @@ +; RUN: opt -passes=func-merging -disable-output < %s +; This used to cause a crash when comparing the GEPs + +define void @foo(<2 x i64*>) { + %tmp = getelementptr i64, <2 x i64*> %0, <2 x i64> + ret void +} + +define void @bar(<2 x i64*>) { + %tmp = getelementptr i64, <2 x i64*> %0, <2 x i64> + ret void +} diff --git a/llvm/test/Transforms/FunctionMerging/weak-small.ll b/llvm/test/Transforms/FunctionMerging/weak-small.ll new file mode 100644 index 000000000000..b17bd3f1bb20 --- /dev/null +++ b/llvm/test/Transforms/FunctionMerging/weak-small.ll @@ -0,0 +1,16 @@ +; RUN: opt -passes=func-merging -S < %s | FileCheck %s + +; Weak functions too small for merging to be profitable + +; CHECK: define weak i32 @foo(i8* %0, i32 %1) +; CHECK-NEXT: ret i32 %1 +; CHECK: define weak i32 @bar(i8* %0, i32 %1) +;
CHECK-NEXT: ret i32 %1 + +define weak i32 @foo(i8* %0, i32 %1) #0 { + ret i32 %1 +} + +define weak i32 @bar(i8* %0, i32 %1) #0 { + ret i32 %1 +} -- Gitee