From 5151b4d2c3a222ce66c2e1e3b55b25e88e2a7252 Mon Sep 17 00:00:00 2001 From: liuzixin15 Date: Sun, 28 Sep 2025 17:48:58 +0800 Subject: [PATCH] [AArch64][weakconsistency] add weakconsistencypass During compilation, a barrier (dmb sy) is automatically inserted before the ldr/str instruction, or ldar/stlr is automatically replaced. --- llvm/lib/Target/AArch64/AArch64.h | 1 + .../Target/AArch64/AArch64TargetMachine.cpp | 1 + llvm/lib/Target/AArch64/CMakeLists.txt | 2 + .../AArch64/WeakConsistencyAllowlist.cpp | 169 ++++ .../Target/AArch64/WeakConsistencyAllowlist.h | 58 ++ .../Target/AArch64/WeakConsistencyConfig.h | 24 + .../Target/AArch64/WeakConsistencyPass.cpp | 855 ++++++++++++++++++ llvm/test/CodeGen/AArch64/O0-pipeline.ll | 1 + llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + .../AArch64/arm64-opt-remarks-lazy-bfi.ll | 8 + 10 files changed, 1120 insertions(+) create mode 100644 llvm/lib/Target/AArch64/WeakConsistencyAllowlist.cpp create mode 100644 llvm/lib/Target/AArch64/WeakConsistencyAllowlist.h create mode 100644 llvm/lib/Target/AArch64/WeakConsistencyConfig.h create mode 100644 llvm/lib/Target/AArch64/WeakConsistencyPass.cpp diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index d338fd0e0193..46e105e76a52 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -54,6 +54,7 @@ FunctionPass *createFalkorHWPFFixPass(); FunctionPass *createFalkorMarkStridedAccessesPass(); FunctionPass *createAArch64BranchTargetsPass(); FunctionPass *createAArch64MIPeepholeOptPass(); +FunctionPass *createWeakConsistencyPass(); FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 601cc4b19fa1..1c0032be035c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -847,6 +847,7 @@ void AArch64PassConfig::addPreEmitPass2() { // SVE 
bundles move prefixes with destructive operations. BLR_RVMARKER pseudo // instructions are lowered to bundles as well. addPass(createUnpackMachineBundles(nullptr)); + addPass(createWeakConsistencyPass()); } MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo( diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index 4035f439a69a..97a03e1c268b 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -87,6 +87,8 @@ add_llvm_target(AArch64CodeGen SMEABIPass.cpp SVEIntrinsicOpts.cpp AArch64SIMDInstrOpt.cpp + WeakConsistencyPass.cpp + WeakConsistencyAllowlist.cpp DEPENDS intrinsics_gen diff --git a/llvm/lib/Target/AArch64/WeakConsistencyAllowlist.cpp b/llvm/lib/Target/AArch64/WeakConsistencyAllowlist.cpp new file mode 100644 index 000000000000..11362024ffcf --- /dev/null +++ b/llvm/lib/Target/AArch64/WeakConsistencyAllowlist.cpp @@ -0,0 +1,169 @@ +//===- WeakConsistencyAllowlist.cpp - Weak Consistency Pass ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "WeakConsistencyAllowlist.h"
+#include "WeakConsistencyConfig.h"
+#include "llvm/Demangle/Demangle.h"
+#include <climits>
+#include <cstdlib>
+#include <fstream>
+#ifdef WIN32
+#include <windows.h>
+#endif
+#include <string>
+
+namespace {
+// Small wrapper that canonicalises a path string.
+struct Path {
+  explicit Path(const std::string &base) : ori(base) {}
+
+  // Returns the absolute form of the stored path, or "" if it cannot be
+  // resolved.
+  std::string Realpath() const;
+
+private:
+  std::string ori;
+};
+
+std::string Path::Realpath() const {
+  char tempBuf[PATH_MAX] = {0x00};
+#ifdef WIN32
+  // GetFullPathName returns 0 on failure and the required size when the
+  // buffer is too small; neither case leaves a usable path in tempBuf.
+  const DWORD len = GetFullPathName(ori.c_str(), PATH_MAX, tempBuf, nullptr);
+  if (len == 0 || len >= PATH_MAX)
+    return "";
+#else
+  if (realpath(ori.c_str(), tempBuf) == nullptr)
+    return "";
+#endif
+  return tempBuf;
+}
+
+// Strips leading and trailing whitespace from s in place and returns s.
+std::string &trim(std::string &s) {
+  static const char WHITESPACE[] = " \n\r\t\f\v";
+  if (s.empty())
+    return s;
+  s.erase(0, s.find_first_not_of(WHITESPACE));
+  s.erase(s.find_last_not_of(WHITESPACE) + 1);
+  return s;
+}
+
+// Reduces one allowlist entry to a bare function name, e.g.
+// "void ns::Foo::bar(int)" becomes "bar".
+std::string getFuncnameFromFile(const std::string &func) {
+  std::string ret(func);
+  // Drop the parameter list.
+  auto iter = ret.find_last_of('(');
+  if (iter != std::string::npos)
+    ret.erase(iter);
+  trim(ret);
+  // Drop everything up to the last blank (return type, qualifiers).
+  iter = ret.find_last_of(" \t");
+  if (iter != std::string::npos)
+    ret.erase(0, iter + 1);
+  // Drop namespace / class qualification.
+  iter = ret.find_last_of(':');
+  if (iter != std::string::npos)
+    ret.erase(0, iter + 1);
+  return ret;
+}
+}
+
+// Returns the demangled base name of mangledName, or mangledName itself when
+// it is not an Itanium-mangled function name.
+std::string WeakConsistencyAllowlist::getFunctionName(const std::string &mangledName) {
+  llvm::ItaniumPartialDemangler IPD;
+  if (IPD.partialDemangle(mangledName.c_str()))
+    return mangledName;
+  size_t n = ipdSize;
+  auto res = IPD.getFunctionBaseName(ipdBuf, &n);
+  if (res == nullptr)
+    return mangledName;
+  // The demangler may have moved the buffer; keep ownership of the new one.
+  if (res != ipdBuf) {
+    ipdBuf = res;
+    ipdSize = n;
+  }
+  return ipdBuf;
+}
+
+// Allowlist file format
+// The file list starts with a "files:" line and the function list starts with
+// a "functions:" line; each of these headers occupies a line of its own.
+// Leading and trailing whitespace on every line is ignored.
+// Handles a section header. The caller has verified that tag ends with
+// END_TAG; returns false if the remaining text is not a known section name.
+bool WeakConsistencyAllowlist::parseTag(std::string &tag) {
+  tag.pop_back();
+  trim(tag);
+  if (tag == FUNCTION_TAG)
+    parseState = PARSE_FUNC;
+  else if (tag == FILE_TAG)
+    parseState = PARSE_FILE;
+  else
+    return false;
+  return true;
+}
+
+// Records one entry of the file section. Paths that cannot be resolved are
+// skipped rather than treated as a format error.
+bool WeakConsistencyAllowlist::addFile(std::string &line)
+{
+  std::string realPath = Path(line).Realpath();
+  if (realPath.empty())
+    return true;
+  files.insert(realPath);
+  return true;
+}
+
+// Records one entry of the function section, reduced to its bare name.
+bool WeakConsistencyAllowlist::addFunc(std::string &line)
+{
+  auto funcname = getFuncnameFromFile(line);
+  if (funcname.empty())
+    return true;
+  funcs.insert(funcname);
+  return true;
+}
+
+// Parses one line of the allowlist. Returns false on a format error: an
+// unknown section header, or an entry that appears before any header.
+bool WeakConsistencyAllowlist::parseLine(std::string &line) {
+  trim(line);
+  if (line.empty())
+    return true;
+  if (line.back() == END_TAG)
+    return parseTag(line);
+  switch (parseState) {
+  case PARSE_FILE:
+    return addFile(line);
+  case PARSE_FUNC:
+    return addFunc(line);
+  case PARSE_NONE:
+    return false;
+  }
+  return false;
+}
+
+// Loads the allowlist stored in filename. Returns false if the file cannot be
+// opened or contains a format error.
+bool WeakConsistencyAllowlist::Initialize(const std::string &filename)
+{
+  std::string buf;
+  std::string realPath = Path(filename).Realpath();
+  if (realPath.empty()) {
+    // Without an allowlist every function is processed. Keep that behaviour,
+    // but do not drop the user's option silently.
+    llvm::WithColor::warning(llvm::errs(), "WeakConsistencyPass")
+        << "'WeakConsistency allowlist' '" << filename
+        << "' cannot be resolved; the allowlist is ignored.\n";
+    return true;
+  }
+  std::ifstream in(realPath);
+  if (in.fail()) {
+    llvm::WithColor::error(llvm::errs(), "WeakConsistencyPass")
+        << "'WeakConsistency allowlist' open failed: permission error.\n";
+    return false;
+  }
+  hasAllowlist = true;
+  bool ret = true;
+  while (std::getline(in, buf)) {
+    if (!parseLine(buf)) {
+      llvm::WithColor::error(llvm::errs(), "WeakConsistencyPass")
+          << "'WeakConsistency allowlist' read failed: format error.\n";
+      ret = false;
+      break;
+    }
+  }
+  // The stream closes itself when it goes out of scope.
+  return ret;
+}
+
+// Returns true if the function funcname (a symbol name, demangled here) from
+// source file filename should be processed: always when no allowlist is
+// loaded, otherwise when either the resolved file path or the function's base
+// name is listed.
+bool WeakConsistencyAllowlist::Check(const std::string &filename, const std::string &funcname)
+{
+  if (!hasAllowlist)
+    return true;
+  std::string realPath = Path(filename).Realpath();
+  if (files.find(realPath) != files.end())
+    return true;
+  std::string realFunc = getFunctionName(funcname);
+  return funcs.find(realFunc) != funcs.end();
+}
+
+
diff --git a/llvm/lib/Target/AArch64/WeakConsistencyAllowlist.h b/llvm/lib/Target/AArch64/WeakConsistencyAllowlist.h
new file mode 100644
index 000000000000..d563d838c5e5
--- /dev/null
+++ b/llvm/lib/Target/AArch64/WeakConsistencyAllowlist.h
@@ -0,0 +1,70 @@
+//===- WeakConsistencyAllowlist.h - Weak Consistency Pass ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_WEAKCONSISTENCYALLOWLIST_H
+#define LLVM_LIB_TARGET_AARCH64_WEAKCONSISTENCYALLOWLIST_H
+
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+#include <unordered_set>
+
+/// Optional allowlist of source files and functions that the WeakConsistency
+/// pass processes. While no allowlist is loaded, Check() accepts everything.
+class WeakConsistencyAllowlist {
+public:
+  WeakConsistencyAllowlist() {
+    // The buffer is handed to llvm::ItaniumPartialDemangler in
+    // getFunctionName(), so it is allocated with malloc().
+    ipdBuf = static_cast<char *>(std::malloc(ipdSize));
+    if (ipdBuf != nullptr)
+      ipdBuf[ipdSize - 1] = '\0';
+    else
+      ipdSize = 0; // Never describe a buffer that does not exist.
+  }
+  ~WeakConsistencyAllowlist() { std::free(ipdBuf); }
+
+  // The object owns ipdBuf; an implicit copy would free it twice.
+  WeakConsistencyAllowlist(const WeakConsistencyAllowlist &) = delete;
+  WeakConsistencyAllowlist &operator=(const WeakConsistencyAllowlist &) = delete;
+
+  /// Loads the allowlist from \p filename; returns false if the file cannot be
+  /// opened or is malformed.
+  bool Initialize(const std::string &filename);
+  /// Returns true if function \p funcname from source file \p filename should
+  /// be processed (always true when no allowlist has been loaded).
+  bool Check(const std::string &filename, const std::string &funcname);
+
+private:
+  bool parseLine(std::string &line);
+  bool parseTag(std::string &tag);
+  bool addFile(std::string &line);
+  bool addFunc(std::string &line);
+  std::string getFunctionName(const std::string &mangledName);
+
+private:
+  enum ParseState : uint8_t {
+    PARSE_NONE,
+    PARSE_FILE,
+    PARSE_FUNC,
+  };
+
+  bool hasAllowlist = false;
+  std::unordered_set<std::string> files = {};
+  std::unordered_set<std::string> funcs = {};
+  const std::string FILE_TAG = "files";
+  const std::string FUNCTION_TAG = "functions";
+  const char END_TAG = ':';
+  ParseState parseState = PARSE_NONE;
+  char *ipdBuf = nullptr;
+  size_t ipdSize = 2048;
+};
+
+#endif
+
diff --git a/llvm/lib/Target/AArch64/WeakConsistencyConfig.h b/llvm/lib/Target/AArch64/WeakConsistencyConfig.h
new file mode 100644
index 000000000000..0f51a21e3eb0
--- /dev/null
+++
b/llvm/lib/Target/AArch64/WeakConsistencyConfig.h
@@ -0,0 +1,27 @@
+//===- WeakConsistencyConfig.h - Weak Consistency Pass ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_WEAKCONSISTENCYCONFIG_H
+#define LLVM_LIB_TARGET_AARCH64_WEAKCONSISTENCYCONFIG_H
+
+#include "llvm/Support/WithColor.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "AArch64RegisterInfo.h"
+
+// Values of the -relaxed-ordering-level option. RO_DISABLE selects a pass
+// that changes nothing; RO_1, RO_2 and RO_3 are spelled "0", "1" and "2" on
+// the command line and select WeakConsistencyPass_level1 .. _level3.
+enum RELAXED_ORDERING_LEVEL : char {
+  RO_DISABLE = 0,
+  RO_1 = 1,
+  RO_2 = 2,
+  RO_3 = 3,
+};
+
+#endif
+
diff --git a/llvm/lib/Target/AArch64/WeakConsistencyPass.cpp b/llvm/lib/Target/AArch64/WeakConsistencyPass.cpp
new file mode 100644
index 000000000000..197f81856118
--- /dev/null
+++ b/llvm/lib/Target/AArch64/WeakConsistencyPass.cpp
@@ -0,0 +1,855 @@
+//===- WeakConsistencyPass.cpp - Weak Consistency Pass ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a weak memory ordering compilation repair tool.
+// Basic repair logic: During compilation, a barrier (dmb sy) is automatically
+// inserted before the ldr/str instruction, or ldr/str is replaced by ldar/stlr.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "WeakConsistencyAllowlist.h"
+#include "WeakConsistencyConfig.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/IR/Module.h"
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+
+
+namespace {
+const std::string WEAKCONSISTENCYPASS_NAME = "WeakConsistencyPass";
+// Functions with more machine instructions than this are left untouched.
+const int WEAKCONSISTENCY_LARGE_FUNCTION = 9000;
+}
+
+using llvm::MachineInstr;
+using llvm::MachineFunction;
+using llvm::GlobalValue;
+using llvm::MachineMemOperand;
+using llvm::isa;
+using llvm::cast;
+using llvm::MachineBasicBlock;
+
+// Verdict of checkMI() for one instruction.
+enum ModifyType : uint8_t {
+  NO_MODIFY, // leave the instruction alone
+  ADD_DMB,   // insert "dmb sy" in front of the instruction
+  DELETE_MI, // the instruction was rewritten in place; no barrier is needed
+};
+
+namespace opts {
+static llvm::cl::opt<RELAXED_ORDERING_LEVEL>
+    level("relaxed-ordering-level", llvm::cl::desc("Set relaxed level"),
+          llvm::cl::init(RO_DISABLE),
+          llvm::cl::values(clEnumValN(RO_DISABLE, "disable",
+                                      "disable weakconsistency pass"),
+                           clEnumValN(RO_1, "0", "level 0"),
+                           clEnumValN(RO_2, "1", "level 1"),
+                           clEnumValN(RO_3, "2", "level 2")),
+          llvm::cl::ZeroOrMore, llvm::cl::NotHidden);
+
+static llvm::cl::opt<std::string>
+    allowlist("relaxed-ordering-allowlist",
+              llvm::cl::desc("Set relaxed ordering allow list"),
+              llvm::cl::NotHidden);
+} // end namespace opts
+
+namespace {
+// Pass used when the feature is disabled: it never changes anything.
+class BaseWeakConsistencyPass : public llvm::MachineFunctionPass {
+public:
+  static char ID;
+  BaseWeakConsistencyPass() : llvm::MachineFunctionPass(ID) {}
+  virtual bool runOnMachineFunction(MachineFunction &MF) override;
+  llvm::StringRef getPassName() const override final {
+    return WEAKCONSISTENCYPASS_NAME;
+  }
+};
+
+// Level 1: put a barrier in front of every load/store except frame
+// setup/destroy code, SP/FP/LR based accesses, thread-local accesses and
+// vtable loads.
+class WeakConsistencyPass_level1 : public BaseWeakConsistencyPass {
+public:
+  WeakConsistencyPass_level1();
+  bool runOnMachineFunction(MachineFunction &MF) final;
+
+protected:
+  virtual ModifyType checkMI(MachineInstr &MI) const;
+  virtual void preCheckMI(MachineInstr &MI);
+  virtual void postCheckMI(MachineInstr &MI);
+  virtual bool initPass(const MachineFunction &MF);
+  bool isFrameSetupOrDestroy(const MachineInstr &MI) const;
+
+  // True while a barrier has been seen and no load/store has followed it yet.
+  bool hasDMB = false;
+  WeakConsistencyAllowlist allowlist;
+
+private:
+  bool isVirtualTable(const MachineInstr &MI) const;
+  bool isThreadLocal(const MachineInstr &MI) const;
+  bool isFPOrSPOperand(const MachineInstr &MI) const;
+  virtual bool checkOpcode(const MachineInstr &MI) const;
+
+  int totalInst = 0;
+  int ldstInst = 0;
+};
+
+// Level 2: additionally tracks registers known to hold thread-private
+// addresses (localRegs) and skips accesses through them, GOT loads and
+// instructions that already carry ordering semantics.
+class WeakConsistencyPass_level2 : public WeakConsistencyPass_level1 {
+public:
+  WeakConsistencyPass_level2() = default;
+
+protected:
+  void preCheckMI(MachineInstr &MI) override;
+  void postCheckMI(MachineInstr &MI) override;
+  ModifyType checkMI(MachineInstr &MI) const override;
+  bool initPass(const MachineFunction &MF) override;
+  bool isNormalRegs(const MachineInstr &MI) const;
+  bool isNormalLoad(const MachineInstr &MI) const;
+  bool isNormalStore(const MachineInstr &MI) const;
+
+  std::unordered_set<unsigned> localRegs;
+  // NOTE(review): not referenced anywhere in this file; the element type was
+  // lost in extraction and is assumed here - confirm or remove.
+  std::vector<MachineInstr *> toRemove;
+
+private:
+  bool isGotOperand(const MachineInstr &MI) const;
+  void updateLocalRegs(const MachineInstr &MI);
+  bool isLocalRegOpt(const MachineInstr &MI) const;
+  bool isAtomic(const MachineInstr &MI) const;
+  void removeLocalRegs(const std::vector<unsigned> &regs);
+  void removeLocalReg(unsigned int reg);
+  bool isAsCheapAsMove(const MachineInstr &MI) const;
+};
+
+// Level 3: additionally rewrites eligible instructions into their
+// acquire/release forms instead of adding a barrier.
+class WeakConsistencyPass_level3 : public WeakConsistencyPass_level2 {
+public:
+  WeakConsistencyPass_level3() = default;
+
+protected:
+  ModifyType checkMI(MachineInstr &MI) const override;
+  bool initPass(const MachineFunction &MF) override;
+
+private:
+  void useLdar(MachineInstr &MI, unsigned int opcode) const;
+  void useLdaxr(MachineInstr &MI) const;
+  bool tryToUseLdar(MachineInstr &MI) const;
+  bool checkLdstInst(MachineInstr &MI, unsigned min_align, unsigned opts, unsigned opcode) const;
+  bool checkRegists(const MachineInstr &MI) const;
+  void addLdarImm(MachineInstr &MI, int64_t imm, bool isPost) const;
+  bool isPostLdstInst(const MachineInstr &MI) const;
+  unsigned getTargetLdpCode(const MachineInstr &MI) const;
+  bool isAligned(const MachineMemOperand &MI, unsigned min_align) const;
+  void useCASAL(MachineInstr &MI) const;
+  bool checkOpcode(const MachineInstr &MI) const override;
+};
+
+} // end anonymous namespace
+
+// Creates the pass variant selected by -relaxed-ordering-level.
+llvm::FunctionPass *llvm::createWeakConsistencyPass() {
+  switch (opts::level) {
+  case RELAXED_ORDERING_LEVEL::RO_DISABLE:
+    return new BaseWeakConsistencyPass();
+  case RELAXED_ORDERING_LEVEL::RO_1:
+    return new WeakConsistencyPass_level1();
+  case RELAXED_ORDERING_LEVEL::RO_2:
+    return new WeakConsistencyPass_level2();
+  case RELAXED_ORDERING_LEVEL::RO_3:
+    return new WeakConsistencyPass_level3();
+  }
+  return new BaseWeakConsistencyPass();
+}
+
+// Per-function setup. Returns false if the function must be skipped: it is
+// larger than WEAKCONSISTENCY_LARGE_FUNCTION or rejected by the allowlist.
+bool WeakConsistencyPass_level1::initPass(const MachineFunction &MF) {
+  totalInst = 0;
+  ldstInst = 0;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      totalInst++;
+      if (MI.mayLoadOrStore())
+        ldstInst ++;
+    }
+  }
+  if (totalInst > WEAKCONSISTENCY_LARGE_FUNCTION) {
+    llvm::WithColor::error(llvm::errs(), "WeakConsistencyPass")
+        << "Ignore large function: " << MF.getName() << "\n";
+    return false;
+  }
+
+  auto &filename = MF.getFunction().getParent()->getSourceFileName();
+  return allowlist.Check(filename, MF.getName().str());
+}
+
+bool WeakConsistencyPass_level2::initPass(const MachineFunction &MF) {
+  if (!WeakConsistencyPass_level1::initPass(MF))
+    return false;
+
+  localRegs = {llvm::AArch64::SP, llvm::AArch64::FP, llvm::AArch64::LR};
+
+  // X0 of the constructor is the memory address newly allocated to the current
+  // thread and has not been synchronized to other threads. In this case, X0 can be
+  // securely identified as a local variable.
+  // partialDemangle() expects a NUL-terminated string, which StringRef::data()
+  // does not guarantee, so go through a std::string.
+  const std::string name = MF.getName().str();
+  llvm::ItaniumPartialDemangler IPD;
+  if (!IPD.partialDemangle(name.c_str()))
+    if (IPD.isCtorOrDtor())
+      localRegs.insert(llvm::AArch64::X0);
+  return true;
+}
+
+bool WeakConsistencyPass_level3::initPass(const MachineFunction &MF) {
+  if (!WeakConsistencyPass_level2::initPass(MF))
+    return false;
+
+  // Do not process the first four parameters (load/store) transferred by the function.
+  //   ldr x19, [x0, #8]     // The first-layer LDR operation of the input parameter is not processed.
+  //   cbz x19, .LBB4_16
+  //   dmb sy
+  //   ldr x0, [x19, #4912]  // Add dmb to the second input ldr parameter.
+  for (int i = llvm::AArch64::X0; i <= llvm::AArch64::X3; i++)
+    localRegs.insert(i);
+  return true;
+}
+
+char BaseWeakConsistencyPass::ID = 0;
+bool BaseWeakConsistencyPass::runOnMachineFunction(MachineFunction &MF) {
+  return false;
+}
+
+WeakConsistencyPass_level1::WeakConsistencyPass_level1()
+    : BaseWeakConsistencyPass()
+{
+  // Initialize() prints its own diagnostics when loading fails.
+  if (!opts::allowlist.empty())
+    allowlist.Initialize(opts::allowlist);
+}
+
+// Walks every non-debug instruction of MF and inserts "dmb sy" in front of
+// those for which checkMI() answers ADD_DMB. Returns true if MF was changed.
+bool WeakConsistencyPass_level1::runOnMachineFunction(MachineFunction &MF) {
+  if (!initPass(MF))
+    return false;
+
+  bool modified = false;
+  const llvm::TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  for (auto &MBB : MF) {
+    // A barrier seen in the previous block in layout order (or in the
+    // previous function) says nothing about the other paths into this block.
+    hasDMB = false;
+    MachineBasicBlock::iterator E = MBB.end();
+    MachineBasicBlock::iterator NextI;
+    // Start at the first non-debug instruction. next_nodbg(MBB.begin(), E)
+    // skipped the first instruction and stepped past end() on empty blocks.
+    for (auto MII = llvm::skipDebugInstructionsForward(MBB.begin(), E); MII != E; MII = NextI) {
+      NextI = llvm::next_nodbg(MII, E);
+      preCheckMI(*MII);
+      ModifyType ret = checkMI(*MII);
+      postCheckMI(*MII);
+      if (ret == ADD_DMB) {
+        // 0xf is the "sy" (full system) barrier option.
+        BuildMI(MBB, *MII, MII->getDebugLoc(), TII.get(llvm::AArch64::DMB)).addImm(0xf);
+        modified = true;
+      }
+      if (ret == DELETE_MI)
+        modified = true;
+    }
+  }
+
+  return modified;
+}
+
+namespace {
+// Returns the callee's symbol name for a direct call, "" otherwise.
+static inline const llvm::StringRef getCallFunctionName(const MachineInstr &MI) {
+  if (!MI.isCall())
+    return "";
+
+  const auto &op = MI.getOperand(0);
+  switch (op.getType()) {
+  case llvm::MachineOperand::MO_GlobalAddress:{
+    return op.getGlobal()->getName();
+  }
+  default:
+    return "";
+  }
+}
+
+// Collects the ids of the explicit, non-early-clobber register operands of MI
+// in operand order; XZR/WZR are left out unless ignoreZR is false.
+static std::vector<unsigned> _getRegs(const MachineInstr &MI, bool ignoreZR = true) {
+  std::vector<unsigned> regs;
+  for (const auto &operand : MI.operands()) {
+    if (operand.isReg() && !operand.isImplicit() && !operand.isEarlyClobber()) {
+      auto id = operand.getReg().id();
+      if (!ignoreZR || (id != llvm::AArch64::XZR && id != llvm::AArch64::WZR))
+        regs.push_back(id);
+    }
+  }
+  return regs;
+}
+
+// True for the barrier instructions DMB, DSB, ISB and DSBnXS.
+bool isDmbInst(const MachineInstr &MI) {
+  static const std::unordered_set<unsigned> idmbs = {
+    llvm::AArch64::DMB,
+    llvm::AArch64::DSB,
+    llvm::AArch64::ISB,
+    llvm::AArch64::DSBnXS
+  };
+  auto opcode = MI.getOpcode();
+  return idmbs.find(opcode) != idmbs.end();
+}
+
+// Inclusive range test on opcode or register enum values.
+inline bool isInRange(unsigned opcode, unsigned start, unsigned end) {
+  return opcode >= start && opcode <= end;
+}
+
+// Find the call malloc... instructions
+inline bool isNewMalloc(const MachineInstr &MI) {
+  static const std::unordered_set<std::string> news = {
+    "malloc", "calloc", "_Znwm", "realloc"
+  };
+  llvm::StringRef name = getCallFunctionName(MI);
+  return news.find(name.str()) != news.end();
+}
+} // end anonymous namespace
+
+// Instructions such as clrex hint are not fixed.
+bool WeakConsistencyPass_level1::checkOpcode(const MachineInstr &MI) const {
+  auto code = MI.getOpcode();
+  return code == llvm::AArch64::CLREX || code == llvm::AArch64::HINT;
+}
+
+// [c,c++] Check whether the function is initialized or part of the memory is returned.
+// initialize: frame-setup STPXi killed $fp, killed $lr, $sp, 2
+// return: $fp, $lr = frame-destroy LDPXi $sp, 2
+bool WeakConsistencyPass_level1::isFrameSetupOrDestroy(const MachineInstr &MI) const {
+  return MI.getFlag(MachineInstr::FrameSetup) || MI.getFlag(MachineInstr::FrameDestroy);
+}
+
+// [c,c++] Check whether the address is read or written in the SP/FP.
+bool WeakConsistencyPass_level1::isFPOrSPOperand(const MachineInstr &MI) const {
+  std::vector<unsigned> regs(_getRegs(MI));
+  for (auto id : regs)
+    if (id == llvm::AArch64::SP || id == llvm::AArch64::FP || id == llvm::AArch64::LR)
+      return true;
+  return false;
+}
+
+// [c,c++] Check the thread_local variable operation.
+bool WeakConsistencyPass_level1::isThreadLocal(const MachineInstr &MI) const {
+  for (const llvm::MachineMemOperand *MMO : MI.memoperands()) {
+    if (MMO->getValue() == nullptr)
+      continue;
+
+    const llvm::Value &val = *MMO->getValue();
+    if (isa<GlobalValue>(val) && cast<GlobalValue>(val).isThreadLocal())
+      return true;
+  }
+  return false;
+}
+
+// [c++] Check whether the operation is a virtual table read operation.
+// LDRXui killed $x9, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @_ZTVN4llvm16itanium_
+bool WeakConsistencyPass_level1::isVirtualTable(const MachineInstr &MI) const {
+  if (!MI.mayLoad() || MI.getNumMemOperands() > 0)
+    return false;
+
+  // Only the first global-address operand is inspected.
+  for (auto operand = MI.operands_begin(); operand != MI.operands_end(); operand++)
+    if (operand->getType() == llvm::MachineOperand::MO_GlobalAddress)
+      return operand->getGlobal()->getName().startswith("_ZTVN");
+  return false;
+}
+
+// Remembers that a barrier instruction has just been seen.
+void WeakConsistencyPass_level1::preCheckMI(MachineInstr &MI) {
+  if (isDmbInst(MI)) {
+    hasDMB = true;
+    return;
+  }
+}
+
+// A barrier only covers the first load/store that follows it.
+void WeakConsistencyPass_level1::postCheckMI(MachineInstr &MI) {
+  if (hasDMB)
+    if (MI.mayLoadOrStore())
+      hasDMB = false;
+}
+
+
+// LEVEL 1: Do not add memory barriers before local variables
+// Returns ADD_DMB if a memory barrier needs to be inserted in front of MI and
+// NO_MODIFY if it does not.
+ModifyType WeakConsistencyPass_level1::checkMI(MachineInstr &MI) const {
+  // mayLoadOrStore() also returns true for dmb, so barriers are filtered out
+  // explicitly.
+  if (!MI.mayLoadOrStore() || isDmbInst(MI))
+    return NO_MODIFY;
+
+  if (isFrameSetupOrDestroy(MI))
+    return NO_MODIFY;
+
+  if (checkOpcode(MI))
+    return NO_MODIFY;
+
+  // A barrier directly precedes this access already.
+  if (hasDMB)
+    return NO_MODIFY;
+
+  if (isFPOrSPOperand(MI))
+    return NO_MODIFY;
+
+  if (isThreadLocal(MI))
+    return NO_MODIFY;
+
+  if (isVirtualTable(MI))
+    return NO_MODIFY;
+
+  return ADD_DMB;
+}
+
+// [c,c++] Indicates whether the operation is a Got operation.
+// LDRXui killed $x0, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @_ZSt4cout
+bool WeakConsistencyPass_level2::isGotOperand(const MachineInstr &MI) const {
+  if (MI.getNumMemOperands() > 0 || !MI.mayLoad())
+    return false;
+
+  for (unsigned i = 1; i < MI.getNumOperands(); i++) {
+    auto &operand = MI.getOperand(i);
+    if (!operand.isGlobal())
+      continue;
+
+    if (operand.getTargetFlags() & llvm::AArch64II::MO_GOT)
+      return true;
+  }
+  return false;
+}
+
+// Forgets that reg holds a thread-private address. W registers are mapped to
+// their X counterparts; registers outside X0-X28 are never removed.
+void WeakConsistencyPass_level2::removeLocalReg(unsigned int reg) {
+  // Convert w0 to x0
+  if (isInRange(reg, llvm::AArch64::W0, llvm::AArch64::W28))
+    reg += llvm::AArch64::X0 - llvm::AArch64::W0;
+
+  if (isInRange(reg, llvm::AArch64::X0, llvm::AArch64::X28))
+    localRegs.erase(reg);
+}
+
+void WeakConsistencyPass_level2::removeLocalRegs(const std::vector<unsigned> &regs) {
+  for (auto id : regs)
+    removeLocalReg(id);
+}
+
+// Check whether the current load and store addresses are unique to threads based on the context.
+// example: constructor_and_new.s
+bool WeakConsistencyPass_level2::isLocalRegOpt(const MachineInstr &MI) const {
+  std::vector<unsigned> regs(_getRegs(MI, false));
+  if (regs.size() < 2)
+    return false;
+
+  // Plain ldr/str: the second register operand is taken as the address
+  // register. Anything else: the last register operand is.
+  if (isNormalLoad(MI) || isNormalStore(MI))
+    return localRegs.find(regs[1]) != localRegs.end();
+
+  return localRegs.find(regs.back()) != localRegs.end();
+}
+
+// [c,c++] Radical policy: Do not process read/write instructions that contain atomic operations.
+bool WeakConsistencyPass_level2::isAtomic(const MachineInstr &MI) const {
+  // NOTE(review): every test below is a range over the generated opcode enum
+  // and therefore depends on the order in which the opcodes are emitted.
+  auto opcode = MI.getOpcode();
+  if (isInRange(opcode, llvm::AArch64::LDARB, llvm::AArch64::LDARX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::LDAPRB, llvm::AArch64::LDAXRX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::STLRB, llvm::AArch64::STLRX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::STLLRB, llvm::AArch64::STLXRX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::LDEORAB, llvm::AArch64::LDEORX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::LDADDAB, llvm::AArch64::LDADDX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::CASALB, llvm::AArch64::CASALX))
+    return true;
+
+  if (isInRange(opcode, llvm::AArch64::CASPALW, llvm::AArch64::CASPALX))
+    return true;
+
+  return false;
+}
+
+// True for instructions through which "holds a local address" is propagated
+// from a source register to the destination register.
+bool WeakConsistencyPass_level2::isAsCheapAsMove(const MachineInstr &MI) const {
+  static const std::unordered_set<unsigned> opcodes = {
+    llvm::AArch64::ORRXrs, llvm::AArch64::ORRXri,
+    llvm::AArch64::EXTRWrri, llvm::AArch64::EXTRXrri,
+    llvm::AArch64::UBFMWri, llvm::AArch64::UBFMXri,
+  };
+  auto code = MI.getOpcode();
+  if (isInRange(code, llvm::AArch64::ADDXri, llvm::AArch64::ADDXrx))
+    return true;
+
+  if (MI.isAsCheapAsAMove())
+    return true;
+
+  return opcodes.find(code) != opcodes.end();
+}
+
+// Updates localRegs for the instruction that is about to be checked.
+void WeakConsistencyPass_level2::preCheckMI(MachineInstr &MI) {
+  WeakConsistencyPass_level1::preCheckMI(MI);
+  auto code = MI.getOpcode();
+  if (isFrameSetupOrDestroy(MI) || isDmbInst(MI) || MI.isCompare() || MI.mayStore())
+    return;
+
+  // The result of an allocation call is private to this thread.
+  if (isNewMalloc(MI)) {
+    localRegs.insert(llvm::AArch64::X0);
+    return;
+  }
+  if (MI.mayLoad() || MI.isConditionalBranch() || code == llvm::AArch64::KILL)
+    return;
+
+  // Any other call may return a shared address in X0.
+  if (MI.isCall()) {
+    localRegs.erase(llvm::AArch64::X0);
+    return;
+  }
+  std::vector<unsigned> regs(_getRegs(MI, false));
+  if (regs.empty())
+    return;
+
+  if (isAsCheapAsMove(MI)) {
+    // The destination is local exactly when one of the sources is.
+    bool found = false;
+    for (unsigned i = 1; i < regs.size(); i++) {
+      if (localRegs.find(regs[i]) != localRegs.end()) {
+        found = true;
+        break;
+      }
+    }
+    if (found)
+      localRegs.insert(regs[0]);
+    else
+      removeLocalReg(regs[0]);
+    return;
+  }
+  if (!isInRange(code, llvm::AArch64::ADR, llvm::AArch64::ADR_UXTW_ZZZ_D_3))
+    regs.pop_back();
+
+  removeLocalRegs(regs);
+}
+
+// True if every explicit register operand of MI falls in one of the accepted
+// register ranges or is SP/FP/LR/WZR/XZR.
+bool WeakConsistencyPass_level2::isNormalRegs(const MachineInstr &MI) const {
+  auto regs = _getRegs(MI);
+  for (auto reg : regs) {
+    if (isInRange(reg, llvm::AArch64::X22_X23_X24_X25_X26_X27_X28_FP, llvm::AArch64::X26_X27))
+      continue;
+
+    if (isInRange(reg, llvm::AArch64::W0, llvm::AArch64::X28))
+      continue;
+
+    static const std::unordered_set<unsigned> ots = {
+      llvm::AArch64::SP, llvm::AArch64::FP, llvm::AArch64::LR, llvm::AArch64::WZR, llvm::AArch64::XZR
+    };
+    if (ots.find(reg) == ots.end())
+      return false;
+  }
+  return true;
+}
+
+bool WeakConsistencyPass_level2::isNormalLoad(const MachineInstr &MI) const
+{
+  auto code = MI.getOpcode();
+  if (isInRange(code, llvm::AArch64::LDRBBpost, llvm::AArch64::LDRBui))
+    return true;
+
+  if (isInRange(code, llvm::AArch64::LDRHHpost, llvm::AArch64::LDRHui))
+    return true;
+
+  if (isInRange(code, llvm::AArch64::LDRWl, llvm::AArch64::LDRWui))
+    return true;
+
+  if (isInRange(code, llvm::AArch64::LDRXl, llvm::AArch64::LDRXui))
+    return true;
+
+  return false;
+}
+
+bool WeakConsistencyPass_level2::isNormalStore(const MachineInstr &MI) const
+{
+  auto code = MI.getOpcode();
+  if (isInRange(code, llvm::AArch64::STRBBpost, llvm::AArch64::STRBui))
+    return true;
+
+  if (isInRange(code, llvm::AArch64::STRHHpost, llvm::AArch64::STRHui))
+    return true;
+
+  if (isInRange(code, llvm::AArch64::STRWpost, llvm::AArch64::STRWui))
+    return true;
+
+  if (isInRange(code, llvm::AArch64::STRXpost, llvm::AArch64::STRXui))
+    return true;
+
+  return false;
+}
+
+// A register overwritten by a load no longer holds a known-local address.
+void WeakConsistencyPass_level2::postCheckMI(MachineInstr &MI) {
+  WeakConsistencyPass_level1::postCheckMI(MI);
+  std::vector<unsigned> regs(_getRegs(MI, false));
+  if (MI.mayLoad() && !regs.empty() && isNormalRegs(MI)) {
+    removeLocalReg(regs[0]);
+    // regs may hold a single register, so check before touching regs[1].
+    if (!isNormalLoad(MI) && regs.size() > 1)
+      removeLocalReg(regs[1]);
+  }
+}
+
+// LEVEL 2: No memory barrier is added to the aliases of local variables.
+// Returns ADD_DMB if a memory barrier needs to be inserted in front of MI and
+// NO_MODIFY if it does not.
+ModifyType WeakConsistencyPass_level2::checkMI(MachineInstr &MI) const {
+  ModifyType ret = WeakConsistencyPass_level1::checkMI(MI);
+  if (ret != ADD_DMB)
+    return ret;
+
+  if (isGotOperand(MI))
+    return NO_MODIFY;
+
+  if (isLocalRegOpt(MI))
+    return NO_MODIFY;
+
+  if (isAtomic(MI))
+    return NO_MODIFY;
+
+  return ADD_DMB;
+}
+
+// Turns MI into the instruction `opcode` and drops all of its non-register
+// operands.
+void WeakConsistencyPass_level3::useLdar(MachineInstr &MI, unsigned int opcode) const {
+  const llvm::TargetInstrInfo &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+
+  MI.setDesc(TII.get(opcode));
+  for (int i = MI.getNumOperands() - 1; i >= 0; i--)
+    if (!MI.getOperand(i).isReg())
+      MI.removeOperand(i);
+}
+
+// Convert:
+//   ldr x0, [x1], imm
+// to:
+//   ldar x0, [x1]
+//   add x1, x1, imm
+// Convert:
+//   ldr x0, [x1, imm]
+// to:
+//   add x1, x1, imm
+//   ldar x0, [x1]
+// NOTE(review): checkLdstInst() only reaches this with imm == 0, so nothing is
+// emitted today. Before enabling it, confirm the opcodes: ADDXrx/SUBXrx are
+// built with an immediate here, and the new instruction is always inserted in
+// front of MI.
+void WeakConsistencyPass_level3::addLdarImm(MachineInstr &MI, int64_t imm, bool isPrePost) const {
+  llvm::MachineBasicBlock *const MBB = MI.getParent();
+  if (!isPrePost || imm == 0)
+    return;
+
+  // Locate the last register operand. A pointer is used because assigning
+  // through a reference would overwrite operand 0 of MI.
+  const llvm::MachineOperand *base = nullptr;
+  for (int i = MI.getNumOperands() - 1; i >= 0; i--) {
+    if (MI.getOperand(i).isReg()) {
+      base = &MI.getOperand(i);
+      break;
+    }
+  }
+  if (base == nullptr)
+    return;
+
+  const llvm::TargetInstrInfo &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+  if (imm > 0)
+    BuildMI(*MBB, MI, MI.getDebugLoc(), TII.get(llvm::AArch64::ADDXrx))
+      .add(*base)
+      .add(*base)
+      .addImm(imm);
+  else {
+    imm = -imm;
+    BuildMI(*MBB, MI, MI.getDebugLoc(), TII.get(llvm::AArch64::SUBXrx))
+      .add(*base)
+      .add(*base)
+      .addImm(imm);
+  }
+}
+
+// NOTE(review): despite the name, the table holds the pre-indexed opcodes.
+bool WeakConsistencyPass_level3::isPostLdstInst(const MachineInstr &MI) const {
+  static const std::unordered_set<unsigned> postInsts = {
+    llvm::AArch64::LDRBBpre,
+    llvm::AArch64::LDRBpre,
+    llvm::AArch64::LDRHHpre,
+    llvm::AArch64::LDRHpre,
+    llvm::AArch64::LDRWpre,
+    llvm::AArch64::LDRXpre,
+    llvm::AArch64::STRBBpre,
+    llvm::AArch64::STRBpre,
+    llvm::AArch64::STRHHpre,
+    llvm::AArch64::STRHpre,
+    llvm::AArch64::STRWpre,
+    llvm::AArch64::STRXpre,
+    llvm::AArch64::LDPXpre,
+    llvm::AArch64::STPXpre,
+  };
+  return postInsts.find(MI.getOpcode()) != postInsts.end();
+}
+
+// True if both the access alignment and the base alignment of the memory
+// operand are multiples of min_align.
+bool WeakConsistencyPass_level3::isAligned(const MachineMemOperand &MI, unsigned min_align) const {
+  unsigned align = MI.getAlign().value();
+  unsigned baseAlign = MI.getBaseAlign().value();
+
+  return align != 0 && baseAlign != 0 && align % min_align == 0 && baseAlign % min_align == 0;
+}
+
+// In the target load/store instruction, if [xn] and imm meet the alignment requirements,
+// replace ldr/str with ldar/stlr.
+bool WeakConsistencyPass_level3::checkLdstInst(
+  MachineInstr &MI, unsigned min_align, unsigned opts, unsigned distCode) const
+{
+  if (MI.getNumMemOperands() == 0)
+    return false;
+
+  auto &memOperand = *(MI.memoperands_end() - 1);
+  if (!isAligned(*memOperand, min_align))
+    return false;
+
+  if (MI.getNumOperands() != opts)
+    return false;
+
+  auto &operand = *(MI.operands_end() - 1);
+  if (!operand.isImm())
+    return false;
+
+  auto imm = operand.getImm();
+  // imm != 0 need to call addLdarImm once. Currently, this function is faulty and will be fixed later
+  if (imm != 0)
+    return false;
+
+  addLdarImm(MI, imm, !isPostLdstInst(MI));
+  useLdar(MI, distCode);
+  addLdarImm(MI, imm, isPostLdstInst(MI));
+  return true;
+}
+
+// Replaces an exclusive load/store with its acquire/release counterpart.
+void WeakConsistencyPass_level3::useLdaxr(MachineInstr &MI) const {
+  static const std::unordered_map<unsigned, unsigned> opts = {
+    {llvm::AArch64::LDXPW, llvm::AArch64::LDAXPW},
+    {llvm::AArch64::LDXPX, llvm::AArch64::LDAXPX},
+    {llvm::AArch64::LDXRB, llvm::AArch64::LDAXRB},
+    {llvm::AArch64::LDXRH, llvm::AArch64::LDAXRH},
+    {llvm::AArch64::LDXRW, llvm::AArch64::LDAXRW},
+    {llvm::AArch64::LDXRX, llvm::AArch64::LDAXRX},
+
+    {llvm::AArch64::STXPW, llvm::AArch64::STLXPW},
+    {llvm::AArch64::STXPX, llvm::AArch64::STLXPX},
+    {llvm::AArch64::STXRB, llvm::AArch64::STLXRB},
+    {llvm::AArch64::STXRH, llvm::AArch64::STLXRH},
+    {llvm::AArch64::STXRW, llvm::AArch64::STLXRW},
+    {llvm::AArch64::STXRX, llvm::AArch64::STLXRX},
+  };
+
+  const llvm::TargetInstrInfo &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+  auto it = opts.find(MI.getOpcode());
+  if (it != opts.end())
+    MI.setDesc(TII.get(it->second));
+}
+
+
+// Replaces a CAS/CASP variant with the acquire-release (CASAL/CASPAL) form of
+// the same width.
+void WeakConsistencyPass_level3::useCASAL(MachineInstr &MI) const {
+  static const std::unordered_set<unsigned> opB = {
+    llvm::AArch64::CASAB, llvm::AArch64::CASLB, llvm::AArch64::CASB
+  };
+  static const std::unordered_set<unsigned> opH = {
+    llvm::AArch64::CASAH, llvm::AArch64::CASLH, llvm::AArch64::CASH
+  };
+  static const std::unordered_set<unsigned> opW = {
+    llvm::AArch64::CASAW, llvm::AArch64::CASLW, llvm::AArch64::CASW
+  };
+  static const std::unordered_set<unsigned> opX = {
+    llvm::AArch64::CASAX, llvm::AArch64::CASLX, llvm::AArch64::CASX
+  };
+  static const std::unordered_set<unsigned> opPW = {
+    llvm::AArch64::CASPAW, llvm::AArch64::CASPLW, llvm::AArch64::CASPW
+  };
+  static const std::unordered_set<unsigned> opPX = {
+    llvm::AArch64::CASPAX, llvm::AArch64::CASPLX, llvm::AArch64::CASPX
+  };
+  const llvm::TargetInstrInfo &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+  auto code = MI.getOpcode();
+  unsigned distCode = 0;
+  if (opB.find(code) != opB.end())
+    distCode = llvm::AArch64::CASALB;
+  else if (opH.find(code) != opH.end())
+    distCode = llvm::AArch64::CASALH;
+  else if (opW.find(code) != opW.end())
+    distCode = llvm::AArch64::CASALW;
+  else if (opX.find(code) != opX.end())
+    distCode = llvm::AArch64::CASALX;
+  else if (opPW.find(code) != opPW.end())
+    distCode = llvm::AArch64::CASPALW;
+  else if (opPX.find(code) != opPX.end())
+    distCode = llvm::AArch64::CASPALX;
+  if (distCode != 0)
+    MI.setDesc(TII.get(distCode));
+}
+
+// [c,c++] Convert ldr to ldar as much as possible.
+// Returns true if MI was handled here and needs no barrier.
+bool WeakConsistencyPass_level3::tryToUseLdar(MachineInstr &MI) const {
+  auto code = MI.getOpcode();
+  if (isInRange(code, llvm::AArch64::LDXPW, llvm::AArch64::LDXRX)
+      || isInRange(code, llvm::AArch64::STXPW, llvm::AArch64::STXRX)) {
+    useLdaxr(MI);
+    return true;
+  }
+  if (isInRange(code, llvm::AArch64::CASAB, llvm::AArch64::CASX)) {
+    useCASAL(MI);
+    return true;
+  }
+  if (isInRange(code, llvm::AArch64::LDRBBpost, llvm::AArch64::LDRBui)
+      || isInRange(code, llvm::AArch64::STRBBpost, llvm::AArch64::STRBui))
+    return checkLdstInst(MI, 1, 3, MI.mayLoad() ? llvm::AArch64::LDARB : llvm::AArch64::STLRB);
+
+  if (isInRange(code, llvm::AArch64::LDRHHpost, llvm::AArch64::LDRHui)
+      || isInRange(code, llvm::AArch64::STRHHpost, llvm::AArch64::STRHui))
+    return checkLdstInst(MI, 2, 3, MI.mayLoad() ? llvm::AArch64::LDARH : llvm::AArch64::STLRH);
+
+  if (isInRange(code, llvm::AArch64::LDRWpost, llvm::AArch64::LDRWui)
+      || isInRange(code, llvm::AArch64::STRWpost, llvm::AArch64::STRWui))
+    return checkLdstInst(MI, 4, 3, MI.mayLoad() ? llvm::AArch64::LDARW : llvm::AArch64::STLRW);
+
+  if (isInRange(code, llvm::AArch64::LDRXpost, llvm::AArch64::LDRXui)
+      || isInRange(code, llvm::AArch64::STRXpost, llvm::AArch64::STRXui))
+    return checkLdstInst(MI, 8, 3, MI.mayLoad() ? llvm::AArch64::LDARX : llvm::AArch64::STLRX);
+
+  return false;
+}
+
+// Opcodes that level 3 never touches.
+bool WeakConsistencyPass_level3::checkOpcode(const MachineInstr &MI) const {
+  auto code = MI.getOpcode();
+  // Keep the level-1 exemptions: this override replaces the base-class
+  // implementation, which is private and cannot be called from here.
+  if (code == llvm::AArch64::CLREX || code == llvm::AArch64::HINT)
+    return true;
+
+  if (isInRange(code, llvm::AArch64::CPYE, llvm::AArch64::CPY_ZPzI_S))
+    return true;
+
+  if (MI.isInlineAsm())
+    return true;
+
+  return isInRange(code, llvm::AArch64::MOPSMemoryCopyPseudo, llvm::AArch64::MOPSMemorySetTaggingPseudo);
+}
+
+// Only the read and write instructions of the W0-X28 register are repaired.
+// Returns true if MI uses any other register and must be left alone.
+bool WeakConsistencyPass_level3::checkRegists(const MachineInstr &MI) const {
+  return !isNormalRegs(MI);
+}
+
+// LEVEL 3: Use aggressive algorithms to further reduce memory barrier insertions
+// Returns ADD_DMB if a memory barrier needs to be inserted in front of MI,
+// DELETE_MI if MI was rewritten into an acquire/release form instead, and
+// NO_MODIFY if MI is left alone.
+ModifyType WeakConsistencyPass_level3::checkMI(MachineInstr &MI) const {
+  ModifyType ret = WeakConsistencyPass_level2::checkMI(MI);
+  if (ret != ADD_DMB)
+    return ret;
+
+  if (checkOpcode(MI))
+    return NO_MODIFY;
+
+  if (checkRegists(MI))
+    return NO_MODIFY;
+
+  if (tryToUseLdar(MI))
+    return DELETE_MI;
+
+  return ADD_DMB;
+}
+
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index 6fbd162ea116..81320afe52cd 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -78,6 +78,7 @@
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: Stack Frame Layout Analysis
 ; CHECK-NEXT: Unpack machine instruction bundles
+; CHECK-NEXT: WeakConsistencyPass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: AArch64 Assembly Printer
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 2a07b4ffa6e3..fad55a823f3c 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -230,6 +230,7 @@
 ; CHECK-NEXT: Stack
Frame Layout Analysis ; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Unpack machine instruction bundles +; CHECK-NEXT: WeakConsistencyPass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: AArch64 Assembly Printer diff --git a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll index 9dfe84c46e9f..fd421e4e4ee7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -46,6 +46,10 @@ ; HOTNESS-NEXT: Freeing Pass 'Insert CFI remember/restore state instructions' on Function 'empty_func' ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Executing Pass 'WeakConsistencyPass' +; HOTNESS-NEXT: Freeing Pass 'WeakConsistencyPass' +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; HOTNESS: Executing Pass 'Unpack machine instruction bundles' ; HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles' ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' @@ -67,6 +71,10 @@ ; NO_HOTNESS-NEXT: Executing Pass 'Function Pass Manager' ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Executing Pass 'WeakConsistencyPass' +; NO_HOTNESS-NEXT: Freeing Pass 'WeakConsistencyPass' +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Executing Pass 'Branch relaxation pass' on Function 'empty_func' ; NO_HOTNESS-NEXT: Freeing Pass 'Branch relaxation pass' on Function 'empty_func' ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' -- Gitee