From 0299a113b73a244946136891bbaba23ea02dd250 Mon Sep 17 00:00:00 2001
From: Cathy Sheng
Date: Tue, 5 Aug 2025 13:41:02 -0400
Subject: [PATCH] [ISEL] Reduce emitted loads/stores when changing endianness

---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 706 ++++++++++++++++++
 .../test/CodeGen/AArch64/merge-trunc-store.ll |  54 +-
 .../test/CodeGen/AArch64/reduce-load-store.ll | 285 +++++++
 3 files changed, 1027 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/reduce-load-store.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f79d4d1934aa..691149cd6540 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -31,6 +31,11 @@ using namespace llvm;
 #define DEBUG_TYPE "aarch64-isel"
 #define PASS_NAME "AArch64 Instruction Selection"
 
+static cl::opt<bool> EnableEndiannessOpts(
+    "aarch64-endianness-opts", cl::Hidden,
+    cl::desc("Allow endianness opts to reduce load/store instructions"),
+    cl::init(false));
+
 //===--------------------------------------------------------------------===//
 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
 /// instructions for SelectionDAG operations.
@@ -427,6 +432,23 @@ public:
   bool trySelectCastFixedLengthToScalableVector(SDNode *N);
   bool trySelectCastScalableToFixedLengthVector(SDNode *N);
 
+  void tryWriteOpt(SDNode *N);
+  void SelectWrite3(SDNode *N, std::vector<StoreSDNode *>::iterator St,
+                    SDValue Addr);
+  void SelectWrite6(SDNode *N, std::vector<StoreSDNode *>::iterator St,
+                    SDValue Addr);
+  void SelectWrite7(SDNode *N, std::vector<StoreSDNode *>::iterator St,
+                    SDValue Addr);
+  void ReplaceStoreNodes(std::vector<StoreSDNode *>::iterator St, SDNode *NewSt,
+                         int NumSt);
+  bool tryReadOpt(SDNode *N);
+  void SelectRead3(SDNode *N,
+                   SmallVector<std::pair<LoadSDNode *, int64_t>> &LD);
+  void SelectRead6(SDNode *N,
+                   SmallVector<std::pair<LoadSDNode *, int64_t>> &LD);
+  void SelectRead7(SDNode *N,
+                   SmallVector<std::pair<LoadSDNode *, int64_t>> &LD);
+
   // Include the pieces autogenerated from the target description.
 #include "AArch64GenDAGISel.inc"
 
@@ -4182,6 +4204,681 @@ bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
   return true;
 }
 
+// Returns false if the address of the load/store is an add node
+// with a constant second operand outside the range [-256, 255].
+// Returns true otherwise.
+static bool canGetLoadStoreOffset(MemSDNode *N) {
+  SDValue Addr = N->getBasePtr();
+  if (Addr.getOpcode() == ISD::ADD &&
+      Addr.getOperand(1)->getOpcode() == ISD::Constant)
+    if (cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue() < -256 ||
+        cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue() > 255)
+      return false;
+  return true;
+}
+
+// Assumes canGetLoadStoreOffset(N) returns true.
+// Returns 0 if the address of the load/store is not an add node
+// with a constant second operand.
+// Otherwise, returns the value of the constant operand.
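+// For example, for a store to address (add %p, 12) this returns 12;
+// for a store directly to %p it returns 0.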
+static int64_t getLoadStoreOffset(MemSDNode *N) {
+  SDValue Addr = N->getBasePtr();
+  if (Addr.getOpcode() == ISD::ADD &&
+      Addr.getOperand(1)->getOpcode() == ISD::Constant)
+    return cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+  return 0;
+}
+
+// Returns the address (without offset) of the load/store.
+static SDValue getLoadStoreAddrWithoutOffset(MemSDNode *N) {
+  SDValue Addr = N->getBasePtr();
+  if (Addr.getOpcode() == ISD::ADD &&
+      Addr.getOperand(1)->getOpcode() == ISD::Constant)
+    return Addr.getOperand(0);
+  return Addr;
+}
+
+// For each operand of N, if it is a store node, add it to AllSt,
+// a map whose keys are the addresses stored to and whose values are
+// heap-allocated vectors of the store nodes storing to that key.
+static void
+getAllStores(SDNode *N,
+             DenseMap<SDValue, std::vector<StoreSDNode *> *> &AllSt) {
+  for (auto op = N->op_begin(); op != N->op_end(); ++op) {
+    // The operand is a store node.
+    if (op->getNode()->getOpcode() == ISD::STORE) {
+      StoreSDNode *St = cast<StoreSDNode>(op->getNode());
+      // It must store an i8.
+      if (St->getMemoryVT() == MVT::i8) {
+        // The node must have an offset in the range [-256, 255].
+        if (canGetLoadStoreOffset(St)) {
+          // Add St to AllSt.
+          SDValue Addr = getLoadStoreAddrWithoutOffset(St);
+          auto it = AllSt.find(Addr);
+          if (it == AllSt.end()) {
+            std::vector<StoreSDNode *> *Vec = new std::vector<StoreSDNode *>();
+            Vec->push_back(St);
+            AllSt.insert(
+                std::pair<SDValue, std::vector<StoreSDNode *> *>(Addr, Vec));
+          } else
+            it->second->push_back(St);
+        }
+      }
+    }
+  }
+}
+
+// Begin points into a vector of store nodes that all store to the same
+// address. If the first NumSt stores from Begin can be combined, return
+// true; otherwise return false.
+static bool canCombineStores(std::vector<StoreSDNode *>::iterator Begin,
+                             std::vector<StoreSDNode *>::iterator End,
+                             int NumSt) {
+  int64_t Offset = getLoadStoreOffset(*Begin);
+
+  // Need at least NumSt stores.
+  if (End - Begin < NumSt)
+    return false;
+
+  for (int i = 0; i < NumSt; ++i) {
+    StoreSDNode *St = Begin[i];
+    // All stores should be simple.
+    if (!St->isSimple())
+      return false;
+    // Stores must have consecutive offsets.
+    if (getLoadStoreOffset(St) != Offset + i)
+      return false;
+    // If the stored value is used elsewhere, then it can't be changed.
+    if (i != NumSt - 1 && !St->getOperand(1)->hasOneUse())
+      return false;
+  }
+
+  SDValue Stored = Begin[NumSt - 1]->getOperand(1);
+
+  // The value stored should be an i64.
+  if (Stored.getValueType() != MVT::i64)
+    return false;
+
+  SDValue Chain = Begin[NumSt - 1]->getChain();
+
+  for (int i = 0; i < NumSt - 1; ++i) {
+    SDValue Shift = Begin[i]->getOperand(1);
+    // Chains should all be the same.
+    if (Begin[i]->getChain() != Chain)
+      return false;
+    // The data stored should be shifted right.
+    if (Shift->getOpcode() != ISD::SRL)
+      return false;
+    // The shifted data should be the same as Stored.
+    if (Shift->getOperand(0) != Stored)
+      return false;
+    // It should be shifted by a constant.
+    if (Shift->getOperand(1).getOpcode() != ISD::Constant)
+      return false;
+    // It should be shifted by (NumSt-1-i)*8.
+    if (cast<ConstantSDNode>(Shift->getOperand(1))->getSExtValue() !=
+        (NumSt - 1 - i) * 8)
+      return false;
+  }
+
+  // The store nodes can be combined.
+  return true;
+}
+
+// Replace the first NumSt store nodes in St with NewSt.
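+// Each replaced store's chain output is redirected to NewSt, so the
+// TokenFactor above ends up with a single combined store in the chain.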
+void AArch64DAGToDAGISel::ReplaceStoreNodes(
+    std::vector<StoreSDNode *>::iterator St, SDNode *NewSt, int NumSt) {
+  for (NumSt -= 1; NumSt >= 0; --NumSt) {
+    ReplaceNode(St[NumSt], NewSt);
+  }
+}
+
+void AArch64DAGToDAGISel::SelectWrite3(SDNode *N,
+                                       std::vector<StoreSDNode *>::iterator St,
+                                       SDValue Addr) {
+  SDLoc dl(N);
+
+  SDValue Stored = St[2]->getOperand(1);
+
+  SDValue Subreg =
+      CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, Stored);
+
+  SDNode *Rev16 =
+      CurDAG->getMachineNode(AArch64::REV16Wr, dl, MVT::i32, Subreg);
+
+  MachineMemOperand *MemOp = St[1]->getMemOperand();
+  MachineMemOperand *MMO = CurDAG->getMachineFunction().getMachineMemOperand(
+      MemOp, MemOp->getPointerInfo(), getLLTForMVT(MVT::i16));
+
+  SDValue Chain = St[1]->getChain();
+  SDValue Const =
+      CurDAG->getTargetConstant(getLoadStoreOffset(St[1]), dl, MVT::i64);
+  SDValue Ops[] = {SDValue(Rev16, 0), Addr, Const, Chain};
+
+  MachineSDNode *Strh =
+      CurDAG->getMachineNode(AArch64::STURHHi, dl, MVT::Other, Ops);
+  CurDAG->setNodeMemRefs(Strh, {MMO});
+
+  ReplaceStoreNodes(St + 1, Strh, 2);
+}
+
+void AArch64DAGToDAGISel::SelectWrite6(SDNode *N,
+                                       std::vector<StoreSDNode *>::iterator St,
+                                       SDValue Addr) {
+  SDLoc dl(N);
+
+  SDValue Stored = St[5]->getOperand(1);
+  SDValue Chain = St[5]->getChain();
+
+  SDValue LsrConst = CurDAG->getTargetConstant(32, dl, MVT::i64);
+  SDValue Const63 = CurDAG->getTargetConstant(63, dl, MVT::i64);
+  SDValue LsrOps[] = {Stored, LsrConst, Const63};
+  SDNode *Lsr = CurDAG->getMachineNode(AArch64::UBFMXri, dl, MVT::i64, LsrOps);
+
+  SDValue LsrSubreg = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
+                                                     MVT::i32, SDValue(Lsr, 0));
+
+  SDNode *Rev16 =
+      CurDAG->getMachineNode(AArch64::REV16Wr, dl, MVT::i32, LsrSubreg);
+
+  MachineMemOperand *StrhMemOp = St[0]->getMemOperand();
+  MachineMemOperand *StrhMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          StrhMemOp, StrhMemOp->getPointerInfo(), getLLTForMVT(MVT::i16));
+
+  SDValue StrhConst =
+      CurDAG->getTargetConstant(getLoadStoreOffset(St[0]), dl, MVT::i64);
+  SDValue StrhOps[] = {SDValue(Rev16, 0), Addr, StrhConst, Chain};
+
+  MachineSDNode *Strh =
+      CurDAG->getMachineNode(AArch64::STURHHi, dl, MVT::Other, StrhOps);
+  CurDAG->setNodeMemRefs(Strh, {StrhMMO});
+
+  ReplaceStoreNodes(St, Strh, 2);
+
+  SDValue Subreg =
+      CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, Stored);
+
+  SDNode *Rev = CurDAG->getMachineNode(AArch64::REVWr, dl, MVT::i32, Subreg);
+
+  MachineMemOperand *SturMemOp = St[2]->getMemOperand();
+  MachineMemOperand *SturMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          SturMemOp, SturMemOp->getPointerInfo(), getLLTForMVT(MVT::i32));
+
+  SDValue SturConst =
+      CurDAG->getTargetConstant(getLoadStoreOffset(St[2]), dl, MVT::i64);
+  SDValue SturOps[] = {SDValue(Rev, 0), Addr, SturConst, Chain};
+
+  MachineSDNode *Stur =
+      CurDAG->getMachineNode(AArch64::STURWi, dl, MVT::Other, SturOps);
+  CurDAG->setNodeMemRefs(Stur, {SturMMO});
+
+  ReplaceStoreNodes(St + 2, Stur, 4);
+}
+
+void AArch64DAGToDAGISel::SelectWrite7(SDNode *N,
+                                       std::vector<StoreSDNode *>::iterator St,
+                                       SDValue Addr) {
+  SDLoc dl(N);
+
+  SDValue Stored = St[6]->getOperand(1);
+  SDValue Chain = St[6]->getChain();
+
+  SDValue Lsr1Const = CurDAG->getTargetConstant(32, dl, MVT::i64);
+  SDValue Const63 = CurDAG->getTargetConstant(63, dl, MVT::i64);
+  SDValue Lsr1Ops[] = {Stored, Lsr1Const, Const63};
+  SDNode *Lsr1 =
+      CurDAG->getMachineNode(AArch64::UBFMXri, dl, MVT::i64, Lsr1Ops);
+
+  SDValue Lsr1Subreg = CurDAG->getTargetExtractSubreg(
+      AArch64::sub_32, dl, MVT::i32,
+      SDValue(Lsr1, 0));
+
+  SDNode *Rev16 =
+      CurDAG->getMachineNode(AArch64::REV16Wr, dl, MVT::i32, Lsr1Subreg);
+
+  MachineMemOperand *StrhMemOp = St[1]->getMemOperand();
+  MachineMemOperand *StrhMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          StrhMemOp, StrhMemOp->getPointerInfo(), getLLTForMVT(MVT::i16));
+
+  SDValue StrhConst =
+      CurDAG->getTargetConstant(getLoadStoreOffset(St[1]), dl, MVT::i64);
+  SDValue StrhOps[] = {SDValue(Rev16, 0), Addr, StrhConst, Chain};
+
+  MachineSDNode *Strh =
+      CurDAG->getMachineNode(AArch64::STURHHi, dl, MVT::Other, StrhOps);
+  CurDAG->setNodeMemRefs(Strh, {StrhMMO});
+
+  ReplaceStoreNodes(St + 1, Strh, 2);
+
+  SDValue Lsr2Const = CurDAG->getTargetConstant(24, dl, MVT::i64);
+  SDValue Const31 = CurDAG->getTargetConstant(31, dl, MVT::i64);
+  SDValue Lsr2Ops[] = {SDValue(Rev16, 0), Lsr2Const, Const31};
+  SDNode *Lsr2 =
+      CurDAG->getMachineNode(AArch64::UBFMWri, dl, MVT::i32, Lsr2Ops);
+
+  SDValue StrbConst =
+      CurDAG->getTargetConstant(getLoadStoreOffset(St[0]), dl, MVT::i64);
+  SDValue StrbOps[] = {SDValue(Lsr2, 0), Addr, StrbConst, Chain};
+
+  MachineSDNode *Strb =
+      CurDAG->getMachineNode(AArch64::STRBBui, dl, MVT::Other, StrbOps);
+  CurDAG->setNodeMemRefs(Strb, {St[0]->getMemOperand()});
+
+  ReplaceStoreNodes(St, Strb, 1);
+
+  SDValue Subreg =
+      CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, Stored);
+  SDNode *Rev = CurDAG->getMachineNode(AArch64::REVWr, dl, MVT::i32, Subreg);
+
+  MachineMemOperand *SturMemOp = St[3]->getMemOperand();
+  MachineMemOperand *SturMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          SturMemOp, SturMemOp->getPointerInfo(), getLLTForMVT(MVT::i32));
+
+  SDValue SturConst =
+      CurDAG->getTargetConstant(getLoadStoreOffset(St[3]), dl, MVT::i64);
+  SDValue SturOps[] = {SDValue(Rev, 0), Addr, SturConst, Chain};
+
+  MachineSDNode *Stur =
+      CurDAG->getMachineNode(AArch64::STURWi, dl, MVT::Other, SturOps);
+  CurDAG->setNodeMemRefs(Stur, {SturMMO});
+
+  ReplaceStoreNodes(St + 3, Stur, 4);
+}
+
+void AArch64DAGToDAGISel::tryWriteOpt(SDNode *N) {
+  assert(N->getOpcode() == ISD::TokenFactor && "Expected TokenFactor");
+
+  // AllSt maps each address stored to by a store operand of N to a
+  // heap-allocated vector holding those stores.
+  DenseMap<SDValue, std::vector<StoreSDNode *> *> AllSt;
+  getAllStores(N, AllSt);
+
+  // For each address, check if the stores can be combined.
+  for (auto it = AllSt.begin(); it != AllSt.end(); ++it) {
+    SDValue Addr = it->first;
+    std::vector<StoreSDNode *> *StVec = it->second;
+
+    // Sort the store nodes in increasing offset order.
+    std::sort(StVec->begin(), StVec->end(), [](StoreSDNode *a, StoreSDNode *b) {
+      return getLoadStoreOffset(a) < getLoadStoreOffset(b);
+    });
+
+    // For each store node, check if it can be combined.
+    for (auto St = StVec->begin(); St != StVec->end(); ++St) {
+      SDValue Stored = (*St)->getOperand(1);
+      if (Stored->getOpcode() == ISD::SRL &&
+          Stored->getOperand(1).getOpcode() == ISD::Constant) {
+        int64_t Shift =
+            cast<ConstantSDNode>(Stored->getOperand(1))->getSExtValue();
+        switch (Shift) {
+        case 48:
+          if (canCombineStores(St, StVec->end(), 7)) {
+            SelectWrite7(N, St, Addr);
+            St += 6;
+          }
+          break;
+        case 40:
+          if (canCombineStores(St, StVec->end(), 6)) {
+            SelectWrite6(N, St, Addr);
+            St += 5;
+          }
+          break;
+        case 16:
+          if (canCombineStores(St, StVec->end(), 3)) {
+            SelectWrite3(N, St, Addr);
+            St += 2;
+          }
+          break;
+        default:
+          continue;
+        }
+      }
+    }
+    delete StVec;
+  }
+
+  return;
+}
+
+// LdVec is a vector of load nodes and their shifts in increasing offset order.
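+// e.g. a 3-byte big-endian read yields shifts 16, 8, 0 for the loads
+// at offsets 0, 1, 2.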
+// If the loads can be combined, return true.
+// If the loads cannot be combined, return false.
+static bool
+canCombineLoads(const SmallVector<std::pair<LoadSDNode *, int64_t>> &LdVec,
+                int NumLd) {
+  // Need exactly NumLd loads.
+  if (LdVec.size() != (size_t)NumLd)
+    return false;
+
+  for (int i = 0; i < NumLd; ++i) {
+    int64_t Shift = LdVec[i].second;
+    // The shifts should start at (NumLd-1)*8 and decrease by 8.
+    if (NumLd - Shift / 8 != i + 1)
+      return false;
+  }
+
+  return true;
+}
+
+static bool canCombineLoad(LoadSDNode *N) {
+  // All loads should be simple.
+  if (!N->isSimple())
+    return false;
+  // All loads should be zero extended.
+  if (N->getExtensionType() != ISD::ZEXTLOAD)
+    return false;
+  // All loads should be byte loads.
+  if (N->getMemoryVT() != MVT::i8)
+    return false;
+  // Load addresses should be of a certain form.
+  if (!canGetLoadStoreOffset(N))
+    return false;
+  return true;
+}
+
+static bool getAllLoadsHelper(SDNode *N,
+                              SmallVector<std::pair<LoadSDNode *, int64_t>> &LD,
+                              MVT VT, int64_t Shift = 0) {
+  for (SDValue Op : {N->getOperand(0), N->getOperand(1)}) {
+
+    // The operands of the OR node must have one use
+    // and the correct value type to be combined.
+    if (!Op.hasOneUse() || Op.getValueType() != VT)
+      return false;
+
+    if (Op.getOpcode() == ISD::OR) {
+      // If it is an OR node, continue traversing the DAG.
+      if (!getAllLoadsHelper(Op.getNode(), LD, VT, Shift))
+        return false;
+    } else if (Op.getOpcode() == ISD::SHL) {
+      // If it is an SHL node, there are two possible cases:
+      // 1. There is a LOAD node that needs to be added to LD.
+      // 2. There is an OR node that needs to be traversed.
+
+      // The shift must be by a constant.
+      if (Op.getOperand(1).getOpcode() != ISD::Constant)
+        return false;
+      int64_t ShiftImm =
+          cast<ConstantSDNode>(Op.getOperand(1).getNode())->getSExtValue();
+      // The shift should be in range and divisible by 8.
+      if (ShiftImm < 0 || ShiftImm % 8 != 0 ||
+          (ShiftImm > 32 && VT == MVT::i32) ||
+          (ShiftImm > 64 && VT == MVT::i64))
+        return false;
+      // The data that is shifted should have one use.
+      Op = Op.getOperand(0);
+      if (!Op.hasOneUse() || Op.getValueType() != VT)
+        return false;
+      // This is to account for the case where the shifted data
+      // is extended to a different value type.
+      if (VT == MVT::i64 && ShiftImm == 32 &&
+          Op.getOpcode() == ISD::ANY_EXTEND) {
+        Op = Op.getOperand(0);
+        VT = Op.getSimpleValueType();
+        if (!Op.hasOneUse() || VT != MVT::i32)
+          return false;
+      }
+      if (Op.getOpcode() == ISD::LOAD) {
+        // The value shifted comes from a load.
+        LoadSDNode *Load = cast<LoadSDNode>(Op.getNode());
+        if (!canCombineLoad(Load))
+          return false;
+        LD.push_back(std::make_pair(Load, Shift + ShiftImm));
+      } else if (Op.getOpcode() == ISD::OR) {
+        // The value shifted comes from an OR.
+        if (!getAllLoadsHelper(Op.getNode(), LD, VT, Shift + ShiftImm))
+          return false;
+      } else {
+        return false;
+      }
+    } else if (Op.getOpcode() == ISD::LOAD) {
+      // If it is a LOAD node, add it to LD.
+      LoadSDNode *Load = cast<LoadSDNode>(Op.getNode());
+      if (!canCombineLoad(Load))
+        return false;
+      LD.push_back(std::make_pair(Load, Shift));
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+// All the LOAD nodes in the DAG reachable from the OR node N
+// that can be combined are added to the vector LdVec with their shift,
+// in increasing offset order.
+// Returns true if the loads can be combined, false otherwise.
+static bool getAllLoads(SDNode *N,
+                        SmallVector<std::pair<LoadSDNode *, int64_t>> &LdVec) {
+  if (!getAllLoadsHelper(N, LdVec, N->getSimpleValueType(0)))
+    return false;
+
+  if (LdVec.empty())
+    return false;
+
+  // Sort the load nodes in increasing offset order.
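+  // Offsets give the memory order; the matching shift pattern is
+  // checked separately by canCombineLoads.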
+  std::sort(LdVec.begin(), LdVec.end(),
+            [](const std::pair<LoadSDNode *, int64_t> &a,
+               const std::pair<LoadSDNode *, int64_t> &b) {
+              return getLoadStoreOffset(a.first) < getLoadStoreOffset(b.first);
+            });
+
+  SDValue Addr = getLoadStoreAddrWithoutOffset(LdVec[0].first);
+  int64_t Offset = getLoadStoreOffset(LdVec[0].first);
+  SDValue Chain = LdVec[0].first->getChain();
+
+  for (int i = LdVec.size() - 1; i >= 0; --i) {
+    LoadSDNode *Ld = LdVec[i].first;
+    // The loads should all load from the same address.
+    if (getLoadStoreAddrWithoutOffset(Ld) != Addr)
+      return false;
+    // The loads should have consecutive offsets.
+    if (getLoadStoreOffset(Ld) != Offset + i)
+      return false;
+    // The chains should all be the same.
+    if (Ld->getChain() != Chain)
+      return false;
+  }
+
+  return true;
+}
+
+void AArch64DAGToDAGISel::SelectRead3(
+    SDNode *N, SmallVector<std::pair<LoadSDNode *, int64_t>> &LD) {
+  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
+
+  if (N->getValueType(0) != MVT::i32)
+    return;
+
+  SDValue Addr = getLoadStoreAddrWithoutOffset(LD[0].first);
+  int64_t Offset = getLoadStoreOffset(LD[0].first);
+  SDValue Chain = LD[0].first->getChain();
+
+  SDLoc dl(N);
+
+  MachineMemOperand *MemOp = LD[0].first->getMemOperand();
+  MachineMemOperand *MMO = CurDAG->getMachineFunction().getMachineMemOperand(
+      MemOp, MemOp->getPointerInfo(), getLLTForMVT(MVT::i16));
+
+  SDValue Off = CurDAG->getTargetConstant(Offset, dl, MVT::i64);
+  SDValue LdrhOps[] = {Addr, Off, Chain};
+
+  MachineSDNode *Ldrh = CurDAG->getMachineNode(
+      AArch64::LDURHHi, dl, LD[0].first->getVTList(), LdrhOps);
+  CurDAG->setNodeMemRefs(Ldrh, {MMO});
+
+  SDNode *Rev16 =
+      CurDAG->getMachineNode(AArch64::REV16Wr, dl, MVT::i32, SDValue(Ldrh, 0));
+
+  SDValue Const = CurDAG->getTargetConstant(8, dl, MVT::i64);
+  SDValue Ops[] = {SDValue(LD[2].first, 0), SDValue(Rev16, 0), Const};
+
+  SDNode *Or = CurDAG->getMachineNode(AArch64::ORRWrs, dl, N->getVTList(), Ops);
+
+  ReplaceNode(N, Or);
+}
+
+void AArch64DAGToDAGISel::SelectRead6(
+    SDNode *N, SmallVector<std::pair<LoadSDNode *, int64_t>> &LD) {
+  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
+
+  if (N->getValueType(0) != MVT::i64)
+    return;
+
+  SDValue Addr = getLoadStoreAddrWithoutOffset(LD[0].first);
+  int64_t Offset = getLoadStoreOffset(LD[0].first);
+  SDValue Chain = LD[0].first->getChain();
+
+  SDLoc dl(N);
+  SDValue Const0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+
+  MachineMemOperand *LdrhMemOp = LD[0].first->getMemOperand();
+  MachineMemOperand *LdrhMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          LdrhMemOp, LdrhMemOp->getPointerInfo(), getLLTForMVT(MVT::i16));
+
+  SDValue LdrhConst = CurDAG->getTargetConstant(Offset, dl, MVT::i64);
+  SDValue LdrhOps[] = {Addr, LdrhConst, Chain};
+
+  MachineSDNode *Ldrh = CurDAG->getMachineNode(
+      AArch64::LDURHHi, dl, LD[0].first->getVTList(), LdrhOps);
+  CurDAG->setNodeMemRefs(Ldrh, {LdrhMMO});
+
+  SDNode *Rev16 =
+      CurDAG->getMachineNode(AArch64::REV16Wr, dl, MVT::i32, SDValue(Ldrh, 0));
+
+  SDValue Sub32 = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i64);
+  SDValue Subreg0Ops[] = {Const0, SDValue(Rev16, 0), Sub32};
+  SDNode *Subreg0 =
+      CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl, MVT::i64, Subreg0Ops);
+
+  MachineMemOperand *LdurMemOp = LD[2].first->getMemOperand();
+  MachineMemOperand *LdurMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          LdurMemOp, LdurMemOp->getPointerInfo(), getLLTForMVT(MVT::i32));
+
+  SDValue LdurConst = CurDAG->getTargetConstant(Offset + 2, dl, MVT::i64);
+  SDValue LdurOps[] = {Addr, LdurConst, Chain};
+
+  MachineSDNode *Ldur = CurDAG->getMachineNode(
+      AArch64::LDURWi, dl, LD[2].first->getVTList(), LdurOps);
+  CurDAG->setNodeMemRefs(Ldur, {LdurMMO});
+
+  SDNode *Rev =
+      CurDAG->getMachineNode(AArch64::REVWr, dl, MVT::i32, SDValue(Ldur, 0));
+
+  SDValue Subreg1Ops[] = {Const0, SDValue(Rev, 0), Sub32};
+  SDNode *Subreg1 =
+      CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl, MVT::i64, Subreg1Ops);
+
+  SDValue Const = CurDAG->getTargetConstant(32, dl, MVT::i64);
+  SDValue Ops[] = {SDValue(Subreg1, 0), SDValue(Subreg0, 0), Const};
+  SDNode *Or = CurDAG->getMachineNode(AArch64::ORRXrs, dl, N->getVTList(), Ops);
+
+  ReplaceNode(N, Or);
+}
+
+void AArch64DAGToDAGISel::SelectRead7(
+    SDNode *N, SmallVector<std::pair<LoadSDNode *, int64_t>> &LD) {
+  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
+
+  if (N->getValueType(0) != MVT::i64)
+    return;
+
+  SDValue Addr = getLoadStoreAddrWithoutOffset(LD[0].first);
+  int64_t Offset = getLoadStoreOffset(LD[0].first);
+  SDValue Chain = LD[0].first->getChain();
+
+  SDLoc dl(N);
+  SDValue Const0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+
+  MachineMemOperand *LdrhMemOp = LD[0].first->getMemOperand();
+  MachineMemOperand *LdrhMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          LdrhMemOp, LdrhMemOp->getPointerInfo(), getLLTForMVT(MVT::i16));
+
+  SDValue LdrhConst = CurDAG->getTargetConstant(Offset, dl, MVT::i64);
+  SDValue LdrhOps[] = {Addr, LdrhConst, Chain};
+
+  MachineSDNode *Ldrh = CurDAG->getMachineNode(
+      AArch64::LDURHHi, dl, LD[0].first->getVTList(), LdrhOps);
+  CurDAG->setNodeMemRefs(Ldrh, {LdrhMMO});
+
+  SDNode *Rev16 =
+      CurDAG->getMachineNode(AArch64::REV16Wr, dl, MVT::i32, SDValue(Ldrh, 0));
+
+  SDValue OrrConst = CurDAG->getTargetConstant(8, dl, MVT::i64);
+  SDValue OrrOps[] = {SDValue(LD[2].first, 0), SDValue(Rev16, 0), OrrConst};
+  SDNode *Orr = CurDAG->getMachineNode(AArch64::ORRWrs, dl, MVT::i32, OrrOps);
+
+  SDValue Sub32 = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i64);
+  SDValue Subreg0Ops[] = {Const0, SDValue(Orr, 0), Sub32};
+  SDNode *Subreg0 =
+      CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl, MVT::i64, Subreg0Ops);
+
+  MachineMemOperand *LdurMemOp = LD[3].first->getMemOperand();
+  MachineMemOperand *LdurMMO =
+      CurDAG->getMachineFunction().getMachineMemOperand(
+          LdurMemOp, LdurMemOp->getPointerInfo(), getLLTForMVT(MVT::i32));
+
+  SDValue LdurConst = CurDAG->getTargetConstant(Offset + 3, dl, MVT::i64);
+  SDValue LdurOps[] = {Addr, LdurConst, Chain};
+
+  MachineSDNode *Ldur = CurDAG->getMachineNode(
+      AArch64::LDURWi, dl, LD[3].first->getVTList(), LdurOps);
+  CurDAG->setNodeMemRefs(Ldur, {LdurMMO});
+
+  SDNode *Rev =
+      CurDAG->getMachineNode(AArch64::REVWr, dl, MVT::i32, SDValue(Ldur, 0));
+
+  SDValue Subreg1Ops[] = {Const0, SDValue(Rev, 0), Sub32};
+  SDNode *Subreg1 =
+      CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl, MVT::i64, Subreg1Ops);
+
+  SDValue Const = CurDAG->getTargetConstant(32, dl, MVT::i64);
+  SDValue Ops[] = {SDValue(Subreg1, 0), SDValue(Subreg0, 0), Const};
+  SDNode *Or = CurDAG->getMachineNode(AArch64::ORRXrs, dl, N->getVTList(), Ops);
+
+  ReplaceNode(N, Or);
+}
+
+bool AArch64DAGToDAGISel::tryReadOpt(SDNode *N) {
+  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
+
+  // AllLd is a vector of all reachable (load, shift) pairs.
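+  // e.g. for a 3-byte big-endian read, it holds the byte loads at
+  // offsets 0, 1, 2 paired with shifts 16, 8, 0.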
+  SmallVector<std::pair<LoadSDNode *, int64_t>> AllLd;
+  if (!getAllLoads(N, AllLd))
+    return false;
+
+  if (N->getValueType(0) == MVT::i64 && canCombineLoads(AllLd, 7)) {
+    SelectRead7(N, AllLd);
+    return true;
+  }
+  if (N->getValueType(0) == MVT::i64 && canCombineLoads(AllLd, 6)) {
+    SelectRead6(N, AllLd);
+    return true;
+  }
+  if (N->getValueType(0) == MVT::i32 && canCombineLoads(AllLd, 3)) {
+    SelectRead3(N, AllLd);
+    return true;
+  }
+
+  return false;
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -4245,6 +4942,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
   case ISD::OR:
     if (tryBitfieldInsertOp(Node))
       return;
+    if (EnableEndiannessOpts) {
+      if (tryReadOpt(Node))
+        return;
+    }
     break;
 
   case ISD::EXTRACT_SUBVECTOR: {
@@ -6424,6 +7125,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     }
     break;
   }
+  case ISD::TokenFactor:
+    if (EnableEndiannessOpts) {
+      tryWriteOpt(Node);
+    }
+    break;
   }
 
   // Select the default instruction
diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
index 0f2cb775e98e..d3e23770c065 100644
--- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,LE
-; RUN: llc < %s -mtriple=aarch64_be-- | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,LE,NO-ENDIAN-OPTS
+; RUN: llc < %s -mtriple=aarch64_be-- | FileCheck %s --check-prefixes=CHECK,BE,NO-ENDIAN-OPTS
+; RUN: llc < %s -mtriple=aarch64-- -aarch64-endianness-opts | FileCheck %s --check-prefixes=CHECK,LE,ENDIAN-OPTS
+; RUN: llc < %s -mtriple=aarch64_be-- -aarch64-endianness-opts | FileCheck %s --check-prefixes=CHECK,BE,ENDIAN-OPTS
 
 define void @le_i16_to_i8(i16 %x, ptr %p0) {
 ; LE-LABEL: le_i16_to_i8:
@@ -744,22 +746,38 @@ define void @i32_to_i8_incomplete(i32 %x, ptr %p0) {
 ; Negative test - no store of 't3'
 
 define void @i64_to_i8_incomplete(i64 %x, ptr %p0) {
-; CHECK-LABEL: i64_to_i8_incomplete:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #56
-; CHECK-NEXT:    lsr x9, x0, #48
-; CHECK-NEXT:    lsr x10, x0, #40
-; CHECK-NEXT:    lsr x11, x0, #32
-; CHECK-NEXT:    strb w0, [x1, #7]
-; CHECK-NEXT:    strb w8, [x1]
-; CHECK-NEXT:    lsr x8, x0, #16
-; CHECK-NEXT:    strb w9, [x1, #1]
-; CHECK-NEXT:    lsr x9, x0, #8
-; CHECK-NEXT:    strb w10, [x1, #2]
-; CHECK-NEXT:    strb w11, [x1, #3]
-; CHECK-NEXT:    strb w8, [x1, #5]
-; CHECK-NEXT:    strb w9, [x1, #6]
-; CHECK-NEXT:    ret
+; NO-ENDIAN-OPTS-LABEL: i64_to_i8_incomplete:
+; NO-ENDIAN-OPTS:       // %bb.0:
+; NO-ENDIAN-OPTS-NEXT:    lsr x8, x0, #56
+; NO-ENDIAN-OPTS-NEXT:    lsr x9, x0, #48
+; NO-ENDIAN-OPTS-NEXT:    lsr x10, x0, #40
+; NO-ENDIAN-OPTS-NEXT:    lsr x11, x0, #32
+; NO-ENDIAN-OPTS-NEXT:    strb w0, [x1, #7]
+; NO-ENDIAN-OPTS-NEXT:    strb w8, [x1]
+; NO-ENDIAN-OPTS-NEXT:    lsr x8, x0, #16
+; NO-ENDIAN-OPTS-NEXT:    strb w9, [x1, #1]
+; NO-ENDIAN-OPTS-NEXT:    lsr x9, x0, #8
+; NO-ENDIAN-OPTS-NEXT:    strb w10, [x1, #2]
+; NO-ENDIAN-OPTS-NEXT:    strb w11, [x1, #3]
+; NO-ENDIAN-OPTS-NEXT:    strb w8, [x1, #5]
+; NO-ENDIAN-OPTS-NEXT:    strb w9, [x1, #6]
+; NO-ENDIAN-OPTS-NEXT:    ret
+;
+; ENDIAN-OPTS-LABEL: i64_to_i8_incomplete:
+; ENDIAN-OPTS:       // %bb.0:
+; ENDIAN-OPTS-NEXT:    rev16 w8, w0
+; ENDIAN-OPTS-NEXT:    lsr x9, x0, #56
+; ENDIAN-OPTS-NEXT:    lsr x10, x0, #48
+; ENDIAN-OPTS-NEXT:    lsr x11, x0, #40
+; ENDIAN-OPTS-NEXT:    sturh w8, [x1, #6]
+; ENDIAN-OPTS-NEXT:    lsr x8, x0, #32
+; ENDIAN-OPTS-NEXT:    strb w9, [x1]
+; ENDIAN-OPTS-NEXT:    lsr x9, x0, #16
+; ENDIAN-OPTS-NEXT:    strb w10, [x1, #1]
+; ENDIAN-OPTS-NEXT:    strb w11, [x1, #2]
+; ENDIAN-OPTS-NEXT:    strb w8, [x1, #3]
+; ENDIAN-OPTS-NEXT:    strb w9, [x1, #5]
+; ENDIAN-OPTS-NEXT:    ret
   %sh1 = lshr i64 %x, 8
   %sh2 = lshr i64 %x, 16
   %sh3 = lshr i64 %x, 24
diff --git a/llvm/test/CodeGen/AArch64/reduce-load-store.ll b/llvm/test/CodeGen/AArch64/reduce-load-store.ll
new file mode 100644
index 000000000000..bc5fc901cf93
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reduce-load-store.ll
@@ -0,0 +1,285 @@
+; RUN: llc < %s -mtriple=aarch64 -aarch64-endianness-opts | FileCheck %s --check-prefixes=CHECK
+
+define i32 @test_read_3(ptr %b) {
+; CHECK-LABEL: test_read_3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x0, #2]
+; CHECK-NEXT:    rev16 w8, w8
+; CHECK-NEXT:    orr w0, w9, w8, lsl #8
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i8, ptr %b, align 1
+  %conv = zext i8 %0 to i32
+  %shl = shl nuw nsw i32 %conv, 16
+  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 1
+  %1 = load i8, ptr %arrayidx1, align 1
+  %conv2 = zext i8 %1 to i32
+  %shl3 = shl nuw nsw i32 %conv2, 8
+  %or = or i32 %shl3, %shl
+  %arrayidx4 = getelementptr inbounds i8, ptr %b, i64 2
+  %2 = load i8, ptr %arrayidx4, align 1
+  %conv5 = zext i8 %2 to i32
+  %or6 = or i32 %or, %conv5
+  ret i32 %or6
+}
+
+define i64 @test_read_6(ptr %b) {
+; CHECK-LABEL: test_read_6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0]
+; CHECK-NEXT:    ldur w9, [x0, #2]
+; CHECK-NEXT:    rev16 w8, w8
+; CHECK-NEXT:    rev w9, w9
+; CHECK-NEXT:    orr x0, x9, x8, lsl #32
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i8, ptr %b, align 1
+  %conv.i = zext i8 %0 to i64
+  %arrayidx1.i = getelementptr inbounds i8, ptr %b, i64 1
+  %1 = load i8, ptr %arrayidx1.i, align 1
+  %conv2.i = zext i8 %1 to i64
+  %add.ptr = getelementptr inbounds i8, ptr %b, i64 2
+  %2 = load i8, ptr %add.ptr, align 1
+  %conv.i5 = zext i8 %2 to i64
+  %shl.i6 = shl nuw nsw i64 %conv.i5, 24
+  %arrayidx1.i7 = getelementptr inbounds i8, ptr %b, i64 3
+  %3 = load i8, ptr %arrayidx1.i7, align 1
+  %conv2.i8 = zext i8 %3 to i64
+  %shl3.i = shl nuw nsw i64 %conv2.i8, 16
+  %arrayidx4.i = getelementptr inbounds i8, ptr %b, i64 4
+  %4 = load i8, ptr %arrayidx4.i, align 1
+  %conv5.i = zext i8 %4 to i64
+  %shl6.i = shl nuw nsw i64 %conv5.i, 8
+  %arrayidx8.i = getelementptr inbounds i8, ptr %b, i64 5
+  %5 = load i8, ptr %arrayidx8.i, align 1
+  %conv9.i = zext i8 %5 to i64
+  %6 = shl nuw nsw i64 %conv.i, 40
+  %7 = shl nuw nsw i64 %conv2.i, 32
+  %or.i9 = or i64 %7, %6
+  %or7.i = or i64 %or.i9, %shl.i6
+  %or10.i = or i64 %or7.i, %shl3.i
+  %shl.i10 = or i64 %or10.i, %shl6.i
+  %or.i11 = or i64 %shl.i10, %conv9.i
+  ret i64 %or.i11
+}
+
+define i64 @test_read_7(ptr %b) {
+; CHECK-LABEL: test_read_7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x0, #2]
+; CHECK-NEXT:    ldur w10, [x0, #3]
+; CHECK-NEXT:    rev16 w8, w8
+; CHECK-NEXT:    orr w8, w9, w8, lsl #8
+; CHECK-NEXT:    rev w9, w10
+; CHECK-NEXT:    orr x0, x9, x8, lsl #32
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i8, ptr %b, align 1
+  %conv.i = zext i8 %0 to i64
+  %shl.i = shl nuw nsw i64 %conv.i, 16
+  %arrayidx1.i = getelementptr inbounds i8, ptr %b, i64 1
+  %1 = load i8, ptr %arrayidx1.i, align 1
+  %conv2.i = zext i8 %1 to i64
+  %shl3.i = shl nuw nsw i64 %conv2.i, 8
+  %or.i = or i64 %shl3.i, %shl.i
+  %arrayidx4.i = getelementptr inbounds i8, ptr %b, i64 2
+  %2 = load i8, ptr %arrayidx4.i, align 1
+  %conv5.i = zext i8 %2 to i64
+  %or6.i = or i64 %or.i, %conv5.i
+  %add.ptr = getelementptr inbounds i8, ptr %b, i64 3
+  %3 = load i8, ptr %add.ptr, align 1
+  %conv.i5 = zext i8 %3 to i64
+  %shl.i6 = shl nuw nsw i64 %conv.i5, 24
+  %arrayidx1.i7 = getelementptr inbounds i8, ptr %b, i64 4
+  %4 = load i8, ptr %arrayidx1.i7, align 1
+  %conv2.i8 = zext i8 %4 to i64
+  %shl3.i9 = shl nuw nsw i64 %conv2.i8, 16
+  %or.i10 = or i64 %shl3.i9, %shl.i6
+  %arrayidx4.i11 = getelementptr inbounds i8, ptr %b, i64 5
+  %5 = load i8, ptr %arrayidx4.i11, align 1
+  %conv5.i12 = zext i8 %5 to i64
+  %shl6.i = shl nuw nsw i64 %conv5.i12, 8
+  %arrayidx8.i = getelementptr inbounds i8, ptr %b, i64 6
+  %6 = load i8, ptr %arrayidx8.i, align 1
+  %conv9.i = zext i8 %6 to i64
+  %shl.i13 = shl nuw nsw i64 %or6.i, 32
+  %or7.i = or i64 %or.i10, %shl.i13
+  %or10.i = or i64 %or7.i, %shl6.i
+  %or.i14 = or i64 %or10.i, %conv9.i
+  ret i64 %or.i14
+}
+
+define void @test_write_3(ptr %b, i64 %n) {
+; CHECK-LABEL: test_write_3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rev16 w8, w1
+; CHECK-NEXT:    lsr x9, x1, #16
+; CHECK-NEXT:    sturh w8, [x0, #1]
+; CHECK-NEXT:    strb w9, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %shr = lshr i64 %n, 16
+  %conv = trunc i64 %shr to i8
+  store i8 %conv, ptr %b, align 1
+  %shr1 = lshr i64 %n, 8
+  %conv2 = trunc i64 %shr1 to i8
+  %arrayidx3 = getelementptr inbounds i8, ptr %b, i64 1
+  store i8 %conv2, ptr %arrayidx3, align 1
+  %conv4 = trunc i64 %n to i8
+  %arrayidx5 = getelementptr inbounds i8, ptr %b, i64 2
+  store i8 %conv4, ptr %arrayidx5, align 1
+  ret void
+}
+
+define void @test_write_6(ptr %b, i64 %n) {
+; CHECK-LABEL: test_write_6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    lsr x9, x1, #32
+; CHECK-NEXT:    rev w8, w1
+; CHECK-NEXT:    rev16 w9, w9
+; CHECK-NEXT:    stur w8, [x0, #2]
+; CHECK-NEXT:    sturh w9, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %shr = lshr i64 %n, 32
+  %shr.i = lshr i64 %n, 40
+  %conv.i = trunc i64 %shr.i to i8
+  store i8 %conv.i, ptr %b, align 1
+  %conv1.i = trunc i64 %shr to i8
+  %arrayidx2.i = getelementptr inbounds i8, ptr %b, i64 1
+  store i8 %conv1.i, ptr %arrayidx2.i, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %b, i64 2
+  %shr.i3 = lshr i64 %n, 24
+  %conv.i4 = trunc i64 %shr.i3 to i8
+  store i8 %conv.i4, ptr %add.ptr, align 1
+  %shr1.i = lshr i64 %n, 16
+  %conv2.i = trunc i64 %shr1.i to i8
+  %arrayidx3.i = getelementptr inbounds i8, ptr %b, i64 3
+  store i8 %conv2.i, ptr %arrayidx3.i, align 1
+  %shr4.i = lshr i64 %n, 8
+  %conv5.i = trunc i64 %shr4.i to i8
+  %arrayidx6.i = getelementptr inbounds i8, ptr %b, i64 4
+  store i8 %conv5.i, ptr %arrayidx6.i, align 1
+  %conv7.i = trunc i64 %n to i8
+  %arrayidx8.i = getelementptr inbounds i8, ptr %b, i64 5
+  store i8 %conv7.i, ptr %arrayidx8.i, align 1
+  ret void
+}
+
+define void @test_write_7(ptr %b, i64 %n) {
+; CHECK-LABEL: test_write_7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    lsr x9, x1, #32
+; CHECK-NEXT:    rev w8, w1
+; CHECK-NEXT:    rev16 w9, w9
+; CHECK-NEXT:    lsr w10, w9, #24
+; CHECK-NEXT:    stur w8, [x0, #3]
+; CHECK-NEXT:    sturh w9, [x0, #1]
+; CHECK-NEXT:    strb w10, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %shr = lshr i64 %n, 32
+  %shr.i = lshr i64 %n, 48
+  %conv.i = trunc i64 %shr.i to i8
+  store i8 %conv.i, ptr %b, align 1
+  %shr1.i = lshr i64 %n, 40
+  %conv2.i = trunc i64 %shr1.i to i8
+  %arrayidx3.i = getelementptr inbounds i8, ptr %b, i64 1
+  store i8 %conv2.i, ptr %arrayidx3.i, align 1
+  %conv4.i = trunc i64 %shr to i8
+  %arrayidx5.i = getelementptr inbounds i8, ptr %b, i64 2
+  store i8 %conv4.i, ptr %arrayidx5.i, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %b, i64 3
+  %shr.i3 = lshr i64 %n, 24
+  %conv.i4 = trunc i64 %shr.i3 to i8
+  store i8 %conv.i4, ptr %add.ptr, align 1
+  %shr1.i5 = lshr i64 %n, 16
+  %conv2.i6 = trunc i64 %shr1.i5 to i8
+  %arrayidx3.i7 = getelementptr inbounds i8, ptr %b, i64 4
+  store i8 %conv2.i6, ptr %arrayidx3.i7, align 1
+  %shr4.i = lshr i64 %n, 8
+  %conv5.i = trunc i64 %shr4.i to i8
+  %arrayidx6.i = getelementptr inbounds i8, ptr %b, i64 5
+  store i8 %conv5.i, ptr %arrayidx6.i, align 1
+  %conv7.i = trunc i64 %n to i8
+  %arrayidx8.i = getelementptr inbounds i8, ptr %b, i64 6
+  store i8 %conv7.i, ptr %arrayidx8.i, align 1
+  ret void
+}
+
+define void @test_write_with_different_chains(ptr noalias %b, i64 %n, ptr noalias %c) {
+; CHECK-LABEL: test_write_with_different_chains:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [x2]
+; CHECK-NEXT:    lsr x9, x1, #16
+; CHECK-NEXT:    rev16 w10, w1
+; CHECK-NEXT:    rev16 w11, w8
+; CHECK-NEXT:    lsr x8, x8, #16
+; CHECK-NEXT:    strb w9, [x0]
+; CHECK-NEXT:    sturh w10, [x0, #1]
+; CHECK-NEXT:    sturh w11, [x2, #1]
+; CHECK-NEXT:    strb w8, [x2]
+; CHECK-NEXT:    ret
+entry:
+  %shr = lshr i64 %n, 16
+  %conv = trunc i64 %shr to i8
+  store i8 %conv, ptr %b, align 1
+  %shr1 = lshr i64 %n, 8
+  %conv2 = trunc i64 %shr1 to i8
+  %arrayidx3 = getelementptr inbounds i8, ptr %b, i64 1
+  store i8 %conv2, ptr %arrayidx3, align 1
+  %conv4 = trunc i64 %n to i8
+  %arrayidx5 = getelementptr inbounds i8, ptr %b, i64 2
+  store i8 %conv4, ptr %arrayidx5, align 1
+  %0 = load i64, ptr %c, align 1
+  %shr10 = lshr i64 %0, 16
+  %conv10 = trunc i64 %shr10 to i8
+  store i8 %conv10, ptr %c, align 1
+  %shr11 = lshr i64 %0, 8
+  %conv12 = trunc i64 %shr11 to i8
+  %arrayidx13 = getelementptr inbounds i8, ptr %c, i64 1
+  store i8 %conv12, ptr %arrayidx13, align 1
+  %conv14 = trunc i64 %0 to i8
+  %arrayidx15 = getelementptr inbounds i8, ptr %c, i64 2
+  store i8 %conv14, ptr %arrayidx15, align 1
+  ret void
+}
+
+define void @test_write_multiple_to_same_address(ptr %b, i64 %n, i64 %m) {
+; CHECK-LABEL: test_write_multiple_to_same_address:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rev16 w8, w2
+; CHECK-NEXT:    rev16 w9, w1
+; CHECK-NEXT:    lsr x10, x1, #16
+; CHECK-NEXT:    lsr x11, x2, #16
+; CHECK-NEXT:    sturh w8, [x0, #8]
+; CHECK-NEXT:    sturh w9, [x0, #1]
+; CHECK-NEXT:    strb w10, [x0]
+; CHECK-NEXT:    strb w11, [x0, #7]
+; CHECK-NEXT:    ret
+entry:
+  %shr = lshr i64 %n, 16
+  %conv = trunc i64 %shr to i8
+  store i8 %conv, ptr %b, align 1
+  %shr1 = lshr i64 %n, 8
+  %conv2 = trunc i64 %shr1 to i8
+  %arrayidx3 = getelementptr inbounds i8, ptr %b, i64 1
+  store i8 %conv2, ptr %arrayidx3, align 1
+  %conv4 = trunc i64 %n to i8
+  %arrayidx5 = getelementptr inbounds i8, ptr %b, i64 2
+  store i8 %conv4, ptr %arrayidx5, align 1
+  %shr10 = lshr i64 %m, 16
+  %conv10 = trunc i64 %shr10 to i8
+  %arrayidx11 = getelementptr inbounds i8, ptr %b, i64 7
+  store i8 %conv10, ptr %arrayidx11, align 1
+  %shr11 = lshr i64 %m, 8
+  %conv12 = trunc i64 %shr11 to i8
+  %arrayidx13 = getelementptr inbounds i8, ptr %b, i64 8
+  store i8 %conv12, ptr %arrayidx13, align 1
+  %conv14 = trunc i64 %m to i8
+  %arrayidx15 = getelementptr inbounds i8, ptr %b, i64 9
+  store i8 %conv14, ptr %arrayidx15, align 1
+  ret void
+}
\ No newline at end of file
-- 
Gitee
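
For reference, a minimal sketch (not part of the patch) of the C-level
byte-access patterns these new selectors target; the function names here are
illustrative only. Compiled for little-endian AArch64 with
-aarch64-endianness-opts, read_be24 should lower to the ldurh/ldrb/rev16/orr
sequence checked in test_read_3, and write_be24 to the rev16/sturh/strb
sequence checked in test_write_3:

    #include <stdint.h>

    /* 3-byte big-endian read: the zext/shl/or chain of byte loads that
       SelectRead3 merges into ldurh + ldrb, fixed up with rev16. */
    uint32_t read_be24(const uint8_t *b) {
      return ((uint32_t)b[0] << 16) | ((uint32_t)b[1] << 8) | (uint32_t)b[2];
    }

    /* 3-byte big-endian write: the lshr/trunc/store chain of byte stores
       that SelectWrite3 merges into sturh + strb, with rev16 producing the
       byte-swapped halfword. */
    void write_be24(uint8_t *b, uint64_t n) {
      b[0] = (uint8_t)(n >> 16);
      b[1] = (uint8_t)(n >> 8);
      b[2] = (uint8_t)n;
    }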