diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 61fc31715d718b67001c25437cc7fe63d77ec203..80530f8c5b032c47917f084b79c4878cadfea6ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3133,6 +3133,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(Res.getValue(1));
     break;
   }
+  case ISD::ATOMIC_LOAD_SUB: {
+    SDLoc DL(Node);
+    EVT VT = Node->getValueType(0);
+    SDValue RHS = Node->getOperand(2);
+    AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+    if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
+      RHS = RHS->getOperand(0);
+    SDValue NewRHS =
+        DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+    SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
+                                Node->getOperand(0), Node->getOperand(1),
+                                NewRHS, AN->getMemOperand());
+    Results.push_back(Res);
+    Results.push_back(Res.getValue(1));
+    break;
+  }
   case ISD::DYNAMIC_STACKALLOC:
     ExpandDYNAMIC_STACKALLOC(Node, Results);
     break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 64da3f041b686b0aa12ac40dce8302b38887a8c1..23f1607544e48696d750547aa21f2ac710f12785 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -789,8 +789,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
 
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, LibCall);
+  } else {
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+  }
 
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
@@ -6087,8 +6092,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::VECREDUCE_FMAXIMUM:
   case ISD::VECREDUCE_FMINIMUM:
     return LowerVECREDUCE(Op, DAG);
-  case ISD::ATOMIC_LOAD_SUB:
-    return LowerATOMIC_LOAD_SUB(Op, DAG);
   case ISD::ATOMIC_LOAD_AND:
     return LowerATOMIC_LOAD_AND(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
@@ -13879,23 +13882,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
   }
 }
 
-SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
-                                                    SelectionDAG &DAG) const {
-  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
-    return SDValue();
-
-  // LSE has an atomic load-add instruction, but not a load-sub.
-  SDLoc dl(Op);
-  MVT VT = Op.getSimpleValueType();
-  SDValue RHS = Op.getOperand(2);
-  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
-  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
-  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
-                       Op.getOperand(0), Op.getOperand(1), RHS,
-                       AN->getMemOperand());
-}
-
 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
                                                     SelectionDAG &DAG) const {
   auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2f6dc303934d1c387d1ced799a9348580d5eef4e..3002c4cb30d8dab615a290065e568dbc0afd4578 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1103,7 +1103,6 @@ private:
   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5239e5c4d91b100fee6ea6f88be1f05fd0c61ea1..c95ddee7795724b7997fccb8f3e9efc6d3f43502 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1344,19 +1344,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                        Subtarget->hasAnyDataBarrier() ? Custom : Expand);
 
-    // Set them all for expansion, which will force libcalls.
-    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+    // Set them all to LibCall, which will force libcalls.
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
     // Unordered/Monotonic case.
if (!InsertFencesForAtomic) { diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp index ea35608e6a7b774b8fdfedb7a2cadf9785548ab6..d97f59b5b2c76bd62bb6e399ac80b85daa4a6524 100644 --- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -127,19 +127,19 @@ Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM, if (!Subtarget.useSoftFloat()) setMips16HardFloatLibCalls(); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, LibCall); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall); setOperationAction(ISD::ROTR, MVT::i32, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f030982cb815d7b7bdf64544fd9cf136b199f46c..30abb592a20e8c6b2ad8162252f6d1a936603f3e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1132,14 +1132,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } } + if (Subtarget.hasStdExtA()) + setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); + if (Subtarget.hasForcedAtomics()) { - // Set atomic rmw/cas operations to expand to force __sync libcalls. + // Force __sync libcalls to be emitted for atomic rmw/cas operations. 
setOperationAction( {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, - XLenVT, Expand); + XLenVT, LibCall); } if (Subtarget.hasVendorXTHeadMemIdx()) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 8421109b85147e7de5b830b9bade8ada6ea8adc3..db1fa6db8e37db48315e718d6283544a7567b801 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -160,17 +160,6 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">; defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">; defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">; -def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)), - (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)), - (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)), - (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)), - (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)), - (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; - /// Pseudo AMOs class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), @@ -330,19 +319,6 @@ defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>; defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>; defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>; -/// 64-bit AMOs - -def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)), - (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)), - (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)), - (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)), - (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)), - (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; - /// 64-bit pseudo AMOs let Size = 20 in diff --git a/llvm/test/CodeGen/Mips/atomicops.ll b/llvm/test/CodeGen/Mips/atomicops.ll index a67b6206c37ebc04293cb949c54c4848e68093eb..14e401e1f09632becfea7b7ca97b5e544bddcfcb 100644 --- a/llvm/test/CodeGen/Mips/atomicops.ll +++ b/llvm/test/CodeGen/Mips/atomicops.ll @@ -12,6 +12,15 @@ entry: ; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}}) } +define i32 @atomic_load_sub(ptr %mem, i32 %val, i32 %c) nounwind { +; 16-LABEL: atomic_load_sub: +; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}}) +; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_sub_4)(${{[0-9]+}}) +entry: + %0 = atomicrmw sub ptr %mem, i32 %val seq_cst + ret i32 %0 +} + define i32 @main() nounwind { entry: %x = alloca i32, align 4 @@ -37,5 +46,3 @@ entry: } declare i32 @printf(ptr nocapture, ...) 
nounwind - - diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll new file mode 100644 index 0000000000000000000000000000000000000000..9fcf4c1b0541bd6daa03a5af3d792c6e7102f607 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll @@ -0,0 +1,181 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA %s + +define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { +; RV32I-LABEL: atomicrmw_sub_i32_constant: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: call __atomic_fetch_sub_4@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_sub_i32_constant: +; RV32IA: # %bb.0: +; RV32IA-NEXT: li a1, -1 +; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_sub_i32_constant: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_fetch_sub_4@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_sub_i32_constant: +; RV64IA: # %bb.0: +; RV64IA-NEXT: li a1, -1 +; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0) +; RV64IA-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 1 seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { +; RV32I-LABEL: atomicrmw_sub_i64_constant: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_fetch_sub_8@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_sub_i64_constant: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 1 +; RV32IA-NEXT: li a3, 5 +; RV32IA-NEXT: li a2, 0 +; RV32IA-NEXT: call __atomic_fetch_sub_8@plt +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_sub_i64_constant: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_fetch_sub_8@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_sub_i64_constant: +; RV64IA: # %bb.0: +; RV64IA-NEXT: li a1, -1 +; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0) +; RV64IA-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 1 seq_cst + ret i64 %1 +} + +define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { +; RV32I-LABEL: atomicrmw_sub_i32_neg: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sub a1, a1, 
a2 +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: call __atomic_fetch_sub_4@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_sub_i32_neg: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sub a2, a2, a1 +; RV32IA-NEXT: amoadd.w.aqrl a0, a2, (a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_sub_i32_neg: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: subw a1, a1, a2 +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_fetch_sub_4@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_sub_i32_neg: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sub a2, a2, a1 +; RV64IA-NEXT: amoadd.w.aqrl a0, a2, (a0) +; RV64IA-NEXT: ret + %b = sub i32 %x, %y + %1 = atomicrmw sub ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { +; RV32I-LABEL: atomicrmw_sub_i64_neg: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sltu a5, a1, a3 +; RV32I-NEXT: sub a2, a2, a4 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: call __atomic_fetch_sub_8@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_sub_i64_neg: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sltu a5, a1, a3 +; RV32IA-NEXT: sub a2, a2, a4 +; RV32IA-NEXT: sub a2, a2, a5 +; RV32IA-NEXT: sub a1, a1, a3 +; RV32IA-NEXT: li a3, 5 +; RV32IA-NEXT: call __atomic_fetch_sub_8@plt +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_sub_i64_neg: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_fetch_sub_8@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_sub_i64_neg: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sub a2, a2, a1 +; RV64IA-NEXT: amoadd.d.aqrl a0, a2, (a0) +; RV64IA-NEXT: ret + %b = sub i64 %x, %y + %1 = atomicrmw sub ptr %a, i64 %b seq_cst + ret i64 %1 +}
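
For reference, the RV64IA checks above show the effect of the new expansion path: since RISC-V has no native amosub, an atomicrmw sub is now selected as an amoadd of the negated operand instead of a __atomic_fetch_sub_4 libcall. A minimal standalone reproducer in the same style as the new test (the RUN line and function name here are illustrative only, not part of the patch):

; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s | FileCheck %s

; Per the RV64IA checks above, the constant 1 is negated to -1 and folded
; into an amoadd.w.aqrl rather than lowered to a __atomic_fetch_sub_4 call.
; CHECK-LABEL: fetch_sub_one:
; CHECK: li a1, -1
; CHECK: amoadd.w.aqrl a0, a1, (a0)
define i32 @fetch_sub_one(ptr %p) nounwind {
  %old = atomicrmw sub ptr %p, i32 1 seq_cst
  ret i32 %old
}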