diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index e990abad40182f637b3fe764cfa92a2407c7eb51..333b6232479abead8d25176cb31ce1782db497be 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -71,6 +71,9 @@ def FeatureRASv2 : SubtargetFeature<"rasv2", "HasRASv2", "true", def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", "Enable ARMv8.1 Large System Extension (LSE) atomic instructions (FEAT_LSE)">; +def FeatureNoLSECAS : SubtargetFeature<"no-lse-cas", "HasNoLSECAS", "false", + "Disable ARMv8.1 Large System Extension (LSE) atomic instructions CAS">; + def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)">; diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 691149cd65401a6e0076aa3a1fad16cab60e3b53..be968112d3fece1d826e25ebbb37df9a510d11dc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -3872,7 +3872,8 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { EVT MemTy = cast(N)->getMemoryVT(); // Leave IR for LSE if subtarget supports it. - if (Subtarget->hasLSE()) return false; + if (Subtarget->hasLSE() && !Subtarget->hasFeature(AArch64::FeatureNoLSECAS)) + return false; if (MemTy == MVT::i8) Opcode = AArch64::CMP_SWAP_8; diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index 1427886d71c0738296d6cdfdb224f7c56c5c56ad..997bf512e6dfb47b4c4fd862bc7fc97cb0624bae 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -534,7 +534,6 @@ let Predicates = [HasLSE] in { defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">; defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">; defm : LDOPregister_patterns<"SWP", "atomic_swap">; - defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; // These two patterns are only needed for global isel, selection dag isel // converts atomic load-sub into a sub and atomic load-add, and likewise for @@ -543,6 +542,10 @@ let Predicates = [HasLSE] in { defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; } +let Predicates = [HasLSEAndAllowCAS] in { + defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; +} + // v8.9a/v9.4a FEAT_LRCPC patterns let Predicates = [HasRCPC3, HasNEON] in { // LDAP1 loads diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f0fc36fb26f1a92ef3aca6af96de302e3018231f..681db5d5c7a70943c1f65a2a7d31e5c07f6e62ab 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -118,6 +118,7 @@ def HasCSSC : Predicate<"Subtarget->hasCSSC()">, def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">; def HasLSE : Predicate<"Subtarget->hasLSE()">, AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">; +def HasLSEAndAllowCAS : Predicate<"Subtarget->hasLSE() && !Subtarget->hasFeature(AArch64::FeatureNoLSECAS)">; def HasNoLSE : Predicate<"!Subtarget->hasLSE()">; def HasRAS : Predicate<"Subtarget->hasRAS()">, AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">; diff --git a/llvm/test/CodeGen/AArch64/atomic_cmp_swap.ll b/llvm/test/CodeGen/AArch64/atomic_cmp_swap.ll new file mode 100644 index 0000000000000000000000000000000000000000..396331c8771c58b1e9085aaf616860cb8538b55a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/atomic_cmp_swap.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+lse %s -o - | FileCheck %s --check-prefix=CHECK-LSE +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+lse,+no-lse-cas %s -o - | FileCheck %s --check-prefix=CHECK-FALLBACK + +; Check LSE path: expect CAS instruction (case-insensitive asm may be lowercase) +; CHECK-LSE: casal + +; Check fallback path: expect LDAXR and STLXR +; CHECK-FALLBACK: ldaxr +; CHECK-FALLBACK: stlxr + +@var8 = dso_local global i8 0 +define dso_local i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind { +; CHECK-LSE-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK-LSE: // %bb.0: +; CHECK-LSE-NEXT: adrp x8, var8 +; CHECK-LSE-NEXT: add x8, x8, :lo12:var8 +; CHECK-LSE-NEXT: casalb w0, w1, [x8] +; CHECK-LSE-NEXT: ret +; +; CHECK-FALLBACK-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK-FALLBACK: // %bb.0: +; CHECK-FALLBACK-NEXT: adrp x9, var8 +; CHECK-FALLBACK-NEXT: add x9, x9, :lo12:var8 +; CHECK-FALLBACK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 +; CHECK-FALLBACK-NEXT: ldaxrb w8, [x9] +; CHECK-FALLBACK-NEXT: cmp w8, w0, uxtb +; CHECK-FALLBACK-NEXT: b.ne .LBB0_3 +; CHECK-FALLBACK-NEXT: // %bb.2: // in Loop: Header=BB0_1 Depth=1 +; CHECK-FALLBACK-NEXT: stlxrb wzr, w1, [x9] +; CHECK-FALLBACK-NEXT: cbnz wzr, .LBB0_1 +; CHECK-FALLBACK-NEXT: .LBB0_3: +; CHECK-FALLBACK-NEXT: mov w0, w8 +; CHECK-FALLBACK-NEXT: ret + %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + ret i8 %old +}