From 14b255b3cfd9d8816572429208261717e300ed43 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Mon, 20 Nov 2023 08:37:52 +0000 Subject: [PATCH] [AArch64][SME] Add support for sme-fa64 (#70809) --- clang/lib/Basic/Targets/AArch64.cpp | 8 +++++ clang/lib/Basic/Targets/AArch64.h | 1 + .../llvm/TargetParser/AArch64TargetParser.h | 2 ++ llvm/lib/Target/AArch64/AArch64.td | 5 ++- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++ llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 2 +- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 10 +++--- .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 + ...eaming-mode-fixed-length-fp-reduce-fa64.ll | 33 +++++++++++++++++++ ...ing-mode-fixed-length-int-mla-neon-fa64.ll | 26 +++++++++++++++ llvm/test/MC/AArch64/SME/fa64-implies-sve2.s | 5 +++ .../TargetParser/TargetParserTest.cpp | 4 ++- 12 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll create mode 100644 llvm/test/MC/AArch64/SME/fa64-implies-sve2.s diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 7c4cc5fb33f8..2f6e8cd26610 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -666,6 +666,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { .Case("sme", HasSME) .Case("sme-f64f64", HasSMEF64F64) .Case("sme-i16i64", HasSMEI16I64) + .Case("sme-fa64", HasSMEFA64) .Cases("memtag", "memtag2", HasMTE) .Case("sb", HasSB) .Case("predres", HasPredRes) @@ -795,6 +796,13 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, HasBFloat16 = true; HasFullFP16 = true; } + if (Feature == "+sme-fa64") { + FPU |= NeonMode; + FPU |= SveMode; + HasSME = true; + HasSVE2 = true; + HasSMEFA64 = true; + } if (Feature == "+sb") HasSB = true; if (Feature == "+predres") diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 4304693e473d..0072b803cdbc 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -83,6 +83,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool HasFMV = true; bool HasGCS = false; bool HasRCPC3 = false; + bool HasSMEFA64 = false; const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A; diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 8b25cce0abdc..154e7e1ce987 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -154,6 +154,7 @@ enum ArchExtKind : uint64_t { AEK_RASv2 = 1ULL << 54, // FEAT_RASv2 AEK_ITE = 1ULL << 55, // FEAT_ITE AEK_GCS = 1ULL << 56, // FEAT_GCS + AEK_SMEFA64 = 1ULL << 57, // FEAT_SME_FA64 }; // clang-format on @@ -260,6 +261,7 @@ inline constexpr ExtensionInfo Extensions[] = { {"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_MAX, "", 0}, {"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550}, {"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_MAX, "", 0}, + {"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_MAX, "", 0}, // Special cases {"none", AArch64::AEK_NONE, {}, {}, FEAT_MAX, "", ExtensionInfo::MaxFMVPriority}, }; diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index fdb931a0fe6c..cd62313cf0fe 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -503,6 +503,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>; +def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", + "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>; + def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true", "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>; @@ -746,7 +749,7 @@ def SME2Unsupported : AArch64Unsupported { } def SMEUnsupported : AArch64Unsupported { - let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64], + let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64], SME2Unsupported.F); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 9b9103e01d67..3e3dc863dc5f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -154,6 +154,8 @@ def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">, AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">; def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">, AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">; +def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">, + AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">; def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">, AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; def HasSME2 : Predicate<"Subtarget->hasSME2()">, diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index cb88eddc2b22..2d33ad50ab3d 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel { list UnsupportedFeatures = [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth, HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1, - HasSVE2p1_or_HasSME2p1, HasSMEF16F16]; + HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSMEFA64]; let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 3d2e9304746a..d1ad8b69deae 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -489,11 +489,11 @@ bool AArch64Subtarget::isStreamingCompatible() const { } bool AArch64Subtarget::isNeonAvailable() const { - return hasNEON() && !isStreaming() && !isStreamingCompatible(); + return hasNEON() && + (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); } -bool AArch64Subtarget::isSVEAvailable() const{ - // FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet - // as we don't yet support the feature in LLVM. - return hasSVE() && !isStreaming() && !isStreamingCompatible(); +bool AArch64Subtarget::isSVEAvailable() const { + return hasSVE() && + (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4756746063d5..f4b731db05b6 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3625,6 +3625,7 @@ static const struct Extension { {"sb", {AArch64::FeatureSB}}, {"ssbs", {AArch64::FeatureSSBS}}, {"tme", {AArch64::FeatureTME}}, + {"sme-fa64", {AArch64::FeatureSMEFA64}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll new file mode 100644 index 000000000000..b56e67d95ba0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64 +; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64 + + +target triple = "aarch64-unknown-linux-gnu" + +define half @fadda_v4f16(half %start, <4 x half> %a) { +; FA64-LABEL: fadda_v4f16: +; FA64: // %bb.0: +; FA64-NEXT: ptrue p0.h, vl4 +; FA64-NEXT: // kill: def $h0 killed $h0 def $z0 +; FA64-NEXT: // kill: def $d1 killed $d1 def $z1 +; FA64-NEXT: fadda h0, p0, h0, z1.h +; FA64-NEXT: // kill: def $h0 killed $h0 killed $z0 +; FA64-NEXT: ret +; +; NO-FA64-LABEL: fadda_v4f16: +; NO-FA64: // %bb.0: +; NO-FA64-NEXT: // kill: def $d1 killed $d1 def $z1 +; NO-FA64-NEXT: fadd h0, h0, h1 +; NO-FA64-NEXT: mov z2.h, z1.h[1] +; NO-FA64-NEXT: fadd h0, h0, h2 +; NO-FA64-NEXT: mov z2.h, z1.h[2] +; NO-FA64-NEXT: mov z1.h, z1.h[3] +; NO-FA64-NEXT: fadd h0, h0, h2 +; NO-FA64-NEXT: fadd h0, h0, h1 +; NO-FA64-NEXT: ret + %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) + ret half %res +} + +declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll new file mode 100644 index 000000000000..149ad6d1e267 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64 +; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64 + +target triple = "aarch64-unknown-linux-gnu" + +define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { +; FA64-LABEL: mla8xi8: +; FA64: // %bb.0: +; FA64-NEXT: mla v2.8b, v0.8b, v1.8b +; FA64-NEXT: fmov d0, d2 +; FA64-NEXT: ret +; +; NO-FA64-LABEL: mla8xi8: +; NO-FA64: // %bb.0: +; NO-FA64-NEXT: ptrue p0.b, vl8 +; NO-FA64-NEXT: // kill: def $d0 killed $d0 def $z0 +; NO-FA64-NEXT: // kill: def $d2 killed $d2 def $z2 +; NO-FA64-NEXT: // kill: def $d1 killed $d1 def $z1 +; NO-FA64-NEXT: mad z0.b, p0/m, z1.b, z2.b +; NO-FA64-NEXT: // kill: def $d0 killed $d0 killed $z0 +; NO-FA64-NEXT: ret + %tmp1 = mul <8 x i8> %A, %B; + %tmp2 = add <8 x i8> %C, %tmp1; + ret <8 x i8> %tmp2 +} diff --git a/llvm/test/MC/AArch64/SME/fa64-implies-sve2.s b/llvm/test/MC/AArch64/SME/fa64-implies-sve2.s new file mode 100644 index 000000000000..80989fa8bc0b --- /dev/null +++ b/llvm/test/MC/AArch64/SME/fa64-implies-sve2.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-fa64 < %s | FileCheck %s + +// Verify sme-fa64 implies SVE2 +ldnt1sh z0.s, p0/z, [z1.s] +// CHECK: ldnt1sh { z0.s }, p0/z, [z1.s] diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index daa38474004e..465efa04c3da 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1610,7 +1610,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_B16B16, AArch64::AEK_SMEF16F16, AArch64::AEK_CSSC, AArch64::AEK_RCPC3, AArch64::AEK_THE, AArch64::AEK_D128, AArch64::AEK_LSE128, AArch64::AEK_SPECRES2, AArch64::AEK_RASv2, - AArch64::AEK_ITE, AArch64::AEK_GCS, + AArch64::AEK_ITE, AArch64::AEK_GCS, AArch64::AEK_SMEFA64, }; std::vector Features; @@ -1682,6 +1682,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+specres2")); EXPECT_TRUE(llvm::is_contained(Features, "+ite")); EXPECT_TRUE(llvm::is_contained(Features, "+gcs")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme-fa64")); // Assuming we listed every extension above, this should produce the same // result. (note that AEK_NONE doesn't have a name so it won't be in the @@ -1794,6 +1795,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"f32mm", "nof32mm", "+f32mm", "-f32mm"}, {"f64mm", "nof64mm", "+f64mm", "-f64mm"}, {"sme", "nosme", "+sme", "-sme"}, + {"sme-fa64", "nosme-fa64", "+sme-fa64", "-sme-fa64"}, {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"}, {"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"}, {"sme-f16f16", "nosme-f16f16", "+sme-f16f16", "-sme-f16f16"}, -- Gitee