From 14b255b3cfd9d8816572429208261717e300ed43 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau@arm.com>
Date: Mon, 20 Nov 2023 08:37:52 +0000
Subject: [PATCH] [AArch64][SME] Add support for sme-fa64 (#70809)

---
 clang/lib/Basic/Targets/AArch64.cpp           |  8 +++++
 clang/lib/Basic/Targets/AArch64.h             |  1 +
 .../llvm/TargetParser/AArch64TargetParser.h   |  2 ++
 llvm/lib/Target/AArch64/AArch64.td            |  5 ++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  2 ++
 llvm/lib/Target/AArch64/AArch64SchedA64FX.td  |  2 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  | 10 +++---
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |  1 +
 ...eaming-mode-fixed-length-fp-reduce-fa64.ll | 33 +++++++++++++++++++
 ...ing-mode-fixed-length-int-mla-neon-fa64.ll | 26 +++++++++++++++
 llvm/test/MC/AArch64/SME/fa64-implies-sve2.s  |  5 +++
 .../TargetParser/TargetParserTest.cpp         |  4 ++-
 12 files changed, 91 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
 create mode 100644 llvm/test/MC/AArch64/SME/fa64-implies-sve2.s

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 7c4cc5fb33f8..2f6e8cd26610 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -666,6 +666,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
       .Case("sme", HasSME)
       .Case("sme-f64f64", HasSMEF64F64)
       .Case("sme-i16i64", HasSMEI16I64)
+      .Case("sme-fa64", HasSMEFA64)
       .Cases("memtag", "memtag2", HasMTE)
       .Case("sb", HasSB)
       .Case("predres", HasPredRes)
@@ -795,6 +796,13 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasBFloat16 = true;
       HasFullFP16 = true;
     }
+    if (Feature == "+sme-fa64") {
+      FPU |= NeonMode;
+      FPU |= SveMode;
+      HasSME = true;
+      HasSVE2 = true;
+      HasSMEFA64 = true;
+    }
     if (Feature == "+sb")
       HasSB = true;
     if (Feature == "+predres")
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 4304693e473d..0072b803cdbc 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -83,6 +83,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasFMV = true;
   bool HasGCS = false;
   bool HasRCPC3 = false;
+  bool HasSMEFA64 = false;
 
   const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
 
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 8b25cce0abdc..154e7e1ce987 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -154,6 +154,7 @@ enum ArchExtKind : uint64_t {
   AEK_RASv2 =       1ULL << 54, // FEAT_RASv2
   AEK_ITE =         1ULL << 55, // FEAT_ITE
   AEK_GCS =         1ULL << 56, // FEAT_GCS
+  AEK_SMEFA64 =     1ULL << 57, // FEAT_SME_FA64
 };
 // clang-format on
 
@@ -260,6 +261,7 @@ inline constexpr ExtensionInfo Extensions[] = {
     {"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_MAX, "", 0},
     {"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
     {"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_MAX, "", 0},
+    {"sme-fa64",  AArch64::AEK_SMEFA64,  "+sme-fa64", "-sme-fa64",  FEAT_MAX, "", 0},
     // Special cases
     {"none", AArch64::AEK_NONE, {}, {}, FEAT_MAX, "", ExtensionInfo::MaxFMVPriority},
 };
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index fdb931a0fe6c..cd62313cf0fe 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -503,6 +503,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true",
 def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true",
   "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>;
 
+def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true",
+  "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;
+
 def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
   "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>;
 
@@ -746,7 +749,7 @@ def SME2Unsupported : AArch64Unsupported {
 }
 
 def SMEUnsupported : AArch64Unsupported {
-  let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],
+  let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64],
                       SME2Unsupported.F);
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9b9103e01d67..3e3dc863dc5f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -154,6 +154,8 @@ def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
 def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
+def HasSMEFA64       : Predicate<"Subtarget->hasSMEFA64()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
 def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
 def HasSME2          : Predicate<"Subtarget->hasSME2()">,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index cb88eddc2b22..2d33ad50ab3d 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
   list<Predicate> UnsupportedFeatures =
     [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
      HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
-     HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
+     HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSMEFA64];
 
   let FullInstRWOverlapCheck = 0;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 3d2e9304746a..d1ad8b69deae 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -489,11 +489,11 @@ bool AArch64Subtarget::isStreamingCompatible() const {
 }
 
 bool AArch64Subtarget::isNeonAvailable() const {
-  return hasNEON() && !isStreaming() && !isStreamingCompatible();
+  return hasNEON() &&
+         (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
 }
 
-bool AArch64Subtarget::isSVEAvailable() const{
-  // FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet
-  // as we don't yet support the feature in LLVM.
-  return hasSVE() && !isStreaming() && !isStreamingCompatible();
+bool AArch64Subtarget::isSVEAvailable() const {
+  return hasSVE() &&
+         (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
 }
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 4756746063d5..f4b731db05b6 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3625,6 +3625,7 @@ static const struct Extension {
     {"sb", {AArch64::FeatureSB}},
     {"ssbs", {AArch64::FeatureSSBS}},
     {"tme", {AArch64::FeatureTME}},
+    {"sme-fa64",  {AArch64::FeatureSMEFA64}},
 };
 
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
new file mode 100644
index 000000000000..b56e67d95ba0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64
+; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define half @fadda_v4f16(half %start, <4 x half> %a) {
+; FA64-LABEL: fadda_v4f16:
+; FA64:       // %bb.0:
+; FA64-NEXT:    ptrue p0.h, vl4
+; FA64-NEXT:    // kill: def $h0 killed $h0 def $z0
+; FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
+; FA64-NEXT:    fadda h0, p0, h0, z1.h
+; FA64-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; FA64-NEXT:    ret
+;
+; NO-FA64-LABEL: fadda_v4f16:
+; NO-FA64:       // %bb.0:
+; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
+; NO-FA64-NEXT:    fadd h0, h0, h1
+; NO-FA64-NEXT:    mov z2.h, z1.h[1]
+; NO-FA64-NEXT:    fadd h0, h0, h2
+; NO-FA64-NEXT:    mov z2.h, z1.h[2]
+; NO-FA64-NEXT:    mov z1.h, z1.h[3]
+; NO-FA64-NEXT:    fadd h0, h0, h2
+; NO-FA64-NEXT:    fadd h0, h0, h1
+; NO-FA64-NEXT:    ret
+  %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
+  ret half %res
+}
+
+declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
new file mode 100644
index 000000000000..149ad6d1e267
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64
+; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
+; FA64-LABEL: mla8xi8:
+; FA64:       // %bb.0:
+; FA64-NEXT:    mla v2.8b, v0.8b, v1.8b
+; FA64-NEXT:    fmov d0, d2
+; FA64-NEXT:    ret
+;
+; NO-FA64-LABEL: mla8xi8:
+; NO-FA64:       // %bb.0:
+; NO-FA64-NEXT:    ptrue p0.b, vl8
+; NO-FA64-NEXT:    // kill: def $d0 killed $d0 def $z0
+; NO-FA64-NEXT:    // kill: def $d2 killed $d2 def $z2
+; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
+; NO-FA64-NEXT:    mad z0.b, p0/m, z1.b, z2.b
+; NO-FA64-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NO-FA64-NEXT:    ret
+  %tmp1 = mul <8 x i8> %A, %B;
+  %tmp2 = add <8 x i8> %C, %tmp1;
+  ret <8 x i8> %tmp2
+}
diff --git a/llvm/test/MC/AArch64/SME/fa64-implies-sve2.s b/llvm/test/MC/AArch64/SME/fa64-implies-sve2.s
new file mode 100644
index 000000000000..80989fa8bc0b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/fa64-implies-sve2.s
@@ -0,0 +1,5 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-fa64 < %s | FileCheck %s
+
+// Verify that sme-fa64 implies SVE2: this vector-base (gather) form of ldnt1sh is an SVE2 instruction.
+ldnt1sh z0.s, p0/z, [z1.s]
+// CHECK: ldnt1sh { z0.s }, p0/z, [z1.s]
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index daa38474004e..465efa04c3da 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1610,7 +1610,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_B16B16,  AArch64::AEK_SMEF16F16, AArch64::AEK_CSSC,
       AArch64::AEK_RCPC3,   AArch64::AEK_THE,       AArch64::AEK_D128,
       AArch64::AEK_LSE128,  AArch64::AEK_SPECRES2,  AArch64::AEK_RASv2,
-      AArch64::AEK_ITE,     AArch64::AEK_GCS,
+      AArch64::AEK_ITE,     AArch64::AEK_GCS,       AArch64::AEK_SMEFA64,
   };
 
   std::vector<StringRef> Features;
@@ -1682,6 +1682,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+specres2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+ite"));
   EXPECT_TRUE(llvm::is_contained(Features, "+gcs"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sme-fa64"));
 
   // Assuming we listed every extension above, this should produce the same
   // result. (note that AEK_NONE doesn't have a name so it won't be in the
@@ -1794,6 +1795,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
       {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
       {"sme", "nosme", "+sme", "-sme"},
+      {"sme-fa64", "nosme-fa64", "+sme-fa64", "-sme-fa64"},
       {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"},
       {"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"},
       {"sme-f16f16", "nosme-f16f16", "+sme-f16f16", "-sme-f16f16"},
-- 
Gitee