From 73ebac5a535c87fd2a46a6f28c242ce8c68efbab Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 24 May 2021 22:30:29 -0400 Subject: [PATCH 01/11] Re-enable vector support This reverts commit 8aa69dd6c552f1446047bb6b26981bb8ae7e5c62. --- Clang2MapleVisitor.cpp | 445 ++++++++++++++++++++++++++++++++++++++--- Clang2MapleVisitor.h | 6 + 2 files changed, 419 insertions(+), 32 deletions(-) diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp index fab5826..f413f46 100644 --- a/Clang2MapleVisitor.cpp +++ b/Clang2MapleVisitor.cpp @@ -353,6 +353,11 @@ MIRSymbol *Clang2MapleVisitor::VisitParmVarDecl(const clang::ParmVarDecl *Var) { MIRSymbol * Clang2MapleVisitor::VisitFunctionDecl(const clang::FunctionDecl *Func) { + // Ignore Maple builtins' prototypes + if (Func->getName().startswith("__builtin")) { + return nullptr; + } + std::string Name = Func->getName().str(); if (Name[0] == '.') { Name[0] = '_'; @@ -569,6 +574,8 @@ Result Clang2MapleVisitor::VisitDeclStmt(const clang::DeclStmt *DS) { AddrofNode *BaseAddr = Builder->CreateExprAddrof(0, *Sym); BaseAddr->SetPrimType(PointerPrimTy); assignArray(Res, BaseAddr, ArrayTy, Init); + } else if (PrimitiveType(Sym->GetType()->GetPrimType()).IsVector()) { + assignVector(Res, Sym, Sym->GetType(), Init); } else { DassignNode *Assign = Builder->CreateStmtDassign(Sym->GetStIdx(), 0, Init); @@ -1382,12 +1389,15 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { BaseNode *Call; // Special cases for some builtins - if (CalleeDecl->getName() == "__builtin_va_start") { + unsigned int BuiltinID = CalleeDecl->getBuiltinID(); + switch (CalleeDecl->getBuiltinID()) { + case clang::Builtin::BI__builtin_va_start: // The address of the ap_list parameter needs to be passed Args[0] = getAddrOfNode(Args[0]); Call = Builder->CreateStmtIntrinsicCall(INTRN_C_va_start, Args); - } else if (CalleeDecl->getName() == "__builtin_va_copy") { + break; + case clang::Builtin::BI__builtin_va_copy: { ASSERT(Args.size() == 2, "ap_copy expects 2 arguments"); // The address of the ap_list parameters needs to be passed Args[0] = getAddrOfNode(Args[0]); @@ -1399,18 +1409,11 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { Args.push_back(Builder->GetConstInt(APListTy->GetSize())); Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcpy, Args); - } else if (CalleeDecl->getName() == "__builtin_va_end") { + } break; + case clang::Builtin::BI__builtin_va_end: // Nothing needs to be done for this function return Res; - // } else if (CalleeDecl->getName() == "__builtin_memcmp") { - // Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcmp, Args); - // } else if (CalleeDecl->getName() == "__builtin_memcpy") { - // Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcpy, Args); - // } else if (CalleeDecl->getName() == "__builtin_memset") { - // Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memset, Args); - // } else if (CalleeDecl->getName() == "__builtin_ffs") { - // Call = Builder->CreateStmtIntrinsicCall(INTRN_C_ffs, Args); - } else if (CalleeDecl->getName() == "__builtin_ctz") { + case clang::Builtin::BI__builtin_ctz: if (Ty->GetSize() == 4) { Call = Builder->CreateExprIntrinsicop(INTRN_C_ctz32, OP_intrinsicop, *Ty, Args); @@ -1418,7 +1421,8 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { Call = Builder->CreateExprIntrinsicop(INTRN_C_ctz64, OP_intrinsicop, *Ty, Args); } - } else if (CalleeDecl->getName() == "__builtin_clz") { + break; + case clang::Builtin::BI__builtin_clz: if (Ty->GetSize() == 4) { Call = 
Builder->CreateExprIntrinsicop(INTRN_C_clz32, OP_intrinsicop, *Ty, Args); @@ -1426,11 +1430,14 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { Call = Builder->CreateExprIntrinsicop(INTRN_C_clz64, OP_intrinsicop, *Ty, Args); } - } else if (CalleeDecl->getName() == "alloca" || - CalleeDecl->getName() == "__builtin_alloca") { + break; + case clang::Builtin::BI__builtin_alloca: + case clang::Builtin::BI_alloca: + case clang::Builtin::BIalloca: Call = Builder->CreateExprUnary(OP_alloca, *Ty, Args[0]); Call->SetPrimType(PointerPrimTy); - } else if (CalleeDecl->getName() == "__builtin_constant_p") { + break; + case clang::Builtin::BI__builtin_constant_p: { int Val = CE->getArg(0)->isConstantInitializer(*Context, false) ? 1 : 0; // Pointers are not considered constant if (CE->getArg(0)->getType()->isPointerType() && @@ -1438,14 +1445,16 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { Val = 0; } Call = Builder->CreateIntConst(Val, Ty->GetPrimType()); - } else if (CalleeDecl->getName() == "__builtin_classify_type") { + } break; + case clang::Builtin::BI__builtin_classify_type: { // Let Clang figure out the type classification clang::Expr::EvalResult R; bool Success = CE->EvaluateAsInt(R, *Context); ASSERT(Success, "Failed to evaluate __builtin_classify_type"); llvm::APSInt Val = R.Val.getInt(); Call = Builder->CreateIntConst(Val.getExtValue(), Ty->GetPrimType()); - } else if (CalleeDecl->getName() == "__builtin_expect") { + } break; + case clang::Builtin::BI__builtin_expect: { ASSERT(Args.size() == 2, "__builtin_expect requires two arguments"); // Arg 0 is the expression and arg 1 is the expected value. Call = Args[0]; @@ -1459,14 +1468,16 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { SNode = Builder->CreateStmtUnary(OP_eval, Args[1]); } Res.appendStmtBefore(SNode); - } else if (CalleeDecl->getName() == "__builtin_signbit") { + } break; + case clang::Builtin::BI__builtin_signbit: { MIRFunction *Callee = Builder->GetOrCreateFunction("__signbit", Ty->GetTypeIndex()); Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args); - } else if (CalleeDecl->getName() == "__builtin_prefetch") { + } break; + case clang::Builtin::BI__builtin_prefetch: // TODO: Do something useful here. 
return Res; - } else if (CalleeDecl->getName() == "__builtin_isinf_sign") { + case clang::Builtin::BI__builtin_isinf_sign: ASSERT(Args.size() == 1, "Incorrect arguments to isinf"); if (Args[0]->GetPrimType() == PTY_f64) { Call = Builder->CreateStmtCall("__isinf", Args); @@ -1475,15 +1486,101 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { } else { ASSERT(false, "Unsupported type passed to isinf"); } - } else if (CalleeDecl->getName().startswith("__builtin_")) { - MIRFunction *Callee = Builder->GetOrCreateFunction( - CalleeDecl->getName().substr(10).str(), Ty->GetTypeIndex()); - Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args); - } else { - MIRSymbol *CalleeSym = decl2Mpl(CalleeDecl); - MIRFunction *Callee = CalleeSym->GetFunction(); + break; + default: { + llvm::StringRef CalleeName = CalleeDecl->getName(); + if (CalleeName.startswith("__builtin_mpl_vector_")) { + StringRef VectorOpName = + CalleeName.substr(strlen("__builtin_mpl_vector_")); + +// clang-format off +#define VECTOR_INTRINSIC_TYPE(OP_NAME, VECTY) \ + if (VectorOpName.endswith(#VECTY)) { \ + Call = Builder->CreateExprIntrinsicop(INTRN_vector_##OP_NAME##_##VECTY, \ + OP_intrinsicop, *Ty, Args); \ + } +#define VECTOR_INTRINSIC(OP_NAME) \ + if (VectorOpName.startswith(#OP_NAME)) { \ + VECTOR_INTRINSIC_TYPE(OP_NAME, v2i64) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v4i32) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v8i16) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v16i8) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v2u64) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v4u32) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v8u16) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v16u8) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v2f64) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v4f32) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v1i64) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v2i32) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v4i16) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v8i8) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v1u64) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v2u32) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v4u16) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v8u8) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v1f64) \ + else VECTOR_INTRINSIC_TYPE(OP_NAME, v2f32) \ + } + + VECTOR_INTRINSIC(from_scalar) + else VECTOR_INTRINSIC(merge) + else VECTOR_INTRINSIC(get_element) + else VECTOR_INTRINSIC(load) + else VECTOR_INTRINSIC(reverse) + else VECTOR_INTRINSIC(set_element) + else VECTOR_INTRINSIC(store) + else if (VectorOpName.startswith("get_low")) { + VECTOR_INTRINSIC_TYPE(get_low, v2i64) + else VECTOR_INTRINSIC_TYPE(get_low, v4i32) + else VECTOR_INTRINSIC_TYPE(get_low, v8i16) + else VECTOR_INTRINSIC_TYPE(get_low, v16i8) + else VECTOR_INTRINSIC_TYPE(get_low, v2u64) + else VECTOR_INTRINSIC_TYPE(get_low, v4u32) + else VECTOR_INTRINSIC_TYPE(get_low, v8u16) + else VECTOR_INTRINSIC_TYPE(get_low, v16u8) + else VECTOR_INTRINSIC_TYPE(get_low, v2f64) + else VECTOR_INTRINSIC_TYPE(get_low, v4f32) + } else if (VectorOpName.startswith("get_high")) { + VECTOR_INTRINSIC_TYPE(get_high, v2i64) + else VECTOR_INTRINSIC_TYPE(get_high, v4i32) + else VECTOR_INTRINSIC_TYPE(get_high, v8i16) + else VECTOR_INTRINSIC_TYPE(get_high, v16i8) + else VECTOR_INTRINSIC_TYPE(get_high, v2u64) + else VECTOR_INTRINSIC_TYPE(get_high, v4u32) + else VECTOR_INTRINSIC_TYPE(get_high, v8u16) + else VECTOR_INTRINSIC_TYPE(get_high, v16u8) + else VECTOR_INTRINSIC_TYPE(get_high, v2f64) + else VECTOR_INTRINSIC_TYPE(get_high, v4f32) + } else if (VectorOpName.startswith("pairwise_add")) { + 
VECTOR_INTRINSIC_TYPE(pairwise_add, v4i32) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v8i16) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v16i8) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v4u32) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v8u16) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v16u8) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v2i32) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v4i16) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v8i8) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v2u32) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v4u16) + else VECTOR_INTRINSIC_TYPE(pairwise_add, v8u8) + } + // clang-format on + } else if (CalleeDecl->getName().startswith("__builtin_")) { + MIRFunction *Callee = Builder->GetOrCreateFunction( + CalleeDecl->getName().substr(10).str(), Ty->GetTypeIndex()); + Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args); + } else if (CalleeDecl->getName() == "alloca") { + Call = Builder->CreateExprUnary(OP_alloca, *Ty, Args[0]); + Call->SetPrimType(PointerPrimTy); + } else { + MIRSymbol *CalleeSym = decl2Mpl(CalleeDecl); + MIRFunction *Callee = CalleeSym->GetFunction(); - Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args); + Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args); + } + } } Res.setNode(Call, Ty); @@ -1628,7 +1725,8 @@ Result Clang2MapleVisitor::VisitChooseExpr(const clang::ChooseExpr *CE) { Result Clang2MapleVisitor::VisitCompoundLiteralExpr( const clang::CompoundLiteralExpr *CLE) { MIRType *Ty = type2Mpl(CLE->getType()); - ASSERT(Ty->IsStructType() || Ty->GetKind() == kTypeArray, + ASSERT(Ty->IsStructType() || Ty->GetKind() == kTypeArray || + PrimitiveType(Ty->GetPrimType()).IsVector(), "Unexpected type of CompoundLiteralExpr (not struct or array)"); // Create a temporary value for this structure and initialize it using this @@ -1644,6 +1742,8 @@ Result Clang2MapleVisitor::VisitCompoundLiteralExpr( if (Ty->IsStructType()) { MIRStructType *StructTy = static_cast(Ty); assignStruct(Res, Temp, nullptr, StructTy, InitNode); + } else if (PrimitiveType(Ty->GetPrimType()).IsVector()) { + assignVector(Res, Temp, Ty, InitNode); } else { // array MIRArrayType *ArrayTy = static_cast(Ty); BaseNode *Base = Builder->CreateExprAddrof(0, *Temp); @@ -1805,7 +1905,7 @@ Result Clang2MapleVisitor::VisitInitListExpr(const clang::InitListExpr *ILE) { // Ignore an init list expression for a scalar. 
For example: // int x[3] = {{3}}; - if (Ty->IsScalarType()) { + if (Ty->IsScalarType() && !PrimitiveType(Ty->GetPrimType()).IsVector()) { return Visit(ILE->getInit(0)); } @@ -2045,6 +2145,12 @@ Clang2MapleVisitor::VisitPredefinedExpr(const clang::PredefinedExpr *PE) { return Visit(PE->getFunctionName()); } +// FIXME: This needs to be properly implemented +Result Clang2MapleVisitor::VisitShuffleVectorExpr( + const clang::ShuffleVectorExpr *SVE) { + return Visit(SVE->getExpr(0)); +} + Result Clang2MapleVisitor::VisitStmtExpr(const clang::StmtExpr *SE) { Result Res(SE->getBeginLoc()); const clang::CompoundStmt *CS = SE->getSubStmt(); @@ -2681,6 +2787,9 @@ void Clang2MapleVisitor::setupBuiltinTypes(void) { // unsigned __int128 is handled as a u64 {Context->UnsignedInt128Ty.getTypePtr(), GlobalTables::GetTypeTable().GetUInt64()->GetTypeIndex()}, + // __fp16 is handled as a float + {Context->HalfTy.getTypePtr(), + GlobalTables::GetTypeTable().GetFloat()->GetTypeIndex()}, {Context->FloatTy.getTypePtr(), GlobalTables::GetTypeTable().GetFloat()->GetTypeIndex()}, {Context->DoubleTy.getTypePtr(), @@ -2882,7 +2991,132 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { } else if (Ty->isEnumeralType()) { const clang::EnumType *EnumTy = llvm::dyn_cast(Ty); clang::QualType QT = EnumTy->getDecl()->getIntegerType(); - return type2MplIdx(QT); + TyIdx TI = type2MplIdx(QT); + TypeMap.insert({Ty, TI}); + return TI; + } else if (Ty->isVectorType()) { + const clang::VectorType *VecTy = llvm::dyn_cast(Ty); + MIRType *ElemTy = type2Mpl(VecTy->getElementType()); + unsigned NumElements = VecTy->getNumElements(); + + TyIdx TI(0); + switch (ElemTy->GetPrimType()) { + case PTY_i64: + if (NumElements == 1) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v1i64)->GetTypeIndex(); + } else if (NumElements == 2) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v2i64)->GetTypeIndex(); + } else { + ASSERT(false, "Unsupported vector type"); + } + break; + case PTY_i32: + if (NumElements == 2) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v2i32)->GetTypeIndex(); + } else if (NumElements == 4) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v4i32)->GetTypeIndex(); + } else { + ASSERT(false, "Unsupported vector type"); + } + break; + case PTY_i16: + if (NumElements == 4) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v4i16)->GetTypeIndex(); + } else if (NumElements == 8) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v8i16)->GetTypeIndex(); + } else { + ASSERT(false, "Unsupported vector type"); + } + break; + case PTY_i8: + if (NumElements == 8) { + TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v8i8)->GetTypeIndex(); + } else if (NumElements == 16) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v16i8)->GetTypeIndex(); + } else { + ASSERT(false, "Unsupported vector type"); + } + break; + case PTY_u64: + if (NumElements == 1) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v1i64)->GetTypeIndex(); + } else if (NumElements == 2) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v2u64)->GetTypeIndex(); + } else { + ASSERT(false, "Unsupported vector type"); + } + break; + case PTY_u32: + if (NumElements == 2) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v2u32)->GetTypeIndex(); + } else if (NumElements == 4) { + TI = + GlobalTables::GetTypeTable().GetPrimType(PTY_v4u32)->GetTypeIndex(); + } else { + ASSERT(false, "Unsupported vector type"); + } + break; + case PTY_u16: + if (NumElements == 4) { + TI = + 
GlobalTables::GetTypeTable().GetPrimType(PTY_v4u16)->GetTypeIndex();
+    } else if (NumElements == 8) {
+      TI =
+          GlobalTables::GetTypeTable().GetPrimType(PTY_v8u16)->GetTypeIndex();
+    } else {
+      ASSERT(false, "Unsupported vector type");
+    }
+    break;
+  case PTY_u8:
+    if (NumElements == 8) {
+      TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v8u8)->GetTypeIndex();
+    } else if (NumElements == 16) {
+      TI =
+          GlobalTables::GetTypeTable().GetPrimType(PTY_v16u8)->GetTypeIndex();
+    } else {
+      ASSERT(false, "Unsupported vector type");
+    }
+    break;
+  case PTY_f64:
+    if (NumElements == 1) {
+      TI =
+          GlobalTables::GetTypeTable().GetPrimType(PTY_v1f64)->GetTypeIndex();
+    } else if (NumElements == 2) {
+      TI =
+          GlobalTables::GetTypeTable().GetPrimType(PTY_v2f64)->GetTypeIndex();
+    } else {
+      ASSERT(false, "Unsupported vector type");
+    }
+    break;
+  case PTY_f32:
+    if (NumElements == 2) {
+      TI =
+          GlobalTables::GetTypeTable().GetPrimType(PTY_v2f32)->GetTypeIndex();
+    } else if (NumElements == 4) {
+      TI =
+          GlobalTables::GetTypeTable().GetPrimType(PTY_v4f32)->GetTypeIndex();
+    } else {
+      ASSERT(false, "Unsupported vector type");
+    }
+    break;
+  default:
+    Ty->dump();
+    ASSERT(false, "Unsupported vector type");
+    break;
+  }
+
+  TypeMap.insert({Ty, TI});
+  return TI;
   }
 
   Ty->dump();
@@ -3854,6 +4088,153 @@ unsigned Clang2MapleVisitor::assignArray(Result &Res, BaseNode *BaseAddr,
   return ArrayTy->GetSize();
 }
 
+unsigned Clang2MapleVisitor::assignVector(Result &Res, MIRSymbol *VecSym,
+                                          MIRType *Ty, BaseNode *Src) {
+  Result VecRes(VecSym, Ty, Res.getLoc());
+  SrcPosition Pos = sourceLocation2Mpl(Res.getLoc());
+  size_t SizeInited = 0;
+  if (Src->op == OP_constval) { // constant initializers
+    ConstvalNode *ConstNode = static_cast<ConstvalNode *>(Src);
+    MIRConst *Const = ConstNode->GetConstVal();
+    if (Const->GetKind() == kConstAggConst) {
+      MIRAggConst *Agg = static_cast<MIRAggConst *>(Const);
+      for (unsigned ElemID = 0; ElemID < Agg->GetConstVec().size(); ++ElemID) {
+        BaseNode *IndexNode =
+            Builder->CreateIntConst(ElemID, IntPointerTy->GetPrimType());
+        MIRConst *ElemConst = Agg->GetConstVecItem(ElemID);
+        BaseNode *Elem = constToNode(ElemConst);
+
+        StmtNode *ElemAssign = vectorSetLane(Ty, VecRes, IndexNode, Elem);
+        ElemAssign->SetSrcPos(Pos);
+        Res.appendStmtBefore(ElemAssign);
+        SizeInited += 1;
+      }
+    }
+  } else if (Src->op == OP_intrinsicopwithtype) { // non-constant initializers
+    NaryNode *N = static_cast<NaryNode *>(Src);
+    for (size_t ElemID = 0; ElemID < N->NumOpnds(); ++ElemID) {
+      BaseNode *IndexNode =
+          Builder->CreateIntConst(ElemID, IntPointerTy->GetPrimType());
+
+      StmtNode *ElemAssign =
+          vectorSetLane(Ty, VecRes, IndexNode, N->Opnd(ElemID));
+      ElemAssign->SetSrcPos(Pos);
+      Res.appendStmtBefore(ElemAssign);
+      SizeInited += 1;
+    }
+  } else {
+    LogInfo::MapleLogger()
+        << "Warning: Unhandled aggregate initializer in assignVector\n";
+    Src->Dump();
+  }
+
+  return SizeInited;
+}
+
+StmtNode *Clang2MapleVisitor::vectorSetLane(MIRType *Ty, Result &VecRes,
+                                            BaseNode *Index, BaseNode *Src) {
+  MapleVector<BaseNode *> Args(
+      Builder->GetCurrentFuncCodeMpAllocator()->Adapter());
+  Args.push_back(getNodeAsRVal(VecRes));
+  Args.push_back(Index);
+  Args.push_back(Src);
+
+  MIRIntrinsicID Intrinsic;
+  switch (Ty->GetPrimType()) {
+#define SETQ_LANE(TY) \
+  case PTY_##TY: \
+    Intrinsic = INTRN_vector_set_element_##TY; \
+    break;
+
+    SETQ_LANE(v2i64)
+    SETQ_LANE(v4i32)
+    SETQ_LANE(v8i16)
+    SETQ_LANE(v16i8)
+    SETQ_LANE(v2u64)
+    SETQ_LANE(v4u32)
+    SETQ_LANE(v8u16)
+    SETQ_LANE(v16u8)
+    SETQ_LANE(v2f64)
+    SETQ_LANE(v4f32)
+    SETQ_LANE(v1i64)
+    SETQ_LANE(v2i32)
+    SETQ_LANE(v4i16)
+    SETQ_LANE(v8i8)
+    SETQ_LANE(v1u64)
+    SETQ_LANE(v2u32)
+    SETQ_LANE(v4u16)
+    SETQ_LANE(v8u8)
+    SETQ_LANE(v1f64)
+    SETQ_LANE(v2f32)
+  default:
+    ASSERT(false, "Unhandled vector type");
+    return nullptr;
+  }
+
+  BaseNode *CallIntrinsic =
+      Builder->CreateExprIntrinsicop(Intrinsic, OP_intrinsicop, *Ty, Args);
+  StmtNode *Assign = nullptr;
+  if (VecRes.isDeref()) {
+    Assign = Builder->CreateStmtIassign(*VecRes.getAddrTy(), VecRes.getField(),
+                                        VecRes.getAddr(), CallIntrinsic);
+  } else if (VecRes.isSym()) {
+    Assign = Builder->CreateStmtDassign(*VecRes.getSym(), VecRes.getField(),
+                                        CallIntrinsic);
+  } else {
+    ASSERT(false, "Error: Destination vector is neither deref nor symbol");
+  }
+
+  return Assign;
+}
+
+PrimType Clang2MapleVisitor::getVectorElementPrimType(PrimType VectorPrimType) {
+  switch (VectorPrimType) {
+
+  case PTY_v2i64:
+    return PTY_i64;
+  case PTY_v4i32:
+    return PTY_i32;
+  case PTY_v8i16:
+    return PTY_i16;
+  case PTY_v16i8:
+    return PTY_i8;
+  case PTY_v2u64:
+    return PTY_u64;
+  case PTY_v4u32:
+    return PTY_u32;
+  case PTY_v8u16:
+    return PTY_u16;
+  case PTY_v16u8:
+    return PTY_u8;
+  case PTY_v2f64:
+    return PTY_f64;
+  case PTY_v4f32:
+    return PTY_f32;
+  case PTY_v1i64:
+    return PTY_i64;
+  case PTY_v2i32:
+    return PTY_i32;
+  case PTY_v4i16:
+    return PTY_i16;
+  case PTY_v8i8:
+    return PTY_i8;
+  case PTY_v1u64:
+  case PTY_v2u32:
+    return PTY_u32;
+  case PTY_v4u16:
+    return PTY_u16;
+  case PTY_v8u8:
+    return PTY_u8;
+  case PTY_v1f64:
+    return PTY_i64;
+  case PTY_v2f32:
+    return PTY_f32;
+  default:
+    ASSERT(false, "Unhandled vector type");
+    return PTY_unknown;
+  }
+}
+
 // TODO: This function has a lot of duplication and may miss some possible
 // combinations. It should be improved, possibly using some recursion to
 // evaluate sub-expressions.
diff --git a/Clang2MapleVisitor.h b/Clang2MapleVisitor.h
index c5e0040..136227a 100644
--- a/Clang2MapleVisitor.h
+++ b/Clang2MapleVisitor.h
@@ -133,6 +133,7 @@ public:
   Result VisitOpaqueValueExpr(const clang::OpaqueValueExpr *OVE);
   Result VisitParenExpr(const clang::ParenExpr *PE);
   Result VisitPredefinedExpr(const clang::PredefinedExpr *PE);
+  Result VisitShuffleVectorExpr(const clang::ShuffleVectorExpr *SVE);
   Result VisitStmtExpr(const clang::StmtExpr *SE);
   Result VisitStringLiteral(const clang::StringLiteral *Lit);
   Result VisitTypeTraitExpr(const clang::TypeTraitExpr *TTE);
@@ -169,6 +170,11 @@ private:
                       FieldID Base = 0);
   unsigned assignArray(Result &Res, BaseNode *BaseAddr, MIRArrayType *StructTy,
                        BaseNode *Src);
+  unsigned assignVector(Result &Res, MIRSymbol *VecSym, MIRType *Ty,
+                        BaseNode *Src);
+  StmtNode *vectorSetLane(MIRType *Ty, Result &VecRes, BaseNode *Index,
+                          BaseNode *Src);
+  PrimType getVectorElementPrimType(PrimType VectorPrimType);
   MIRConst *getInitializer(MIRType *Ty, const clang::Expr *InitExpr);
   MIRConst *completeArrayInitializer(MIRAggConst *Agg, MIRArrayType *ArrTy);
   MIRType *isHomogenousAggregate(MIRType *Ty);
-- 
Gitee

From 4f00762308bc91cc66782f0f879e86d0936056b0 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Wed, 26 May 2021 14:03:41 -0400
Subject: [PATCH 02/11] Handle 1-element vectors as scalar type with attr

Instead of adding a new primtype for the 1-element vector types, we use
the corresponding scalar type and add the `oneelem_simd` attribute.
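
As a quick sketch of the intended lowering (the variable names below are
made up for illustration; the checked-in cases live in test/vector.c,
added later in this series), declarations such as

    uint64x1_t v;  /* 1-element vector */
    uint64x2_t w;  /* multi-element vector, unaffected */

are now emitted as

    var %v u64
    var %w v2u64

where %v carries the `oneelem_simd` attribute rather than requiring a
dedicated v1u64 primtype.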
--- Clang2MapleResult.cpp | 20 +++++- Clang2MapleResult.h | 33 +++++++--- Clang2MapleVisitor.cpp | 134 ++++++++++++++++++++++++++++++----------- 3 files changed, 140 insertions(+), 47 deletions(-) diff --git a/Clang2MapleResult.cpp b/Clang2MapleResult.cpp index eb09c2a..b508ecc 100644 --- a/Clang2MapleResult.cpp +++ b/Clang2MapleResult.cpp @@ -18,11 +18,12 @@ using namespace maple; BaseNode *Result::getNode() { return Node; } -void Result::setNode(BaseNode *N, MIRType *Ty, bool MD) { +void Result::setNode(BaseNode *N, MIRType *Ty, TypeAttrs Attrs, bool MD) { ASSERT(kOpcodeInfo.IsStmt(N->op) || Ty, "Must include a type with expression result"); Node = N; NodeTy = Ty; + NodeTyAttrs = Attrs; MayDrop = MD; } @@ -43,6 +44,11 @@ MIRType *Result::getAddrTy() { return NodeTy; } +TypeAttrs Result::getAddrTyAttrs() { + ASSERT(isDeref(), "called getAddrTyAttrs on Result that is not a deref"); + return NodeTyAttrs; +} + MIRType *Result::getValueTy() { if (isDeref()) { return DerefTy; @@ -50,15 +56,25 @@ MIRType *Result::getValueTy() { return NodeTy; } -void Result::setValueTy(MIRType *Ty, bool IsDeref) { +TypeAttrs Result::getValueTyAttrs() { + if (isDeref()) { + return DerefTyAttrs; + } + return NodeTyAttrs; +} + +void Result::setValueTy(MIRType *Ty, TypeAttrs Attrs, bool IsDeref) { if (isDeref()) { if (IsDeref) { DerefTy = Ty; + DerefTyAttrs = Attrs; } else { NodeTy = Ty; + NodeTyAttrs = Attrs; } } else { NodeTy = Ty; + NodeTyAttrs = Attrs; if (Node) { Node->SetPrimType(Ty->GetPrimType()); } diff --git a/Clang2MapleResult.h b/Clang2MapleResult.h index fbd94d1..5f3a02d 100644 --- a/Clang2MapleResult.h +++ b/Clang2MapleResult.h @@ -56,9 +56,11 @@ private: FieldID Field = 0; // The type of the node. Should be set for all expressions. MIRType *NodeTy = nullptr; + TypeAttrs NodeTyAttrs; // The type of the dereferenced value. This also indicates that the expression // to be returned is dereferenced, for use by the parent. MIRType *DerefTy = nullptr; + TypeAttrs DerefTyAttrs; // Statements to run after this result. std::vector StmtsAfter; // Source location for this node. 
@@ -73,20 +75,23 @@ public: Result(clang::SourceLocation Loc) : Loc(Loc) {} Result(BaseNode *Node, clang::SourceLocation Loc, MIRType *Ty = nullptr, - bool MayDrop = false) - : Node(Node), NodeTy(Ty), Loc(Loc), MayDrop(MayDrop) { + TypeAttrs Attrs = TypeAttrs(), bool MayDrop = false) + : Node(Node), NodeTy(Ty), NodeTyAttrs(Attrs), Loc(Loc), MayDrop(MayDrop) { ASSERT(kOpcodeInfo.IsStmt(Node->op) || Ty, "Must include a type with expression result"); } Result(BaseNode *Node, MIRType *Ty, MIRType *DerefTy, - clang::SourceLocation Loc) - : Node(Node), NodeTy(Ty), DerefTy(DerefTy), Loc(Loc) {} + clang::SourceLocation Loc, TypeAttrs TyAttrs = TypeAttrs(), + TypeAttrs DerefTyAttrs = TypeAttrs()) + : Node(Node), NodeTy(Ty), DerefTy(DerefTy), Loc(Loc), + NodeTyAttrs(TyAttrs), DerefTyAttrs(DerefTyAttrs) {} Result(MIRSymbol *Sym, MIRType *Ty, clang::SourceLocation Loc, - bool MayDrop = false) - : Sym(Sym), NodeTy(Ty), Loc(Loc), MayDrop(MayDrop) {} + TypeAttrs Attrs = TypeAttrs(), bool MayDrop = false) + : Sym(Sym), NodeTy(Ty), Loc(Loc), NodeTyAttrs(Attrs), MayDrop(MayDrop) {} BaseNode *getNode(); - void setNode(BaseNode *Node, MIRType *Ty = nullptr, bool MayDrop = false); + void setNode(BaseNode *Node, MIRType *Ty = nullptr, + TypeAttrs TyAttrs = TypeAttrs(), bool MayDrop = false); MIRSymbol *getSym(); void setSym(MIRSymbol *Sym, MIRType *Ty, bool MayDrop = false); clang::RecordDecl *getBaseRecordDecl() { return BaseRecord; } @@ -95,10 +100,18 @@ public: void setField(FieldID Id) { Field = Id; } BaseNode *getAddr(); MIRType *getAddrTy(); + TypeAttrs getAddrTyAttrs(); MIRType *getValueTy(); - void setValueTy(MIRType *Ty, bool IsDeref = false); - void setDerefTypes(MIRType *AddrTy, MIRType *DTy) { - NodeTy = AddrTy, DerefTy = DTy; + TypeAttrs getValueTyAttrs(); + void setValueTy(MIRType *Ty, TypeAttrs Attrs = TypeAttrs(), + bool IsDeref = false); + void setDerefTypes(MIRType *AddrTy, MIRType *DTy, + TypeAttrs AddrTyAttrs = TypeAttrs(), + TypeAttrs DTyAttrs = TypeAttrs()) { + NodeTy = AddrTy; + DerefTy = DTy; + NodeTyAttrs = AddrTyAttrs; + DerefTyAttrs = DTyAttrs; } clang::SourceLocation getLoc() { return Loc; } void setResult(Result &Other); diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp index f413f46..1e8871d 100644 --- a/Clang2MapleVisitor.cpp +++ b/Clang2MapleVisitor.cpp @@ -57,6 +57,8 @@ static MIRStorageClass StorageClass2Mpl(clang::StorageClass SC); static Opcode BinOpcode2Mpl(clang::BinaryOperatorKind Op, bool isSigned); static bool isAssign(Opcode Op); static bool isPointerType(MIRType *Ty); +static bool isOneElementVector(clang::QualType QT); +static bool isOneElementVector(const clang::Type *Ty); #ifdef DEBUG namespace maple { @@ -191,6 +193,9 @@ Clang2MapleVisitor::VisitRecordDecl(const clang::RecordDecl *Record) { if (FieldQT.isConstQualified()) { Attrs.SetAttr(FLDATTR_const); } + if (isOneElementVector(FieldQT)) { + Attrs.SetAttr(FLDATTR_oneelem_simd); + } if (I->isBitField()) { MIRType *FieldTy = @@ -323,6 +328,10 @@ MIRSymbol *Clang2MapleVisitor::VisitVarDecl(const clang::VarDecl *Var) { Symbol->SetAttrs(Attrs); } + if (isOneElementVector(Var->getType())) { + Symbol->SetAttr(ATTR_oneelem_simd); + } + Symbol->SetSKind(kStVar); SrcPosition Pos = sourceLocation2Mpl(Var->getLocation()); Symbol->SetSrcPosition(Pos); @@ -403,6 +412,10 @@ Clang2MapleVisitor::VisitFunctionDecl(const clang::FunctionDecl *Func) { MFunc->SetAttr(FUNCATTR_static); } + if (isOneElementVector(Func->getReturnType())) { + MFunc->SetAttr(FUNCATTR_oneelem_simd); + } + MIRFunction *LastFunc = 
Module->CurFunction(); Module->SetCurFunction(MFunc); Builder->SetCurrentFunction(*MFunc); @@ -1169,7 +1182,7 @@ Clang2MapleVisitor::VisitBinaryOperator(const clang::BinaryOperator *BO) { if (CompoundAssign) { Assign->SetSrcPos(sourceLocation2Mpl(BO->getExprLoc())); Res.appendStmtBefore(Assign); - Res.setNode(N, Lhs.getValueTy(), true); + Res.setNode(N, Lhs.getValueTy(), TypeAttrs(), true); } else { Res.setNode(Assign, Ty); } @@ -1366,6 +1379,10 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { const clang::FunctionDecl *CalleeDecl = CE->getDirectCallee(); MIRType *Ty = type2Mpl(CE->getType()); + TypeAttrs Attrs; + if (isOneElementVector(CE->getType())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } unsigned NumArgs = CE->getNumArgs(); if (shouldEvaluateArgs(CalleeDecl)) { @@ -1383,7 +1400,7 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { Args.insert(Args.begin(), getNodeAsRVal(CalleeRes)); IcallNode *ICall = Builder->CreateStmtIcall(Args); - Res.setNode(ICall, Ty); + Res.setNode(ICall, Ty, Attrs); return Res; } else { BaseNode *Call; @@ -1583,7 +1600,7 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { } } - Res.setNode(Call, Ty); + Res.setNode(Call, Ty, Attrs); return Res; } } @@ -1596,10 +1613,18 @@ Result Clang2MapleVisitor::VisitCastExpr(const clang::CastExpr *CE) { Result Res(CE->getExprLoc()); switch (CE->getCastKind()) { case clang::CK_BitCast: { + if (FromTy->GetPrimType() == ToTy->GetPrimType() && + FromTy->IsScalarType()) { + // This case may show up when casting from a 1-element vector to its + // scalar type. + return SubExprRes; + } + BaseNode *SubExpr = getNodeAsRVal(SubExprRes); if (ToTy->GetKind() == kTypePointer) { Res.setNode(SubExpr, ToTy); } else { + BaseNode *SubExpr = getNodeAsRVal(SubExprRes); Res.setNode(Builder->CreateExprRetype(*ToTy, *FromTy, SubExpr), ToTy); } } break; @@ -2071,7 +2096,7 @@ Result Clang2MapleVisitor::VisitMemberExpr(const clang::MemberExpr *ME) { FieldMap[Record][BaseRes.getField()].end(), "Structure or field not in map"); BaseRes.setField(FieldMap[Record][BaseRes.getField()][FD]); - BaseRes.setValueTy(type2Mpl(ME->getType()), true); + BaseRes.setValueTy(type2Mpl(ME->getType()), TypeAttrs(), true); return BaseRes; } } @@ -2336,7 +2361,8 @@ Result Clang2MapleVisitor::VisitUnaryOperator(const clang::UnaryOperator *UO) { getNodeAsRVal(SubExpr), IncAmountNode); if (UO->isPrefix()) { - Result Res(getNodeAsRVal(SubExpr), UO->getExprLoc(), ResTy, true); + Result Res(getNodeAsRVal(SubExpr), UO->getExprLoc(), ResTy, TypeAttrs(), + true); Res.appendStmts(SubExpr); StmtNode *Assign = nullptr; if (SubExpr.isSym()) { @@ -2358,7 +2384,7 @@ Result Clang2MapleVisitor::VisitUnaryOperator(const clang::UnaryOperator *UO) { } else { MIRSymbol *Temp = Builder->GetOrCreateLocalDecl( "post." 
+ std::to_string(UO->getID(*Context)), *ResTy); - Result Res(Temp, ResTy, UO->getExprLoc(), true); + Result Res(Temp, ResTy, UO->getExprLoc(), TypeAttrs(), true); Res.appendStmts(SubExpr); StmtNode *TempAssign = Builder->CreateStmtDassign(*Temp, 0, getNodeAsRVal(SubExpr)); @@ -2839,7 +2865,15 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { GlobalTables::GetTypeTable().GetOrCreatePointerType(ReturnTy, PointerPrimTy); ParamTypeList.push_back(ReturnTyPtr->GetTypeIndex()); - ParamAttrsList.push_back(TypeAttrs()); + + TypeAttrs Attrs; + if (isOneElementVector(FT->getReturnType())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } + + // TODO: Add other attributes + + ParamAttrsList.push_back(Attrs); ReturnTy = GlobalTables::GetTypeTable().GetVoid()->GetTypeIndex(); } @@ -2847,7 +2881,15 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { llvm::dyn_cast(Ty)) { for (const clang::QualType &Param : FPT->param_types()) { ParamTypeList.push_back(type2MplIdx(Param)); - ParamAttrsList.push_back(TypeAttrs()); // TODO: Add attributes + + TypeAttrs Attrs; + if (isOneElementVector(Param.getTypePtr())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } + + // TODO: Add other attributes + + ParamAttrsList.push_back(Attrs); } IsVariadic = FPT->isVariadic(); @@ -2884,6 +2926,9 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { Attrs.SetAlign(AlignmentBits / 8); } } + if (isOneElementVector(PtrTy->getPointeeType())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } MTy->SetTypeAttrs(Attrs); TypeMap.insert({PtrTy, MTy->GetTypeIndex()}); @@ -2919,6 +2964,10 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { Attrs.SetAlign(AlignmentBits / 8); } } + if (isOneElementVector(ArrTy->getElementType())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } + MTy->SetTypeAttrs(Attrs); TypeMap.insert({ArrTy, MTy->GetTypeIndex()}); @@ -2944,6 +2993,10 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { Attrs.SetAlign(AlignmentBits / 8); } } + if (isOneElementVector(VarArrTy->getElementType())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } + MTy->SetTypeAttrs(Attrs); TypeMap.insert({VarArrTy, MTy->GetTypeIndex()}); @@ -2968,6 +3021,10 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { Attrs.SetAlign(AlignmentBits / 8); } } + if (isOneElementVector(IncArrTy->getElementType())) { + Attrs.SetAttr(ATTR_oneelem_simd); + } + MTy->SetTypeAttrs(Attrs); TypeMap.insert({IncArrTy, MTy->GetTypeIndex()}); @@ -3003,8 +3060,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { switch (ElemTy->GetPrimType()) { case PTY_i64: if (NumElements == 1) { - TI = - GlobalTables::GetTypeTable().GetPrimType(PTY_v1i64)->GetTypeIndex(); + TI = GlobalTables::GetTypeTable().GetPrimType(PTY_i64)->GetTypeIndex(); } else if (NumElements == 2) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v2i64)->GetTypeIndex(); @@ -3046,8 +3102,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { break; case PTY_u64: if (NumElements == 1) { - TI = - GlobalTables::GetTypeTable().GetPrimType(PTY_v1i64)->GetTypeIndex(); + TI = GlobalTables::GetTypeTable().GetPrimType(PTY_u64)->GetTypeIndex(); } else if (NumElements == 2) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v2u64)->GetTypeIndex(); @@ -3089,8 +3144,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { break; case PTY_f64: if (NumElements == 1) { - TI = - GlobalTables::GetTypeTable().GetPrimType(PTY_v1f64)->GetTypeIndex(); + TI = GlobalTables::GetTypeTable().GetPrimType(PTY_f64)->GetTypeIndex(); } else if (NumElements == 2) { TI = 
GlobalTables::GetTypeTable().GetPrimType(PTY_v2f64)->GetTypeIndex(); @@ -3396,6 +3450,7 @@ BaseNode *Clang2MapleVisitor::getNodeAsRVal(Result &Res) { // rval MIRSymbol *Sym = Builder->GetOrCreateLocalDecl( "_result" + std::to_string(Res.getLoc().getRawEncoding()), *RetValTy); + Sym->SetAttrs(Res.getValueTyAttrs()); StmtNode *TmpAssign = nullptr; if (Op == OP_call) { CallNode *Call = static_cast(N); @@ -3562,6 +3617,7 @@ StmtNode *Clang2MapleVisitor::getNodeAsStmt(Result &Res) { MIRSymbol *Sym = Builder->GetOrCreateLocalDecl( "_result" + std::to_string(Res.getLoc().getRawEncoding()), *RetValTy); + Sym->SetAttrs(Res.getValueTyAttrs()); StmtNode *TmpAssign = nullptr; if (Op == OP_call) { CallNode *Call = static_cast(N); @@ -4156,16 +4212,22 @@ StmtNode *Clang2MapleVisitor::vectorSetLane(MIRType *Ty, Result &VecRes, SETQ_LANE(v16u8) SETQ_LANE(v2f64) SETQ_LANE(v4f32) - SETQ_LANE(v1i64) SETQ_LANE(v2i32) SETQ_LANE(v4i16) SETQ_LANE(v8i8) - SETQ_LANE(v1u64) SETQ_LANE(v2u32) SETQ_LANE(v4u16) SETQ_LANE(v8u8) - SETQ_LANE(v1f64) SETQ_LANE(v2f32) + case PTY_i64: + Intrinsic = INTRN_vector_set_element_v1i64; + break; + case PTY_u64: + Intrinsic = INTRN_vector_set_element_v1u64; + break; + case PTY_f64: + Intrinsic = INTRN_vector_set_element_v1f64; + break; default: ASSERT(false, "Unhandled vector type"); return nullptr; @@ -4193,40 +4255,28 @@ PrimType Clang2MapleVisitor::getVectorElementPrimType(PrimType VectorPrimType) { case PTY_v2i64: return PTY_i64; case PTY_v4i32: + case PTY_v2i32: return PTY_i32; case PTY_v8i16: + case PTY_v4i16: return PTY_i16; case PTY_v16i8: + case PTY_v8i8: return PTY_i8; case PTY_v2u64: return PTY_u64; case PTY_v4u32: + case PTY_v2u32: return PTY_u32; case PTY_v8u16: + case PTY_v4u16: return PTY_u16; case PTY_v16u8: + case PTY_v8u8: return PTY_u8; case PTY_v2f64: return PTY_f64; case PTY_v4f32: - return PTY_f32; - case PTY_v1i64: - return PTY_i64; - case PTY_v2i32: - return PTY_i32; - case PTY_v4i16: - return PTY_i16; - case PTY_v8i8: - return PTY_i8; - case PTY_v1u64: - case PTY_v2u32: - return PTY_u32; - case PTY_v4u16: - return PTY_u16; - case PTY_v8u8: - return PTY_u8; - case PTY_v1f64: - return PTY_i64; case PTY_v2f32: return PTY_f32; default: @@ -4655,3 +4705,17 @@ static bool isAssign(Opcode Op) { static bool isPointerType(MIRType *Ty) { return Ty->IsMIRPtrType() || Ty->GetKind() == kTypeArray; } + +static bool isOneElementVector(clang::QualType QT) { + return isOneElementVector(QT.getTypePtr()); +} + +static bool isOneElementVector(const clang::Type *Ty) { + if (const clang::VectorType *VecTy = llvm::dyn_cast( + Ty->getUnqualifiedDesugaredType())) { + if (VecTy->getNumElements() == 1) { + return true; + } + } + return false; +} -- Gitee From 1bef72b771b4a30cb46e6adf47e33a075d62094f Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Wed, 26 May 2021 14:05:38 -0400 Subject: [PATCH 03/11] Add copyright to CMakeLists.in-tree.txt --- CMakeLists.in-tree.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CMakeLists.in-tree.txt b/CMakeLists.in-tree.txt index 27c9201..87f4ab5 100644 --- a/CMakeLists.in-tree.txt +++ b/CMakeLists.in-tree.txt @@ -1,3 +1,17 @@ +# +# Copyright (c) 2021 Futurewei Technologies, Inc. +# +# clang2mpl is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan +# PSL v2. 
You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the +# Mulan PSL v2 for more details. +# set( LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Option -- Gitee From 99172a89e58f8ecfaaf78f58efb6fd1475f2885f Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Wed, 26 May 2021 17:33:02 -0400 Subject: [PATCH 04/11] Install arm_neon.h and improve build Update the configuration in Clang2Maple to ensure the proper include paths are configured. --- CMakeLists.txt | 17 ++++++++++++----- Clang2Maple.cpp | 29 +++++++++++++---------------- Clang2MapleVisitor.cpp | 10 ++++++++++ Makefile | 3 ++- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 635ce50..4a4769f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,7 @@ include_directories( $ENV{MAPLE_ROOT}/src/mapleall/maple_ir/include ${OPENSSL_INCLUDE_DIR} ) -link_directories( $ENV{MAPLE_ROOT}/output/$ENV{MAPLE_BUILD_TYPE}/lib/64 +link_directories( ${CMAKE_INSTALL_PREFIX}/lib/64 $ENV{MAPLE_ROOT}/tools/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/lib ) SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) @@ -52,7 +52,10 @@ add_executable(clang2mpl Clang2MapleVisitor.cpp ) -target_compile_definitions(clang2mpl PRIVATE DYNAMICLANG) +target_compile_definitions(clang2mpl + PRIVATE DYNAMICLANG + PRIVATE MAPLE_ROOT=\"$ENV{MAPLE_ROOT}\" + PRIVATE INSTALL_DIR=\"${CMAKE_INSTALL_PREFIX}\") if (CMAKE_BUILD_TYPE STREQUAL "Debug") target_compile_definitions(clang2mpl PRIVATE DEBUG) endif (CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -79,6 +82,10 @@ target_link_libraries(clang2mpl ${CMAKE_THREAD_LIBS_INIT} ) -install(TARGETS clang2mpl - RUNTIME DESTINATION "$ENV{MAPLE_EXECUTE_BIN}" - ) +install(TARGETS clang2mpl) + +set(header_install_dir lib/clang2mpl/include) + +install( + DIRECTORY ${CMAKE_SOURCE_DIR}/sys/include/ + DESTINATION ${header_install_dir}) diff --git a/Clang2Maple.cpp b/Clang2Maple.cpp index 45b9010..08e8157 100644 --- a/Clang2Maple.cpp +++ b/Clang2Maple.cpp @@ -25,6 +25,13 @@ #include "Clang2MapleVisitor.h" +#ifndef MAPLE_ROOT +#error "MAPLE_ROOT must be defined" +#endif +#ifndef INSTALL_DIR +#define INSTALL_DIR "/usr/local" +#endif + using namespace clang::tooling; using namespace llvm; @@ -47,22 +54,12 @@ static cl::extrahelp MoreHelp("\nMore help text...\n"); int main(int argc, const char **argv) { // Set environment variable to ensure proper include directories are used // during compilation - char *MapleRoot = getenv("MAPLE_ROOT"); - if (!MapleRoot) { - llvm::errs() << "error: MAPLE_ROOT environment variable is not set\n"; - return 1; - } - - std::stringstream SS; - SS << MapleRoot - << "/tools/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/lib/clang/" - "10.0.0/include:" - << MapleRoot << "/tools/sysroot-glibc-linaro-2.25/usr/include"; - char *CurrentCIncludePath = getenv("C_INCLUDE_PATH"); - if (CurrentCIncludePath) { - SS << CurrentCIncludePath; - } - setenv("C_INCLUDE_PATH", SS.str().c_str(), 1); + setenv("C_INCLUDE_PATH", + MAPLE_ROOT "/tools/sysroot-glibc-linaro-2.25/usr/include:" INSTALL_DIR + "/lib/clang2mpl/include:" MAPLE_ROOT + "/tools/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/" + "lib/clang/10.0.0/include", + 1); auto ExpectedParser = CommonOptionsParser::create( argc, argv, Clang2MapleCategory, llvm::cl::OneOrMore); diff --git 
a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp index 1e8871d..3c2cf46 100644 --- a/Clang2MapleVisitor.cpp +++ b/Clang2MapleVisitor.cpp @@ -3065,6 +3065,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v2i64)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3076,6 +3077,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v4i32)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3087,6 +3089,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v8i16)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3097,6 +3100,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v16i8)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3107,6 +3111,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v2u64)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3118,6 +3123,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v4u32)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3129,6 +3135,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v8u16)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3139,6 +3146,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v16u8)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3149,6 +3157,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v2f64)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; @@ -3160,6 +3169,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) { TI = GlobalTables::GetTypeTable().GetPrimType(PTY_v4f32)->GetTypeIndex(); } else { + Ty->dump(); ASSERT(false, "Unsupported vector type"); } break; diff --git a/Makefile b/Makefile index d09889b..f71514c 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,8 @@ build${BUILD_TYPE}/clang2mpl: $(SOURCES) setup: CMakeLists.txt mkdir -p build${BUILD_TYPE} - cd build${BUILD_TYPE}; cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -G "Unix Makefiles" ../ + cd build${BUILD_TYPE} && cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${MAPLE_BUILD_OUTPUT} -G "Unix Makefiles" ../ install: build${BUILD_TYPE}/clang2mpl make -C build${BUILD_TYPE} install -- Gitee From a287046dcf811aa413278626b59f3c6d1981506f Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 27 May 2021 14:53:38 -0400 Subject: [PATCH 05/11] Add prototypes and tests for all vector builtins --- sys/include/arm_neon.h | 219 +++++++++++++-- test/vector.c | 621 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 823 insertions(+), 17 deletions(-) create mode 100644 test/vector.c diff --git a/sys/include/arm_neon.h b/sys/include/arm_neon.h index 13a2db0..1382aa3 100644 --- a/sys/include/arm_neon.h +++ b/sys/include/arm_neon.h @@ -55,23 +55,208 @@ typedef 
__attribute__((neon_vector_type(2))) float64_t float64x2_t; #endif // Define Maple-specific builtins for vectors -int32x4_t __builtin_mpl_vector_from_scalar_v4i32(int32_t value0); -uint8x16_t __builtin_mpl_vector_from_scalar_v8u16(uint16_t value0); -uint8x16_t __builtin_mpl_vector_merge_v8u16(uint8x16_t a, uint8x16_t b, - const int index); -uint64x1_t __builtin_mpl_vector_get_low_v2u64(uint64x2_t vec); -uint64x1_t __builtin_mpl_vector_get_high_v2u64(uint64x2_t vec); -uint32_t __builtin_mpl_vector_get_element_v2u32(uint32x2_t vec, - const int index); -uint8x16_t __builtin_mpl_vector_load_v8u16(uint8_t const *ptr); -uint32x4_t __builtin_mpl_vector_pairwise_add_v8u16(uint16x8_t vec); -uint64x2_t __builtin_mpl_vector_pairwise_add_v4u32(uint32x4_t vec); -uint8x16_t __builtin_mpl_vector_reverse_v8u16(uint8x16_t vec); -uint32x4_t __builtin_mpl_vector_set_element_v4i32(uint32_t value, - uint32x4_t vec, - const int index); -void __builtin_mpl_vector_store_v4i32(int32_t *ptr, int32x4_t vec); -void __builtin_mpl_vector_store_v16u8(uint8_t *ptr, uint8x16_t vec); +int64x2_t __builtin_mpl_vector_from_scalar_v2i64(int64_t); +int32x4_t __builtin_mpl_vector_from_scalar_v4i32(int32_t); +int16x8_t __builtin_mpl_vector_from_scalar_v8i16(int16_t); +int8x16_t __builtin_mpl_vector_from_scalar_v16i8(int8_t); +uint64x2_t __builtin_mpl_vector_from_scalar_v2u64(uint64_t); +uint32x4_t __builtin_mpl_vector_from_scalar_v4u32(uint32_t); +uint16x8_t __builtin_mpl_vector_from_scalar_v8u16(uint16_t); +uint8x16_t __builtin_mpl_vector_from_scalar_v16u8(uint8_t); +float64x2_t __builtin_mpl_vector_from_scalar_v2f64(float64_t); +float32x4_t __builtin_mpl_vector_from_scalar_v4f32(float32_t); +int64x1_t __builtin_mpl_vector_from_scalar_v1i64(int64_t); +int32x2_t __builtin_mpl_vector_from_scalar_v2i32(int32_t); +int16x4_t __builtin_mpl_vector_from_scalar_v4i16(int16_t); +int8x8_t __builtin_mpl_vector_from_scalar_v8i8(int8_t); +uint64x1_t __builtin_mpl_vector_from_scalar_v1u64(uint64_t); +uint32x2_t __builtin_mpl_vector_from_scalar_v2u32(uint32_t); +uint16x4_t __builtin_mpl_vector_from_scalar_v4u16(uint16_t); +uint8x8_t __builtin_mpl_vector_from_scalar_v8u8(uint8_t); +float64x1_t __builtin_mpl_vector_from_scalar_v1f64(float64_t); +float32x2_t __builtin_mpl_vector_from_scalar_v2f32(float32_t); + +// vecTy vector_merge(vecTy src1, vecTy src2, int n) +// Create a vector by concatenating the high elements of src1, starting +// with the nth element, followed by the low elements of src2. 
+int64x2_t __builtin_mpl_vector_merge_v2i64(int64x2_t, int64x2_t, int32_t);
+int32x4_t __builtin_mpl_vector_merge_v4i32(int32x4_t, int32x4_t, int32_t);
+int16x8_t __builtin_mpl_vector_merge_v8i16(int16x8_t, int16x8_t, int32_t);
+int8x16_t __builtin_mpl_vector_merge_v16i8(int8x16_t, int8x16_t, int32_t);
+uint64x2_t __builtin_mpl_vector_merge_v2u64(uint64x2_t, uint64x2_t, int32_t);
+uint32x4_t __builtin_mpl_vector_merge_v4u32(uint32x4_t, uint32x4_t, int32_t);
+uint16x8_t __builtin_mpl_vector_merge_v8u16(uint16x8_t, uint16x8_t, int32_t);
+uint8x16_t __builtin_mpl_vector_merge_v16u8(uint8x16_t, uint8x16_t, int32_t);
+float64x2_t __builtin_mpl_vector_merge_v2f64(float64x2_t, float64x2_t, int32_t);
+float32x4_t __builtin_mpl_vector_merge_v4f32(float32x4_t, float32x4_t, int32_t);
+int64x1_t __builtin_mpl_vector_merge_v1i64(int64x1_t, int64x1_t, int32_t);
+int32x2_t __builtin_mpl_vector_merge_v2i32(int32x2_t, int32x2_t, int32_t);
+int16x4_t __builtin_mpl_vector_merge_v4i16(int16x4_t, int16x4_t, int32_t);
+int8x8_t __builtin_mpl_vector_merge_v8i8(int8x8_t, int8x8_t, int32_t);
+uint64x1_t __builtin_mpl_vector_merge_v1u64(uint64x1_t, uint64x1_t, int32_t);
+uint32x2_t __builtin_mpl_vector_merge_v2u32(uint32x2_t, uint32x2_t, int32_t);
+uint16x4_t __builtin_mpl_vector_merge_v4u16(uint16x4_t, uint16x4_t, int32_t);
+uint8x8_t __builtin_mpl_vector_merge_v8u8(uint8x8_t, uint8x8_t, int32_t);
+float64x1_t __builtin_mpl_vector_merge_v1f64(float64x1_t, float64x1_t, int32_t);
+float32x2_t __builtin_mpl_vector_merge_v2f32(float32x2_t, float32x2_t, int32_t);
+
+// vecTy2 vector_get_low(vecTy1 src)
+// Create a vector from the low part of the source vector.
+int64x1_t __builtin_mpl_vector_get_low_v2i64(int64x2_t);
+int32x2_t __builtin_mpl_vector_get_low_v4i32(int32x4_t);
+int16x4_t __builtin_mpl_vector_get_low_v8i16(int16x8_t);
+int8x8_t __builtin_mpl_vector_get_low_v16i8(int8x16_t);
+uint64x1_t __builtin_mpl_vector_get_low_v2u64(uint64x2_t);
+uint32x2_t __builtin_mpl_vector_get_low_v4u32(uint32x4_t);
+uint16x4_t __builtin_mpl_vector_get_low_v8u16(uint16x8_t);
+uint8x8_t __builtin_mpl_vector_get_low_v16u8(uint8x16_t);
+float64x1_t __builtin_mpl_vector_get_low_v2f64(float64x2_t);
+float32x2_t __builtin_mpl_vector_get_low_v4f32(float32x4_t);
+
+// vecTy2 vector_get_high(vecTy1 src)
+// Create a vector from the high part of the source vector.
+int64x1_t __builtin_mpl_vector_get_high_v2i64(int64x2_t);
+int32x2_t __builtin_mpl_vector_get_high_v4i32(int32x4_t);
+int16x4_t __builtin_mpl_vector_get_high_v8i16(int16x8_t);
+int8x8_t __builtin_mpl_vector_get_high_v16i8(int8x16_t);
+uint64x1_t __builtin_mpl_vector_get_high_v2u64(uint64x2_t);
+uint32x2_t __builtin_mpl_vector_get_high_v4u32(uint32x4_t);
+uint16x4_t __builtin_mpl_vector_get_high_v8u16(uint16x8_t);
+uint8x8_t __builtin_mpl_vector_get_high_v16u8(uint8x16_t);
+float64x1_t __builtin_mpl_vector_get_high_v2f64(float64x2_t);
+float32x2_t __builtin_mpl_vector_get_high_v4f32(float32x4_t);
+
+// scalarTy vector_get_element(vecTy src, int n)
+// Get the nth element of the source vector.
+int64_t __builtin_mpl_vector_get_element_v2i64(int64x2_t, int32_t);
+int32_t __builtin_mpl_vector_get_element_v4i32(int32x4_t, int32_t);
+int16_t __builtin_mpl_vector_get_element_v8i16(int16x8_t, int32_t);
+int8_t __builtin_mpl_vector_get_element_v16i8(int8x16_t, int32_t);
+uint64_t __builtin_mpl_vector_get_element_v2u64(uint64x2_t, int32_t);
+uint32_t __builtin_mpl_vector_get_element_v4u32(uint32x4_t, int32_t);
+uint16_t __builtin_mpl_vector_get_element_v8u16(uint16x8_t, int32_t);
+uint8_t __builtin_mpl_vector_get_element_v16u8(uint8x16_t, int32_t);
+float64_t __builtin_mpl_vector_get_element_v2f64(float64x2_t, int32_t);
+float32_t __builtin_mpl_vector_get_element_v4f32(float32x4_t, int32_t);
+int64_t __builtin_mpl_vector_get_element_v1i64(int64x1_t, int32_t);
+int32_t __builtin_mpl_vector_get_element_v2i32(int32x2_t, int32_t);
+int16_t __builtin_mpl_vector_get_element_v4i16(int16x4_t, int32_t);
+int8_t __builtin_mpl_vector_get_element_v8i8(int8x8_t, int32_t);
+uint64_t __builtin_mpl_vector_get_element_v1u64(uint64x1_t, int32_t);
+uint32_t __builtin_mpl_vector_get_element_v2u32(uint32x2_t, int32_t);
+uint16_t __builtin_mpl_vector_get_element_v4u16(uint16x4_t, int32_t);
+uint8_t __builtin_mpl_vector_get_element_v8u8(uint8x8_t, int32_t);
+float64_t __builtin_mpl_vector_get_element_v1f64(float64x1_t, int32_t);
+float32_t __builtin_mpl_vector_get_element_v2f32(float32x2_t, int32_t);
+
+// vecTy vector_set_element(scalarTy value, vecTy vec, int n)
+// Set the nth element of the source vector to value.
+int64x2_t __builtin_mpl_vector_set_element_v2i64(int64_t, int64x2_t, int32_t);
+int32x4_t __builtin_mpl_vector_set_element_v4i32(int32_t, int32x4_t, int32_t);
+int16x8_t __builtin_mpl_vector_set_element_v8i16(int16_t, int16x8_t, int32_t);
+int8x16_t __builtin_mpl_vector_set_element_v16i8(int8_t, int8x16_t, int32_t);
+uint64x2_t __builtin_mpl_vector_set_element_v2u64(uint64_t, uint64x2_t, int32_t);
+uint32x4_t __builtin_mpl_vector_set_element_v4u32(uint32_t, uint32x4_t, int32_t);
+uint16x8_t __builtin_mpl_vector_set_element_v8u16(uint16_t, uint16x8_t, int32_t);
+uint8x16_t __builtin_mpl_vector_set_element_v16u8(uint8_t, uint8x16_t, int32_t);
+float64x2_t __builtin_mpl_vector_set_element_v2f64(float64_t, float64x2_t, int32_t);
+float32x4_t __builtin_mpl_vector_set_element_v4f32(float32_t, float32x4_t, int32_t);
+int64x1_t __builtin_mpl_vector_set_element_v1i64(int64_t, int64x1_t, int32_t);
+int32x2_t __builtin_mpl_vector_set_element_v2i32(int32_t, int32x2_t, int32_t);
+int16x4_t __builtin_mpl_vector_set_element_v4i16(int16_t, int16x4_t, int32_t);
+int8x8_t __builtin_mpl_vector_set_element_v8i8(int8_t, int8x8_t, int32_t);
+uint64x1_t __builtin_mpl_vector_set_element_v1u64(uint64_t, uint64x1_t, int32_t);
+uint32x2_t __builtin_mpl_vector_set_element_v2u32(uint32_t, uint32x2_t, int32_t);
+uint16x4_t __builtin_mpl_vector_set_element_v4u16(uint16_t, uint16x4_t, int32_t);
+uint8x8_t __builtin_mpl_vector_set_element_v8u8(uint8_t, uint8x8_t, int32_t);
+float64x1_t __builtin_mpl_vector_set_element_v1f64(float64_t, float64x1_t, int32_t);
+float32x2_t __builtin_mpl_vector_set_element_v2f32(float32_t, float32x2_t, int32_t);
+
+// vecTy2 vector_pairwise_add(vecTy1 src)
+// Add pairs of elements from the source vector and put the result into the
+// destination vector, whose element size is twice that of the source
+// elements and whose element count is half that of the source vector.
+int64x2_t __builtin_mpl_vector_pairwise_add_v4i32(int32x4_t); +int32x4_t __builtin_mpl_vector_pairwise_add_v8i16(int16x8_t); +int16x8_t __builtin_mpl_vector_pairwise_add_v16i8(int8x16_t); +uint64x2_t __builtin_mpl_vector_pairwise_add_v4u32(uint32x4_t); +uint32x4_t __builtin_mpl_vector_pairwise_add_v8u16(uint16x8_t); +uint16x8_t __builtin_mpl_vector_pairwise_add_v16u8(uint8x16_t); +int64x1_t __builtin_mpl_vector_pairwise_add_v2i32(int32x2_t); +int32x2_t __builtin_mpl_vector_pairwise_add_v4i16(int16x4_t); +int16x4_t __builtin_mpl_vector_pairwise_add_v8i8(int8x8_t); +uint64x1_t __builtin_mpl_vector_pairwise_add_v2u32(uint32x2_t); +uint32x2_t __builtin_mpl_vector_pairwise_add_v4u16(uint16x4_t); +uint16x4_t __builtin_mpl_vector_pairwise_add_v8u8(uint8x8_t); + +// vecTy vector_reverse(vecTy src) +// Create a vector by reversing the order of the elements in src. +int64x2_t __builtin_mpl_vector_reverse_v2i64(int64x2_t); +int32x4_t __builtin_mpl_vector_reverse_v4i32(int32x4_t); +int16x8_t __builtin_mpl_vector_reverse_v8i16(int16x8_t); +int8x16_t __builtin_mpl_vector_reverse_v16i8(int8x16_t); +uint64x2_t __builtin_mpl_vector_reverse_v2u64(uint64x2_t); +uint32x4_t __builtin_mpl_vector_reverse_v4u32(uint32x4_t); +uint16x8_t __builtin_mpl_vector_reverse_v8u16(uint16x8_t); +uint8x16_t __builtin_mpl_vector_reverse_v16u8(uint8x16_t); +float64x2_t __builtin_mpl_vector_reverse_v2f64(float64x2_t); +float32x4_t __builtin_mpl_vector_reverse_v4f32(float32x4_t); +int64x1_t __builtin_mpl_vector_reverse_v1i64(int64x1_t); +int32x2_t __builtin_mpl_vector_reverse_v2i32(int32x2_t); +int16x4_t __builtin_mpl_vector_reverse_v4i16(int16x4_t); +int8x8_t __builtin_mpl_vector_reverse_v8i8(int8x8_t); +uint64x1_t __builtin_mpl_vector_reverse_v1u64(uint64x1_t); +uint32x2_t __builtin_mpl_vector_reverse_v2u32(uint32x2_t); +uint16x4_t __builtin_mpl_vector_reverse_v4u16(uint16x4_t); +uint8x8_t __builtin_mpl_vector_reverse_v8u8(uint8x8_t); +float64x1_t __builtin_mpl_vector_reverse_v1f64(float64x1_t); +float32x2_t __builtin_mpl_vector_reverse_v2f32(float32x2_t); + +// vecTy vector_load(scalarTy *ptr) +// Load the elements pointed to by ptr into a vector. +int64x2_t __builtin_mpl_vector_load_v2i64(int64_t *); +int32x4_t __builtin_mpl_vector_load_v4i32(int32_t *); +int16x8_t __builtin_mpl_vector_load_v8i16(int16_t *); +int8x16_t __builtin_mpl_vector_load_v16i8(int8_t *); +uint64x2_t __builtin_mpl_vector_load_v2u64(uint64_t *); +uint32x4_t __builtin_mpl_vector_load_v4u32(uint32_t *); +uint16x8_t __builtin_mpl_vector_load_v8u16(uint16_t *); +uint8x16_t __builtin_mpl_vector_load_v16u8(uint8_t *); +float64x2_t __builtin_mpl_vector_load_v2f64(float64_t *); +float32x4_t __builtin_mpl_vector_load_v4f32(float32_t *); +int64x1_t __builtin_mpl_vector_load_v1i64(int64_t *); +int32x2_t __builtin_mpl_vector_load_v2i32(int32_t *); +int16x4_t __builtin_mpl_vector_load_v4i16(int16_t *); +int8x8_t __builtin_mpl_vector_load_v8i8(int8_t *); +uint64x1_t __builtin_mpl_vector_load_v1u64(uint64_t *); +uint32x2_t __builtin_mpl_vector_load_v2u32(uint32_t *); +uint16x4_t __builtin_mpl_vector_load_v4u16(uint16_t *); +uint8x8_t __builtin_mpl_vector_load_v8u8(uint8_t *); +float64x1_t __builtin_mpl_vector_load_v1f64(float64_t *); +float32x2_t __builtin_mpl_vector_load_v2f32(float32_t *); + +// void vector_store(scalarTy *ptr, vecTy src) +// Store the elements from src into the memory pointed to by ptr. 
+void __builtin_mpl_vector_store_v2i64(int64_t *, int64x2_t); +void __builtin_mpl_vector_store_v4i32(int32_t *, int32x4_t); +void __builtin_mpl_vector_store_v8i16(int16_t *, int16x8_t); +void __builtin_mpl_vector_store_v16i8(int8_t *, int8x16_t); +void __builtin_mpl_vector_store_v2u64(uint64_t *, uint64x2_t); +void __builtin_mpl_vector_store_v4u32(uint32_t *, uint32x4_t); +void __builtin_mpl_vector_store_v8u16(uint16_t *, uint16x8_t); +void __builtin_mpl_vector_store_v16u8(uint8_t *, uint8x16_t); +void __builtin_mpl_vector_store_v2f64(float64_t *, float64x2_t); +void __builtin_mpl_vector_store_v4f32(float32_t *, float32x4_t); +void __builtin_mpl_vector_store_v1i64(int64_t *, int64x1_t); +void __builtin_mpl_vector_store_v2i32(int32_t *, int32x2_t); +void __builtin_mpl_vector_store_v4i16(int16_t *, int16x4_t); +void __builtin_mpl_vector_store_v8i8(int8_t *, int8x8_t); +void __builtin_mpl_vector_store_v1u64(uint64_t *, uint64x1_t); +void __builtin_mpl_vector_store_v2u32(uint32_t *, uint32x2_t); +void __builtin_mpl_vector_store_v4u16(uint16_t *, uint16x4_t); +void __builtin_mpl_vector_store_v8u8(uint8_t *, uint8x8_t); +void __builtin_mpl_vector_store_v1f64(float64_t *, float64x1_t); +void __builtin_mpl_vector_store_v2f32(float32_t *, float32x2_t); #define vdupq_n_s32(value) __builtin_mpl_vector_from_scalar_v4i32(value) #define vdupq_n_u8(value) __builtin_mpl_vector_from_scalar_v8u16(value) diff --git a/test/vector.c b/test/vector.c new file mode 100644 index 0000000..b7d69d2 --- /dev/null +++ b/test/vector.c @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2021 Futurewei Technologies, Inc. + * + * clang2mpl is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan + * PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY + * KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the + * Mulan PSL v2 for more details. 
+ */ +// RUN: %clang2mpl --ascii --verify %s -- --target=aarch64-linux-elf -Wno-unused-value +// RUN: cat %m | %FileCheck %s + +#include "arm_neon.h" + +// CHECK: [[# FILENUM:]] "{{.*}}/test/vector.c" + +void intrinsics() { + int8_t scalar_int8; + int16_t scalar_int16; + int32_t scalar_int32; + int64_t scalar_int64; + uint8_t scalar_uint8; + uint16_t scalar_uint16; + uint32_t scalar_uint32; + uint64_t scalar_uint64; + float32_t scalar_float32; + float64_t scalar_float64; + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int8x8 v8i8 + int8x8_t vec_int8x8; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int8x16 v16i8 + int8x16_t vec_int8x16; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int16x4 v4i16 + int16x4_t vec_int16x4; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int16x8 v8i16 + int16x8_t vec_int16x8; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int32x2 v2i32 + int32x2_t vec_int32x2; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int32x4 v4i32 + int32x4_t vec_int32x4; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int64x1 i64 + int64x1_t vec_int64x1; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_int64x2 v2i64 + int64x2_t vec_int64x2; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint8x8 v8u8 + uint8x8_t vec_uint8x8; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint8x16 v16u8 + uint8x16_t vec_uint8x16; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint16x4 v4u16 + uint16x4_t vec_uint16x4; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint16x8 v8u16 + uint16x8_t vec_uint16x8; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint32x2 v2u32 + uint32x2_t vec_uint32x2; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint32x4 v4u32 + uint32x4_t vec_uint32x4; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint64x1 u64 + uint64x1_t vec_uint64x1; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_uint64x2 v2u64 + uint64x2_t vec_uint64x2; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_float32x2 v2f32 + float32x2_t vec_float32x2; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_float32x4 v4f32 + float32x4_t vec_float32x4; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_float64x1 f64 + float64x1_t vec_float64x1; + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: var %vec_float64x2 v2f64 + float64x2_t vec_float64x2; + void *ptr; + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_from_scalar_v2i64 (dread i64 %scalar_int64)) + vec_int64x2 = __builtin_mpl_vector_from_scalar_v2i64(scalar_int64); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_from_scalar_v4i32 (dread i32 %scalar_int32)) + vec_int32x4 = __builtin_mpl_vector_from_scalar_v4i32(scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_from_scalar_v8i16 (dread i16 %scalar_int16)) + vec_int16x8 = 
__builtin_mpl_vector_from_scalar_v8i16(scalar_int16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x16 0 (intrinsicop v16i8 vector_from_scalar_v16i8 (dread i8 %scalar_int8)) + vec_int8x16 = __builtin_mpl_vector_from_scalar_v16i8(scalar_int8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_from_scalar_v2u64 (dread u64 %scalar_uint64)) + vec_uint64x2 = __builtin_mpl_vector_from_scalar_v2u64(scalar_uint64); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_from_scalar_v4u32 (dread u32 %scalar_uint32)) + vec_uint32x4 = __builtin_mpl_vector_from_scalar_v4u32(scalar_uint32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_from_scalar_v8u16 (dread u16 %scalar_uint16)) + vec_uint16x8 = __builtin_mpl_vector_from_scalar_v8u16(scalar_uint16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x16 0 (intrinsicop v16u8 vector_from_scalar_v16u8 (dread u8 %scalar_uint8)) + vec_uint8x16 = __builtin_mpl_vector_from_scalar_v16u8(scalar_uint8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x2 0 (intrinsicop v2f64 vector_from_scalar_v2f64 (dread f64 %scalar_float64)) + vec_float64x2 = __builtin_mpl_vector_from_scalar_v2f64(scalar_float64); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x4 0 (intrinsicop v4f32 vector_from_scalar_v4f32 (dread f32 %scalar_float32)) + vec_float32x4 = __builtin_mpl_vector_from_scalar_v4f32(scalar_float32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_from_scalar_v1i64 (dread i64 %scalar_int64)) + vec_int64x1 = __builtin_mpl_vector_from_scalar_v1i64(scalar_int64); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_from_scalar_v2i32 (dread i32 %scalar_int32)) + vec_int32x2 = __builtin_mpl_vector_from_scalar_v2i32(scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_from_scalar_v4i16 (dread i16 %scalar_int16)) + vec_int16x4 = __builtin_mpl_vector_from_scalar_v4i16(scalar_int16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_from_scalar_v8i8 (dread i8 %scalar_int8)) + vec_int8x8 = __builtin_mpl_vector_from_scalar_v8i8(scalar_int8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_from_scalar_v1u64 (dread u64 %scalar_uint64)) + vec_uint64x1 = __builtin_mpl_vector_from_scalar_v1u64(scalar_uint64); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_from_scalar_v2u32 (dread u32 %scalar_uint32)) + vec_uint32x2 = __builtin_mpl_vector_from_scalar_v2u32(scalar_uint32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_from_scalar_v4u16 (dread u16 %scalar_uint16)) + vec_uint16x4 = __builtin_mpl_vector_from_scalar_v4u16(scalar_uint16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_from_scalar_v8u8 (dread u8 %scalar_uint8)) + vec_uint8x8 = 
__builtin_mpl_vector_from_scalar_v8u8(scalar_uint8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_from_scalar_v1f64 (dread f64 %scalar_float64)) + vec_float64x1 = __builtin_mpl_vector_from_scalar_v1f64(scalar_float64); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_from_scalar_v2f32 (dread f32 %scalar_float32)) + vec_float32x2 = __builtin_mpl_vector_from_scalar_v2f32(scalar_float32); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_merge_v2i64 (dread v2i64 %vec_int64x2, dread v2i64 %vec_int64x2, dread i32 %scalar_int32)) + vec_int64x2 = __builtin_mpl_vector_merge_v2i64(vec_int64x2, vec_int64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_merge_v4i32 (dread v4i32 %vec_int32x4, dread v4i32 %vec_int32x4, dread i32 %scalar_int32)) + vec_int32x4 = __builtin_mpl_vector_merge_v4i32(vec_int32x4, vec_int32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_merge_v8i16 (dread v8i16 %vec_int16x8, dread v8i16 %vec_int16x8, dread i32 %scalar_int32)) + vec_int16x8 = __builtin_mpl_vector_merge_v8i16(vec_int16x8, vec_int16x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x16 0 (intrinsicop v16i8 vector_merge_v16i8 (dread v16i8 %vec_int8x16, dread v16i8 %vec_int8x16, dread i32 %scalar_int32)) + vec_int8x16 = __builtin_mpl_vector_merge_v16i8(vec_int8x16, vec_int8x16, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_merge_v2u64 (dread v2u64 %vec_uint64x2, dread v2u64 %vec_uint64x2, dread i32 %scalar_int32)) + vec_uint64x2 = __builtin_mpl_vector_merge_v2u64(vec_uint64x2, vec_uint64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_merge_v4u32 (dread v4u32 %vec_uint32x4, dread v4u32 %vec_uint32x4, dread i32 %scalar_int32)) + vec_uint32x4 = __builtin_mpl_vector_merge_v4u32(vec_uint32x4, vec_uint32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_merge_v8u16 (dread v8u16 %vec_uint16x8, dread v8u16 %vec_uint16x8, dread i32 %scalar_int32)) + vec_uint16x8 = __builtin_mpl_vector_merge_v8u16(vec_uint16x8, vec_uint16x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x16 0 (intrinsicop v16u8 vector_merge_v16u8 (dread v16u8 %vec_uint8x16, dread v16u8 %vec_uint8x16, dread i32 %scalar_int32)) + vec_uint8x16 = __builtin_mpl_vector_merge_v16u8(vec_uint8x16, vec_uint8x16, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x2 0 (intrinsicop v2f64 vector_merge_v2f64 (dread v2f64 %vec_float64x2, dread v2f64 %vec_float64x2, dread i32 %scalar_int32)) + vec_float64x2 = __builtin_mpl_vector_merge_v2f64(vec_float64x2, vec_float64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x4 0 (intrinsicop v4f32 vector_merge_v4f32 (dread v4f32 %vec_float32x4, dread v4f32 %vec_float32x4, dread i32 %scalar_int32)) + vec_float32x4 = __builtin_mpl_vector_merge_v4f32(vec_float32x4, 
vec_float32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_merge_v1i64 (dread i64 %vec_int64x1, dread i64 %vec_int64x1, dread i32 %scalar_int32)) + vec_int64x1 = __builtin_mpl_vector_merge_v1i64(vec_int64x1, vec_int64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_merge_v2i32 (dread v2i32 %vec_int32x2, dread v2i32 %vec_int32x2, dread i32 %scalar_int32)) + vec_int32x2 = __builtin_mpl_vector_merge_v2i32(vec_int32x2, vec_int32x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_merge_v4i16 (dread v4i16 %vec_int16x4, dread v4i16 %vec_int16x4, dread i32 %scalar_int32)) + vec_int16x4 = __builtin_mpl_vector_merge_v4i16(vec_int16x4, vec_int16x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_merge_v8i8 (dread v8i8 %vec_int8x8, dread v8i8 %vec_int8x8, dread i32 %scalar_int32)) + vec_int8x8 = __builtin_mpl_vector_merge_v8i8(vec_int8x8, vec_int8x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_merge_v1u64 (dread u64 %vec_uint64x1, dread u64 %vec_uint64x1, dread i32 %scalar_int32)) + vec_uint64x1 = __builtin_mpl_vector_merge_v1u64(vec_uint64x1, vec_uint64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_merge_v2u32 (dread v2u32 %vec_uint32x2, dread v2u32 %vec_uint32x2, dread i32 %scalar_int32)) + vec_uint32x2 = __builtin_mpl_vector_merge_v2u32(vec_uint32x2, vec_uint32x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_merge_v4u16 (dread v4u16 %vec_uint16x4, dread v4u16 %vec_uint16x4, dread i32 %scalar_int32)) + vec_uint16x4 = __builtin_mpl_vector_merge_v4u16(vec_uint16x4, vec_uint16x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_merge_v8u8 (dread v8u8 %vec_uint8x8, dread v8u8 %vec_uint8x8, dread i32 %scalar_int32)) + vec_uint8x8 = __builtin_mpl_vector_merge_v8u8(vec_uint8x8, vec_uint8x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_merge_v1f64 (dread f64 %vec_float64x1, dread f64 %vec_float64x1, dread i32 %scalar_int32)) + vec_float64x1 = __builtin_mpl_vector_merge_v1f64(vec_float64x1, vec_float64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_merge_v2f32 (dread v2f32 %vec_float32x2, dread v2f32 %vec_float32x2, dread i32 %scalar_int32)) + vec_float32x2 = __builtin_mpl_vector_merge_v2f32(vec_float32x2, vec_float32x2, scalar_int32); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_get_low_v2i64 (dread v2i64 %vec_int64x2)) + vec_int64x1 = __builtin_mpl_vector_get_low_v2i64(vec_int64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_get_low_v4i32 (dread v4i32 %vec_int32x4)) + vec_int32x2 = __builtin_mpl_vector_get_low_v4i32(vec_int32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign 
%vec_int16x4 0 (intrinsicop v4i16 vector_get_low_v8i16 (dread v8i16 %vec_int16x8)) + vec_int16x4 = __builtin_mpl_vector_get_low_v8i16(vec_int16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_get_low_v16i8 (dread v16i8 %vec_int8x16)) + vec_int8x8 = __builtin_mpl_vector_get_low_v16i8(vec_int8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_get_low_v2u64 (dread v2u64 %vec_uint64x2)) + vec_uint64x1 = __builtin_mpl_vector_get_low_v2u64(vec_uint64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_get_low_v4u32 (dread v4u32 %vec_uint32x4)) + vec_uint32x2 = __builtin_mpl_vector_get_low_v4u32(vec_uint32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_get_low_v8u16 (dread v8u16 %vec_uint16x8)) + vec_uint16x4 = __builtin_mpl_vector_get_low_v8u16(vec_uint16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_get_low_v16u8 (dread v16u8 %vec_uint8x16)) + vec_uint8x8 = __builtin_mpl_vector_get_low_v16u8(vec_uint8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_get_low_v2f64 (dread v2f64 %vec_float64x2)) + vec_float64x1 = __builtin_mpl_vector_get_low_v2f64(vec_float64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_get_low_v4f32 (dread v4f32 %vec_float32x4)) + vec_float32x2 = __builtin_mpl_vector_get_low_v4f32(vec_float32x4); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_get_high_v2i64 (dread v2i64 %vec_int64x2)) + vec_int64x1 = __builtin_mpl_vector_get_high_v2i64(vec_int64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_get_high_v4i32 (dread v4i32 %vec_int32x4)) + vec_int32x2 = __builtin_mpl_vector_get_high_v4i32(vec_int32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_get_high_v8i16 (dread v8i16 %vec_int16x8)) + vec_int16x4 = __builtin_mpl_vector_get_high_v8i16(vec_int16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_get_high_v16i8 (dread v16i8 %vec_int8x16)) + vec_int8x8 = __builtin_mpl_vector_get_high_v16i8(vec_int8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_get_high_v2u64 (dread v2u64 %vec_uint64x2)) + vec_uint64x1 = __builtin_mpl_vector_get_high_v2u64(vec_uint64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_get_high_v4u32 (dread v4u32 %vec_uint32x4)) + vec_uint32x2 = __builtin_mpl_vector_get_high_v4u32(vec_uint32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_get_high_v8u16 (dread v8u16 %vec_uint16x8)) + vec_uint16x4 = __builtin_mpl_vector_get_high_v8u16(vec_uint16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_get_high_v16u8 (dread v16u8 %vec_uint8x16)) + vec_uint8x8 = 
__builtin_mpl_vector_get_high_v16u8(vec_uint8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_get_high_v2f64 (dread v2f64 %vec_float64x2)) + vec_float64x1 = __builtin_mpl_vector_get_high_v2f64(vec_float64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_get_high_v4f32 (dread v4f32 %vec_float32x4)) + vec_float32x2 = __builtin_mpl_vector_get_high_v4f32(vec_float32x4); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int64 0 (intrinsicop i64 vector_get_element_v2i64 (dread v2i64 %vec_int64x2, dread i32 %scalar_int32)) + scalar_int64 = __builtin_mpl_vector_get_element_v2i64(vec_int64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int32 0 (intrinsicop i32 vector_get_element_v4i32 (dread v4i32 %vec_int32x4, dread i32 %scalar_int32)) + scalar_int32 = __builtin_mpl_vector_get_element_v4i32(vec_int32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int16 0 (intrinsicop i16 vector_get_element_v8i16 (dread v8i16 %vec_int16x8, dread i32 %scalar_int32)) + scalar_int16 = __builtin_mpl_vector_get_element_v8i16(vec_int16x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int8 0 (intrinsicop i8 vector_get_element_v16i8 (dread v16i8 %vec_int8x16, dread i32 %scalar_int32)) + scalar_int8 = __builtin_mpl_vector_get_element_v16i8(vec_int8x16, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint64 0 (intrinsicop u64 vector_get_element_v2u64 (dread v2u64 %vec_uint64x2, dread i32 %scalar_int32)) + scalar_uint64 = __builtin_mpl_vector_get_element_v2u64(vec_uint64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint32 0 (intrinsicop u32 vector_get_element_v4u32 (dread v4u32 %vec_uint32x4, dread i32 %scalar_int32)) + scalar_uint32 = __builtin_mpl_vector_get_element_v4u32(vec_uint32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint16 0 (intrinsicop u16 vector_get_element_v8u16 (dread v8u16 %vec_uint16x8, dread i32 %scalar_int32)) + scalar_uint16 = __builtin_mpl_vector_get_element_v8u16(vec_uint16x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint8 0 (intrinsicop u8 vector_get_element_v16u8 (dread v16u8 %vec_uint8x16, dread i32 %scalar_int32)) + scalar_uint8 = __builtin_mpl_vector_get_element_v16u8(vec_uint8x16, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_float64 0 (intrinsicop f64 vector_get_element_v2f64 (dread v2f64 %vec_float64x2, dread i32 %scalar_int32)) + scalar_float64 = __builtin_mpl_vector_get_element_v2f64(vec_float64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_float32 0 (intrinsicop f32 vector_get_element_v4f32 (dread v4f32 %vec_float32x4, dread i32 %scalar_int32)) + scalar_float32 = __builtin_mpl_vector_get_element_v4f32(vec_float32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int64 0 (intrinsicop i64 vector_get_element_v1i64 (dread i64 %vec_int64x1, dread i32 %scalar_int32)) + scalar_int64 = __builtin_mpl_vector_get_element_v1i64(vec_int64x1, scalar_int32); + // CHECK: LOC 
[[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int32 0 (intrinsicop i32 vector_get_element_v2i32 (dread v2i32 %vec_int32x2, dread i32 %scalar_int32)) + scalar_int32 = __builtin_mpl_vector_get_element_v2i32(vec_int32x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int16 0 (intrinsicop i16 vector_get_element_v4i16 (dread v4i16 %vec_int16x4, dread i32 %scalar_int32)) + scalar_int16 = __builtin_mpl_vector_get_element_v4i16(vec_int16x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_int8 0 (intrinsicop i8 vector_get_element_v8i8 (dread v8i8 %vec_int8x8, dread i32 %scalar_int32)) + scalar_int8 = __builtin_mpl_vector_get_element_v8i8(vec_int8x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint64 0 (intrinsicop u64 vector_get_element_v1u64 (dread u64 %vec_uint64x1, dread i32 %scalar_int32)) + scalar_uint64 = __builtin_mpl_vector_get_element_v1u64(vec_uint64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint32 0 (intrinsicop u32 vector_get_element_v2u32 (dread v2u32 %vec_uint32x2, dread i32 %scalar_int32)) + scalar_uint32 = __builtin_mpl_vector_get_element_v2u32(vec_uint32x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint16 0 (intrinsicop u16 vector_get_element_v4u16 (dread v4u16 %vec_uint16x4, dread i32 %scalar_int32)) + scalar_uint16 = __builtin_mpl_vector_get_element_v4u16(vec_uint16x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_uint8 0 (intrinsicop u8 vector_get_element_v8u8 (dread v8u8 %vec_uint8x8, dread i32 %scalar_int32)) + scalar_uint8 = __builtin_mpl_vector_get_element_v8u8(vec_uint8x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_float64 0 (intrinsicop f64 vector_get_element_v1f64 (dread f64 %vec_float64x1, dread i32 %scalar_int32)) + scalar_float64 = __builtin_mpl_vector_get_element_v1f64(vec_float64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %scalar_float32 0 (intrinsicop f32 vector_get_element_v2f32 (dread v2f32 %vec_float32x2, dread i32 %scalar_int32)) + scalar_float32 = __builtin_mpl_vector_get_element_v2f32(vec_float32x2, scalar_int32); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_set_element_v2i64 (dread i64 %scalar_int64, dread v2i64 %vec_int64x2, dread i32 %scalar_int32)) + vec_int64x2 = __builtin_mpl_vector_set_element_v2i64(scalar_int64, vec_int64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_set_element_v4i32 (dread i32 %scalar_int32, dread v4i32 %vec_int32x4, dread i32 %scalar_int32)) + vec_int32x4 = __builtin_mpl_vector_set_element_v4i32(scalar_int32, vec_int32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_set_element_v8i16 (dread i16 %scalar_int16, dread v8i16 %vec_int16x8, dread i32 %scalar_int32)) + vec_int16x8 = __builtin_mpl_vector_set_element_v8i16(scalar_int16, vec_int16x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x16 0 (intrinsicop v16i8 vector_set_element_v16i8 (dread i8 %scalar_int8, dread v16i8 
%vec_int8x16, dread i32 %scalar_int32)) + vec_int8x16 = __builtin_mpl_vector_set_element_v16i8(scalar_int8, vec_int8x16, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_set_element_v2u64 (dread u64 %scalar_uint64, dread v2u64 %vec_uint64x2, dread i32 %scalar_int32)) + vec_uint64x2 = __builtin_mpl_vector_set_element_v2u64(scalar_uint64, vec_uint64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_set_element_v4u32 (dread u32 %scalar_uint32, dread v4u32 %vec_uint32x4, dread i32 %scalar_int32)) + vec_uint32x4 = __builtin_mpl_vector_set_element_v4u32(scalar_uint32, vec_uint32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_set_element_v8u16 (dread u16 %scalar_uint16, dread v8u16 %vec_uint16x8, dread i32 %scalar_int32)) + vec_uint16x8 = __builtin_mpl_vector_set_element_v8u16(scalar_uint16, vec_uint16x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x16 0 (intrinsicop v16u8 vector_set_element_v16u8 (dread u8 %scalar_uint8, dread v16u8 %vec_uint8x16, dread i32 %scalar_int32)) + vec_uint8x16 = __builtin_mpl_vector_set_element_v16u8(scalar_uint8, vec_uint8x16, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x2 0 (intrinsicop v2f64 vector_set_element_v2f64 (dread f64 %scalar_float64, dread v2f64 %vec_float64x2, dread i32 %scalar_int32)) + vec_float64x2 = __builtin_mpl_vector_set_element_v2f64(scalar_float64, vec_float64x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x4 0 (intrinsicop v4f32 vector_set_element_v4f32 (dread f32 %scalar_float32, dread v4f32 %vec_float32x4, dread i32 %scalar_int32)) + vec_float32x4 = __builtin_mpl_vector_set_element_v4f32(scalar_float32, vec_float32x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_set_element_v1i64 (dread i64 %scalar_int64, dread i64 %vec_int64x1, dread i32 %scalar_int32)) + vec_int64x1 = __builtin_mpl_vector_set_element_v1i64(scalar_int64, vec_int64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_set_element_v2i32 (dread i32 %scalar_int32, dread v2i32 %vec_int32x2, dread i32 %scalar_int32)) + vec_int32x2 = __builtin_mpl_vector_set_element_v2i32(scalar_int32, vec_int32x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_set_element_v4i16 (dread i16 %scalar_int16, dread v4i16 %vec_int16x4, dread i32 %scalar_int32)) + vec_int16x4 = __builtin_mpl_vector_set_element_v4i16(scalar_int16, vec_int16x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_set_element_v8i8 (dread i8 %scalar_int8, dread v8i8 %vec_int8x8, dread i32 %scalar_int32)) + vec_int8x8 = __builtin_mpl_vector_set_element_v8i8(scalar_int8, vec_int8x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_set_element_v1u64 (dread u64 %scalar_uint64, dread u64 %vec_uint64x1, dread i32 %scalar_int32)) + vec_uint64x1 = 
__builtin_mpl_vector_set_element_v1u64(scalar_uint64, vec_uint64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_set_element_v2u32 (dread u32 %scalar_uint32, dread v2u32 %vec_uint32x2, dread i32 %scalar_int32)) + vec_uint32x2 = __builtin_mpl_vector_set_element_v2u32(scalar_uint32, vec_uint32x2, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_set_element_v4u16 (dread u16 %scalar_uint16, dread v4u16 %vec_uint16x4, dread i32 %scalar_int32)) + vec_uint16x4 = __builtin_mpl_vector_set_element_v4u16(scalar_uint16, vec_uint16x4, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_set_element_v8u8 (dread u8 %scalar_uint8, dread v8u8 %vec_uint8x8, dread i32 %scalar_int32)) + vec_uint8x8 = __builtin_mpl_vector_set_element_v8u8(scalar_uint8, vec_uint8x8, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_set_element_v1f64 (dread f64 %scalar_float64, dread f64 %vec_float64x1, dread i32 %scalar_int32)) + vec_float64x1 = __builtin_mpl_vector_set_element_v1f64(scalar_float64, vec_float64x1, scalar_int32); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_set_element_v2f32 (dread f32 %scalar_float32, dread v2f32 %vec_float32x2, dread i32 %scalar_int32)) + vec_float32x2 = __builtin_mpl_vector_set_element_v2f32(scalar_float32, vec_float32x2, scalar_int32); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_pairwise_add_v4i32 (dread v4i32 %vec_int32x4)) + vec_int64x2 = __builtin_mpl_vector_pairwise_add_v4i32(vec_int32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_pairwise_add_v8i16 (dread v8i16 %vec_int16x8)) + vec_int32x4 = __builtin_mpl_vector_pairwise_add_v8i16(vec_int16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_pairwise_add_v16i8 (dread v16i8 %vec_int8x16)) + vec_int16x8 = __builtin_mpl_vector_pairwise_add_v16i8(vec_int8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_pairwise_add_v4u32 (dread v4u32 %vec_uint32x4)) + vec_uint64x2 = __builtin_mpl_vector_pairwise_add_v4u32(vec_uint32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_pairwise_add_v8u16 (dread v8u16 %vec_uint16x8)) + vec_uint32x4 = __builtin_mpl_vector_pairwise_add_v8u16(vec_uint16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_pairwise_add_v16u8 (dread v16u8 %vec_uint8x16)) + vec_uint16x8 = __builtin_mpl_vector_pairwise_add_v16u8(vec_uint8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_pairwise_add_v2i32 (dread v2i32 %vec_int32x2)) + vec_int64x1 = __builtin_mpl_vector_pairwise_add_v2i32(vec_int32x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_pairwise_add_v4i16 (dread v4i16 %vec_int16x4)) + vec_int32x2 = 
__builtin_mpl_vector_pairwise_add_v4i16(vec_int16x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_pairwise_add_v8i8 (dread v8i8 %vec_int8x8)) + vec_int16x4 = __builtin_mpl_vector_pairwise_add_v8i8(vec_int8x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_pairwise_add_v2u32 (dread v2u32 %vec_uint32x2)) + vec_uint64x1 = __builtin_mpl_vector_pairwise_add_v2u32(vec_uint32x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_pairwise_add_v4u16 (dread v4u16 %vec_uint16x4)) + vec_uint32x2 = __builtin_mpl_vector_pairwise_add_v4u16(vec_uint16x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_pairwise_add_v8u8 (dread v8u8 %vec_uint8x8)) + vec_uint16x4 = __builtin_mpl_vector_pairwise_add_v8u8(vec_uint8x8); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_reverse_v2i64 (dread v2i64 %vec_int64x2)) + vec_int64x2 = __builtin_mpl_vector_reverse_v2i64(vec_int64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_reverse_v4i32 (dread v4i32 %vec_int32x4)) + vec_int32x4 = __builtin_mpl_vector_reverse_v4i32(vec_int32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_reverse_v8i16 (dread v8i16 %vec_int16x8)) + vec_int16x8 = __builtin_mpl_vector_reverse_v8i16(vec_int16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x16 0 (intrinsicop v16i8 vector_reverse_v16i8 (dread v16i8 %vec_int8x16)) + vec_int8x16 = __builtin_mpl_vector_reverse_v16i8(vec_int8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_reverse_v2u64 (dread v2u64 %vec_uint64x2)) + vec_uint64x2 = __builtin_mpl_vector_reverse_v2u64(vec_uint64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_reverse_v4u32 (dread v4u32 %vec_uint32x4)) + vec_uint32x4 = __builtin_mpl_vector_reverse_v4u32(vec_uint32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_reverse_v8u16 (dread v8u16 %vec_uint16x8)) + vec_uint16x8 = __builtin_mpl_vector_reverse_v8u16(vec_uint16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x16 0 (intrinsicop v16u8 vector_reverse_v16u8 (dread v16u8 %vec_uint8x16)) + vec_uint8x16 = __builtin_mpl_vector_reverse_v16u8(vec_uint8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x2 0 (intrinsicop v2f64 vector_reverse_v2f64 (dread v2f64 %vec_float64x2)) + vec_float64x2 = __builtin_mpl_vector_reverse_v2f64(vec_float64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x4 0 (intrinsicop v4f32 vector_reverse_v4f32 (dread v4f32 %vec_float32x4)) + vec_float32x4 = __builtin_mpl_vector_reverse_v4f32(vec_float32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_reverse_v1i64 (dread i64 %vec_int64x1)) + vec_int64x1 = __builtin_mpl_vector_reverse_v1i64(vec_int64x1); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + 
// CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_reverse_v2i32 (dread v2i32 %vec_int32x2)) + vec_int32x2 = __builtin_mpl_vector_reverse_v2i32(vec_int32x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_reverse_v4i16 (dread v4i16 %vec_int16x4)) + vec_int16x4 = __builtin_mpl_vector_reverse_v4i16(vec_int16x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_reverse_v8i8 (dread v8i8 %vec_int8x8)) + vec_int8x8 = __builtin_mpl_vector_reverse_v8i8(vec_int8x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_reverse_v1u64 (dread u64 %vec_uint64x1)) + vec_uint64x1 = __builtin_mpl_vector_reverse_v1u64(vec_uint64x1); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_reverse_v2u32 (dread v2u32 %vec_uint32x2)) + vec_uint32x2 = __builtin_mpl_vector_reverse_v2u32(vec_uint32x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_reverse_v4u16 (dread v4u16 %vec_uint16x4)) + vec_uint16x4 = __builtin_mpl_vector_reverse_v4u16(vec_uint16x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_reverse_v8u8 (dread v8u8 %vec_uint8x8)) + vec_uint8x8 = __builtin_mpl_vector_reverse_v8u8(vec_uint8x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_reverse_v1f64 (dread f64 %vec_float64x1)) + vec_float64x1 = __builtin_mpl_vector_reverse_v1f64(vec_float64x1); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_reverse_v2f32 (dread v2f32 %vec_float32x2)) + vec_float32x2 = __builtin_mpl_vector_reverse_v2f32(vec_float32x2); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_load_v2i64 (dread a64 %ptr)) + vec_int64x2 = __builtin_mpl_vector_load_v2i64(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_load_v4i32 (dread a64 %ptr)) + vec_int32x4 = __builtin_mpl_vector_load_v4i32(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_load_v8i16 (dread a64 %ptr)) + vec_int16x8 = __builtin_mpl_vector_load_v8i16(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x16 0 (intrinsicop v16i8 vector_load_v16i8 (dread a64 %ptr)) + vec_int8x16 = __builtin_mpl_vector_load_v16i8(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_load_v2u64 (dread a64 %ptr)) + vec_uint64x2 = __builtin_mpl_vector_load_v2u64(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_load_v4u32 (dread a64 %ptr)) + vec_uint32x4 = __builtin_mpl_vector_load_v4u32(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_load_v8u16 (dread a64 %ptr)) + vec_uint16x8 = __builtin_mpl_vector_load_v8u16(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x16 0 (intrinsicop v16u8 vector_load_v16u8 (dread a64 %ptr)) + 
vec_uint8x16 = __builtin_mpl_vector_load_v16u8(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x2 0 (intrinsicop v2f64 vector_load_v2f64 (dread a64 %ptr)) + vec_float64x2 = __builtin_mpl_vector_load_v2f64(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x4 0 (intrinsicop v4f32 vector_load_v4f32 (dread a64 %ptr)) + vec_float32x4 = __builtin_mpl_vector_load_v4f32(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_load_v1i64 (dread a64 %ptr)) + vec_int64x1 = __builtin_mpl_vector_load_v1i64(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_load_v2i32 (dread a64 %ptr)) + vec_int32x2 = __builtin_mpl_vector_load_v2i32(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_load_v4i16 (dread a64 %ptr)) + vec_int16x4 = __builtin_mpl_vector_load_v4i16(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_load_v8i8 (dread a64 %ptr)) + vec_int8x8 = __builtin_mpl_vector_load_v8i8(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_load_v1u64 (dread a64 %ptr)) + vec_uint64x1 = __builtin_mpl_vector_load_v1u64(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_load_v2u32 (dread a64 %ptr)) + vec_uint32x2 = __builtin_mpl_vector_load_v2u32(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_load_v4u16 (dread a64 %ptr)) + vec_uint16x4 = __builtin_mpl_vector_load_v4u16(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_load_v8u8 (dread a64 %ptr)) + vec_uint8x8 = __builtin_mpl_vector_load_v8u8(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_load_v1f64 (dread a64 %ptr)) + vec_float64x1 = __builtin_mpl_vector_load_v1f64(ptr); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_load_v2f32 (dread a64 %ptr)) + vec_float32x2 = __builtin_mpl_vector_load_v2f32(ptr); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v2i64 (dread a64 %ptr, dread v2i64 %vec_int64x2) + __builtin_mpl_vector_store_v2i64(ptr, vec_int64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v4i32 (dread a64 %ptr, dread v4i32 %vec_int32x4) + __builtin_mpl_vector_store_v4i32(ptr, vec_int32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v8i16 (dread a64 %ptr, dread v8i16 %vec_int16x8) + __builtin_mpl_vector_store_v8i16(ptr, vec_int16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v16i8 (dread a64 %ptr, dread v16i8 %vec_int8x16) + __builtin_mpl_vector_store_v16i8(ptr, vec_int8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v2u64 (dread a64 %ptr, dread v2u64 %vec_uint64x2) + __builtin_mpl_vector_store_v2u64(ptr, vec_uint64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // 
CHECK-NEXT: intrinsicop void vector_store_v4u32 (dread a64 %ptr, dread v4u32 %vec_uint32x4) + __builtin_mpl_vector_store_v4u32(ptr, vec_uint32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v8u16 (dread a64 %ptr, dread v8u16 %vec_uint16x8) + __builtin_mpl_vector_store_v8u16(ptr, vec_uint16x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v16u8 (dread a64 %ptr, dread v16u8 %vec_uint8x16) + __builtin_mpl_vector_store_v16u8(ptr, vec_uint8x16); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v2f64 (dread a64 %ptr, dread v2f64 %vec_float64x2) + __builtin_mpl_vector_store_v2f64(ptr, vec_float64x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v4f32 (dread a64 %ptr, dread v4f32 %vec_float32x4) + __builtin_mpl_vector_store_v4f32(ptr, vec_float32x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v1i64 (dread a64 %ptr, dread i64 %vec_int64x1) + __builtin_mpl_vector_store_v1i64(ptr, vec_int64x1); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v2i32 (dread a64 %ptr, dread v2i32 %vec_int32x2) + __builtin_mpl_vector_store_v2i32(ptr, vec_int32x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v4i16 (dread a64 %ptr, dread v4i16 %vec_int16x4) + __builtin_mpl_vector_store_v4i16(ptr, vec_int16x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v8i8 (dread a64 %ptr, dread v8i8 %vec_int8x8) + __builtin_mpl_vector_store_v8i8(ptr, vec_int8x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v1u64 (dread a64 %ptr, dread u64 %vec_uint64x1) + __builtin_mpl_vector_store_v1u64(ptr, vec_uint64x1); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v2u32 (dread a64 %ptr, dread v2u32 %vec_uint32x2) + __builtin_mpl_vector_store_v2u32(ptr, vec_uint32x2); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v4u16 (dread a64 %ptr, dread v4u16 %vec_uint16x4) + __builtin_mpl_vector_store_v4u16(ptr, vec_uint16x4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v8u8 (dread a64 %ptr, dread v8u8 %vec_uint8x8) + __builtin_mpl_vector_store_v8u8(ptr, vec_uint8x8); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v1f64 (dread a64 %ptr, dread f64 %vec_float64x1) + __builtin_mpl_vector_store_v1f64(ptr, vec_float64x1); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop void vector_store_v2f32 (dread a64 %ptr, dread v2f32 %vec_float32x2) + __builtin_mpl_vector_store_v2f32(ptr, vec_float32x2); +} -- Gitee From ed726df3fafe739a341b808d72eeaa6c47932844 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 1 Jun 2021 13:25:03 -0400 Subject: [PATCH 06/11] Fix typos in arm_neon.h --- sys/include/arm_neon.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sys/include/arm_neon.h b/sys/include/arm_neon.h index 1382aa3..07c3bd3 100644 --- a/sys/include/arm_neon.h +++ b/sys/include/arm_neon.h @@ -259,19 +259,19 @@ void __builtin_mpl_vector_store_v1f64(float64_t *, float64x1_t); void __builtin_mpl_vector_store_v2f32(float32_t *, 
float32x2_t);
 
 #define vdupq_n_s32(value) __builtin_mpl_vector_from_scalar_v4i32(value)
-#define vdupq_n_u8(value) __builtin_mpl_vector_from_scalar_v8u16(value)
-#define vextq_u8(a, b, n) __builtin_mpl_vector_merge_v8u16(a, b, n)
+#define vdupq_n_u8(value) __builtin_mpl_vector_from_scalar_v16u8(value)
+#define vextq_u8(a, b, n) __builtin_mpl_vector_merge_v16u8(a, b, n)
 #define vget_high_u64(a) __builtin_mpl_vector_get_high_v2u64(a)
 #define vget_low_u64(a) __builtin_mpl_vector_get_low_v2u64(a)
 #define vget_lane_u32(vec, lane) \
   __builtin_mpl_vector_get_element_v2u32(vec, lane)
-#define vld1q_u8(ptr) __builtin_mpl_vector_load_v8u16(ptr)
+#define vld1q_u8(ptr) __builtin_mpl_vector_load_v16u8(ptr)
 #define vpaddlq_u16(a) __builtin_mpl_vector_pairwise_add_v8u16(a)
 #define vpaddlq_u32(a) __builtin_mpl_vector_pairwise_add_v4u32(a)
 #define vreinterpretq_u64_u8(a) ((uint64x2_t)a)
-#define vrev32q_u8(vec) __builtin_mpl_vector_reverse_v8u16(vec)
+#define vrev32q_u8(vec) __builtin_mpl_vector_reverse_v16u8(vec)
 #define vsetq_lane_u32(value, vec, lane) \
-  __builtin_mpl_vector_set_element_v4i32(value, vec, lane)
+  __builtin_mpl_vector_set_element_v4u32(value, vec, lane)
 #define vst1q_s32(ptr, val) __builtin_mpl_vector_store_v4i32(ptr, val)
 #define vst1q_u8(ptr, val) __builtin_mpl_vector_store_v16u8(ptr, val)
-- 
Gitee

From ac92d6a2a265e77b8f19bd3757e8054fa84a8e86 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Tue, 1 Jun 2021 22:18:22 -0400
Subject: [PATCH 07/11] Generate iread/iassign for vector load/store

---
 Clang2MapleVisitor.cpp | 10 ++++++++--
 test/vector.c          | 40 ++++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp
index 3c2cf46..679aa63 100644
--- a/Clang2MapleVisitor.cpp
+++ b/Clang2MapleVisitor.cpp
@@ -1543,10 +1543,8 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) {
     VECTOR_INTRINSIC(from_scalar)
     else VECTOR_INTRINSIC(merge)
     else VECTOR_INTRINSIC(get_element)
-    else VECTOR_INTRINSIC(load)
     else VECTOR_INTRINSIC(reverse)
     else VECTOR_INTRINSIC(set_element)
-    else VECTOR_INTRINSIC(store)
     else if (VectorOpName.startswith("get_low")) {
       VECTOR_INTRINSIC_TYPE(get_low, v2i64)
       else VECTOR_INTRINSIC_TYPE(get_low, v4i32)
@@ -1584,6 +1582,14 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) {
       else VECTOR_INTRINSIC_TYPE(pairwise_add, v8u8)
     }
     // clang-format on
+    else if (VectorOpName.startswith("load")) {
+      Call = Builder->CreateExprIread(
+          *Ty, *type2Mpl(CE->getArg(0)->getType()), 0, Args[0]);
+    }
+    else if (VectorOpName.startswith("store")) {
+      Call = Builder->CreateStmtIassign(*type2Mpl(CE->getArg(0)->getType()),
+                                        0, Args[0], Args[1]);
+    }
   } else if (CalleeDecl->getName().startswith("__builtin_")) {
     MIRFunction *Callee = Builder->GetOrCreateFunction(
         CalleeDecl->getName().substr(10).str(), Ty->GetTypeIndex());
diff --git a/test/vector.c b/test/vector.c
index b7d69d2..576719a 100644
--- a/test/vector.c
+++ b/test/vector.c
@@ -498,64 +498,64 @@ void intrinsics() {
   vec_float32x2 = __builtin_mpl_vector_reverse_v2f32(vec_float32x2);
 
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int64x2 0 (intrinsicop v2i64 vector_load_v2i64 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int64x2 0 (iread v2i64 <* i64> 0 (dread a64 %ptr))
   vec_int64x2 = __builtin_mpl_vector_load_v2i64(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int32x4 0 (intrinsicop v4i32 vector_load_v4i32 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int32x4 0 (iread v4i32 <* i32> 0 (dread a64 %ptr))
   vec_int32x4 = __builtin_mpl_vector_load_v4i32(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int16x8 0 (intrinsicop v8i16 vector_load_v8i16 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int16x8 0 (iread v8i16 <* i16> 0 (dread a64 %ptr))
   vec_int16x8 = __builtin_mpl_vector_load_v8i16(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int8x16 0 (intrinsicop v16i8 vector_load_v16i8 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int8x16 0 (iread v16i8 <* i8> 0 (dread a64 %ptr))
   vec_int8x16 = __builtin_mpl_vector_load_v16i8(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint64x2 0 (intrinsicop v2u64 vector_load_v2u64 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint64x2 0 (iread v2u64 <* u64> 0 (dread a64 %ptr))
   vec_uint64x2 = __builtin_mpl_vector_load_v2u64(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint32x4 0 (intrinsicop v4u32 vector_load_v4u32 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint32x4 0 (iread v4u32 <* u32> 0 (dread a64 %ptr))
   vec_uint32x4 = __builtin_mpl_vector_load_v4u32(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint16x8 0 (intrinsicop v8u16 vector_load_v8u16 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint16x8 0 (iread v8u16 <* u16> 0 (dread a64 %ptr))
   vec_uint16x8 = __builtin_mpl_vector_load_v8u16(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint8x16 0 (intrinsicop v16u8 vector_load_v16u8 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint8x16 0 (iread v16u8 <* u8> 0 (dread a64 %ptr))
   vec_uint8x16 = __builtin_mpl_vector_load_v16u8(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_float64x2 0 (intrinsicop v2f64 vector_load_v2f64 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_float64x2 0 (iread v2f64 <* f64> 0 (dread a64 %ptr))
   vec_float64x2 = __builtin_mpl_vector_load_v2f64(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_float32x4 0 (intrinsicop v4f32 vector_load_v4f32 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_float32x4 0 (iread v4f32 <* f32> 0 (dread a64 %ptr))
   vec_float32x4 = __builtin_mpl_vector_load_v4f32(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int64x1 0 (intrinsicop i64 vector_load_v1i64 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int64x1 0 (iread i64 <* i64> 0 (dread a64 %ptr))
   vec_int64x1 = __builtin_mpl_vector_load_v1i64(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int32x2 0 (intrinsicop v2i32 vector_load_v2i32 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int32x2 0 (iread v2i32 <* i32> 0 (dread a64 %ptr))
   vec_int32x2 = __builtin_mpl_vector_load_v2i32(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int16x4 0 (intrinsicop v4i16 vector_load_v4i16 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int16x4 0 (iread v4i16 <* i16> 0 (dread a64 %ptr))
   vec_int16x4 = __builtin_mpl_vector_load_v4i16(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_int8x8 0 (intrinsicop v8i8 vector_load_v8i8 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_int8x8 0 (iread v8i8 <* i8> 0 (dread a64 %ptr))
   vec_int8x8 = __builtin_mpl_vector_load_v8i8(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint64x1 0 (intrinsicop u64 vector_load_v1u64 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint64x1 0 (iread u64 <* u64> 0 (dread a64 %ptr))
   vec_uint64x1 = __builtin_mpl_vector_load_v1u64(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint32x2 0 (intrinsicop v2u32 vector_load_v2u32 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint32x2 0 (iread v2u32 <* u32> 0 (dread a64 %ptr))
   vec_uint32x2 = __builtin_mpl_vector_load_v2u32(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint16x4 0 (intrinsicop v4u16 vector_load_v4u16 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint16x4 0 (iread v4u16 <* u16> 0 (dread a64 %ptr))
   vec_uint16x4 = __builtin_mpl_vector_load_v4u16(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_uint8x8 0 (intrinsicop v8u8 vector_load_v8u8 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_uint8x8 0 (iread v8u8 <* u8> 0 (dread a64 %ptr))
   vec_uint8x8 = __builtin_mpl_vector_load_v8u8(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_float64x1 0 (intrinsicop f64 vector_load_v1f64 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_float64x1 0 (iread f64 <* f64> 0 (dread a64 %ptr))
   vec_float64x1 = __builtin_mpl_vector_load_v1f64(ptr);
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-  // CHECK-NEXT: dassign %vec_float32x2 0 (intrinsicop v2f32 vector_load_v2f32 (dread a64 %ptr))
+  // CHECK-NEXT: dassign %vec_float32x2 0 (iread v2f32 <* f32> 0 (dread a64 %ptr))
   vec_float32x2 = __builtin_mpl_vector_load_v2f32(ptr);
 
   // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
-- 
Gitee

From d3451043eb8548e3ae5bb712b42e30483f065f38 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Wed, 2 Jun 2021 13:55:00 -0400
Subject: [PATCH 08/11] Use f64 for single element vector primitives

---
 Clang2MapleVisitor.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp
index 679aa63..fdb8929 100644
--- a/Clang2MapleVisitor.cpp
+++ b/Clang2MapleVisitor.cpp
@@ -3066,7 +3066,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) {
     switch (ElemTy->GetPrimType()) {
     case PTY_i64:
       if (NumElements == 1) {
-        TI = GlobalTables::GetTypeTable().GetPrimType(PTY_i64)->GetTypeIndex();
+        TI = GlobalTables::GetTypeTable().GetPrimType(PTY_f64)->GetTypeIndex();
       } else if (NumElements == 2) {
         TI =
             GlobalTables::GetTypeTable().GetPrimType(PTY_v2i64)->GetTypeIndex();
@@ -3112,7 +3112,7 @@ TyIdx Clang2MapleVisitor::type2MplIdx(clang::QualType QT) {
     case PTY_u64:
       if (NumElements == 1) {
-        TI = GlobalTables::GetTypeTable().GetPrimType(PTY_u64)->GetTypeIndex();
+        TI = GlobalTables::GetTypeTable().GetPrimType(PTY_f64)->GetTypeIndex();
      } else if (NumElements == 2) {
        TI =
            GlobalTables::GetTypeTable().GetPrimType(PTY_v2u64)->GetTypeIndex();
-- 
Gitee

From 00a48d6436bb667f3ebb3b726c459384b8292e7b Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Wed, 2 Jun 2021 15:06:54 -0400
Subject: [PATCH 09/11] Handle `iread` in vector init

---
 Clang2MapleVisitor.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp
index fdb8929..4be2731 100644
--- a/Clang2MapleVisitor.cpp
+++ b/Clang2MapleVisitor.cpp
@@ -4194,9 +4194,13 @@ unsigned Clang2MapleVisitor::assignVector(Result &Res, MIRSymbol *VecSym,
       Res.appendStmtBefore(ElemAssign);
       SizeInited += 1;
     }
+  } else if (Src->op == OP_iread) {
+    StmtNode *Init = Builder->CreateStmtDassign(*VecSym, 0, Src);
+    Res.appendStmtBefore(Init);
+    SizeInited += Ty->GetSize();
   } else {
     LogInfo::MapleLogger()
-        << "Warning: Unhandled aggregate initializer in assignArray\n";
+        << "Warning: Unhandled aggregate initializer in assignVector\n";
     Src->Dump();
   }
-- 
Gitee

From 6d04c1d6773f34746ef388e90d1b7720d99d1661 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Wed, 2 Jun 2021 20:24:13 -0400
Subject: [PATCH 10/11] Handle `intrinsicop` in vector initialization

---
 Clang2MapleVisitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp
index 4be2731..f4d6f71 100644
--- a/Clang2MapleVisitor.cpp
+++ b/Clang2MapleVisitor.cpp
@@ -4194,7 +4194,7 @@ unsigned Clang2MapleVisitor::assignVector(Result &Res, MIRSymbol *VecSym,
       Res.appendStmtBefore(ElemAssign);
       SizeInited += 1;
     }
-  } else if (Src->op == OP_iread) {
+  } else if (Src->op == OP_intrinsicop || Src->op == OP_iread) {
     StmtNode *Init = Builder->CreateStmtDassign(*VecSym, 0, Src);
     Res.appendStmtBefore(Init);
     SizeInited += Ty->GetSize();
-- 
Gitee

From dec264ddbc3debc447d6fced5c812ddde4faf596 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Tue, 25 May 2021 10:53:18 -0400
Subject: [PATCH 11/11] Add support for all functions with intrinsics

Calls to all intrinsics from OAC's intrinsic_c.def can now be generated by
the frontend.

Resolves #I3SX9B
---
 Clang2MapleVisitor.cpp | 294 ++++++++++++++++++++++++++------
 test/intrinsics.c      | 379 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 618 insertions(+), 55 deletions(-)
 create mode 100644 test/intrinsics.c

diff --git a/Clang2MapleVisitor.cpp b/Clang2MapleVisitor.cpp
index f4d6f71..520dc53 100644
--- a/Clang2MapleVisitor.cpp
+++ b/Clang2MapleVisitor.cpp
@@ -16,6 +16,7 @@
 #include "Clang2MapleOptions.h"
 #include "clang/AST/APValue.h"
+#include "clang/AST/AST.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/APFloat.h"
@@ -1407,46 +1408,35 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) {
   // Special cases for some builtins
   unsigned int BuiltinID = CalleeDecl->getBuiltinID();
+  // Note: The cases are kept in alphabetical order, ignoring an optional
+  // `__builtin_` prefix, and similar builtins are grouped together, even if
+  // that puts them out of order.
   switch (CalleeDecl->getBuiltinID()) {
-  case clang::Builtin::BI__builtin_va_start:
-    // The address of the ap_list parameter needs to be passed
-    Args[0] = getAddrOfNode(Args[0]);
-
-    Call = Builder->CreateStmtIntrinsicCall(INTRN_C_va_start, Args);
+  case clang::Builtin::BI__builtin_abs:
+  case clang::Builtin::BI__builtin_fabs:
+  case clang::Builtin::BI__builtin_fabsf:
+  case clang::Builtin::BI__builtin_fabsl:
+  case clang::Builtin::BI__builtin_fabsf16:
+  case clang::Builtin::BI__builtin_fabsf128:
+  case clang::Builtin::BI__builtin_labs:
+  case clang::Builtin::BI__builtin_llabs:
+  case clang::Builtin::BIabs:
+  case clang::Builtin::BIlabs:
+  case clang::Builtin::BIllabs:
+  case clang::Builtin::BIfabs:
+  case clang::Builtin::BIfabsf:
+  case clang::Builtin::BIfabsl:
+    Call = Builder->CreateExprUnary(OP_abs, *Ty, Args[0]);
     break;
-  case clang::Builtin::BI__builtin_va_copy: {
-    ASSERT(Args.size() == 2, "ap_copy expects 2 arguments");
-    // The address of the ap_list parameters needs to be passed
-    Args[0] = getAddrOfNode(Args[0]);
-    Args[1] = getAddrOfNode(Args[1]);
-
-    // Add the size of the ap_list structure as the size to memcpy.
-    clang::TypedefDecl *VaListTypeDef = Context->getBuiltinVaListDecl();
-    MIRType *APListTy = type2Mpl(VaListTypeDef->getUnderlyingType());
-    Args.push_back(Builder->GetConstInt(APListTy->GetSize()));
-
-    Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcpy, Args);
-  } break;
-  case clang::Builtin::BI__builtin_va_end:
-    // Nothing needs to be done for this function
-    return Res;
-  case clang::Builtin::BI__builtin_ctz:
-    if (Ty->GetSize() == 4) {
-      Call = Builder->CreateExprIntrinsicop(INTRN_C_ctz32, OP_intrinsicop,
-                                            *Ty, Args);
-    } else {
-      Call = Builder->CreateExprIntrinsicop(INTRN_C_ctz64, OP_intrinsicop,
-                                            *Ty, Args);
-    }
+  case clang::Builtin::BI__builtin_acos:
+  case clang::Builtin::BIacos:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_acos, OP_intrinsicop, *Ty,
+                                          Args);
     break;
-  case clang::Builtin::BI__builtin_clz:
-    if (Ty->GetSize() == 4) {
-      Call = Builder->CreateExprIntrinsicop(INTRN_C_clz32, OP_intrinsicop,
-                                            *Ty, Args);
-    } else {
-      Call = Builder->CreateExprIntrinsicop(INTRN_C_clz64, OP_intrinsicop,
-                                            *Ty, Args);
-    }
+  case clang::Builtin::BI__builtin_acosf:
+  case clang::Builtin::BIacosf:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_acosf, OP_intrinsicop, *Ty,
+                                          Args);
     break;
   case clang::Builtin::BI__builtin_alloca:
   case clang::Builtin::BI_alloca:
@@ -1454,6 +1444,43 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) {
     Call = Builder->CreateExprUnary(OP_alloca, *Ty, Args[0]);
     Call->SetPrimType(PointerPrimTy);
     break;
+  case clang::Builtin::BI__builtin_asin:
+  case clang::Builtin::BIasin:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_asin, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_asinf:
+  case clang::Builtin::BIasinf:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_asinf, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_atan:
+  case clang::Builtin::BIatan:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_atan, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_atanf:
+  case clang::Builtin::BIatanf:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_atanf, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_classify_type: {
+    // Let Clang figure out the type classification
+    clang::Expr::EvalResult R;
+    bool Success = CE->EvaluateAsInt(R, *Context);
+    ASSERT(Success, "Failed to evaluate __builtin_classify_type");
+    llvm::APSInt Val = R.Val.getInt();
+    Call = Builder->CreateIntConst(Val.getExtValue(), Ty->GetPrimType());
+  } break;
+  case clang::Builtin::BI__builtin_clz:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_clz32, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_clzl:
+  case clang::Builtin::BI__builtin_clzll:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_clz64, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
   case clang::Builtin::BI__builtin_constant_p: {
     int Val = CE->getArg(0)->isConstantInitializer(*Context, false) ? 1 : 0;
     // Pointers are not considered constant
@@ -1463,14 +1490,46 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) {
     }
     Call = Builder->CreateIntConst(Val, Ty->GetPrimType());
   } break;
-  case clang::Builtin::BI__builtin_classify_type: {
-    // Let Clang figure out the type classification
-    clang::Expr::EvalResult R;
-    bool Success = CE->EvaluateAsInt(R, *Context);
-    ASSERT(Success, "Failed to evaluate __builtin_classify_type");
-    llvm::APSInt Val = R.Val.getInt();
-    Call = Builder->CreateIntConst(Val.getExtValue(), Ty->GetPrimType());
-  } break;
+  case clang::Builtin::BI__builtin_cos:
+  case clang::Builtin::BIcos:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_cos, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_cosf:
+  case clang::Builtin::BIcosf:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_cosf, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_cosh:
+  case clang::Builtin::BIcosh:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_cosh, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_coshf:
+  case clang::Builtin::BIcoshf:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_coshf, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_ctz:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_ctz32, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_ctzl:
+  case clang::Builtin::BI__builtin_ctzll:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_ctz64, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_exp:
+  case clang::Builtin::BIexp:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_exp, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_expf:
+  case clang::Builtin::BIexpf:
+    Call = Builder->CreateExprIntrinsicop(INTRN_C_expf, OP_intrinsicop, *Ty,
+                                          Args);
+    break;
+  case clang::Builtin::BI__builtin_expect: {
     ASSERT(Args.size() == 2, "__builtin_expect requires two arguments");
     // Arg 0 is the expression and arg 1 is the expected value.
@@ -1486,14 +1545,28 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) {
     }
     Res.appendStmtBefore(SNode);
   } break;
-  case clang::Builtin::BI__builtin_signbit: {
-    MIRFunction *Callee =
-        Builder->GetOrCreateFunction("__signbit", Ty->GetTypeIndex());
-    Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args);
-  } break;
-  case clang::Builtin::BI__builtin_prefetch:
-    // TODO: Do something useful here.
- return Res; + case clang::Builtin::BI__builtin_ffs: + Call = Builder->CreateExprIntrinsicop(INTRN_C_ffs, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_fmax: + case clang::Builtin::BI__builtin_fmaxf: + case clang::Builtin::BI__builtin_fmaxf16: + case clang::Builtin::BI__builtin_fmaxl: + case clang::Builtin::BIfmax: + case clang::Builtin::BIfmaxf: + case clang::Builtin::BIfmaxl: + Call = Builder->CreateExprBinary(OP_max, *Ty, Args[0], Args[1]); + break; + case clang::Builtin::BI__builtin_fmin: + case clang::Builtin::BI__builtin_fminf: + case clang::Builtin::BI__builtin_fminf16: + case clang::Builtin::BI__builtin_fminl: + case clang::Builtin::BIfmin: + case clang::Builtin::BIfminf: + case clang::Builtin::BIfminl: + Call = Builder->CreateExprBinary(OP_min, *Ty, Args[0], Args[1]); + break; case clang::Builtin::BI__builtin_isinf_sign: ASSERT(Args.size() == 1, "Incorrect arguments to isinf"); if (Args[0]->GetPrimType() == PTY_f64) { @@ -1504,7 +1577,119 @@ Result Clang2MapleVisitor::VisitCallExpr(const clang::CallExpr *CE) { ASSERT(false, "Unsupported type passed to isinf"); } break; + case clang::Builtin::BI__builtin_log: + case clang::Builtin::BIlog: + Call = Builder->CreateExprIntrinsicop(INTRN_C_log, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_logf: + case clang::Builtin::BIlogf: + Call = Builder->CreateExprIntrinsicop(INTRN_C_logf, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_log10: + case clang::Builtin::BIlog10: + Call = Builder->CreateExprIntrinsicop(INTRN_C_log10, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_log10f: + case clang::Builtin::BIlog10f: + Call = Builder->CreateExprIntrinsicop(INTRN_C_log10f, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_memcmp: + case clang::Builtin::BImemcmp: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcmp, Args); + break; + case clang::Builtin::BI__builtin_memcpy: + case clang::Builtin::BImemcpy: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcpy, Args); + break; + case clang::Builtin::BI__builtin_memmove: + case clang::Builtin::BImemmove: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memmove, Args); + break; + case clang::Builtin::BI__builtin_memset: + case clang::Builtin::BImemset: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memset, Args); + break; + case clang::Builtin::BI__builtin_prefetch: + // TODO: Do something useful here. 
+ return Res; + case clang::Builtin::BI__builtin_signbit: { + MIRFunction *Callee = + Builder->GetOrCreateFunction("__signbit", Ty->GetTypeIndex()); + Call = Builder->CreateStmtCall(Callee->GetPuidx(), Args); + } break; + case clang::Builtin::BI__builtin_sin: + case clang::Builtin::BIsin: + Call = Builder->CreateExprIntrinsicop(INTRN_C_sin, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_sinf: + case clang::Builtin::BIsinf: + Call = Builder->CreateExprIntrinsicop(INTRN_C_sinf, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_sinh: + case clang::Builtin::BIsinh: + Call = Builder->CreateExprIntrinsicop(INTRN_C_sinh, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_sinhf: + case clang::Builtin::BIsinhf: + Call = Builder->CreateExprIntrinsicop(INTRN_C_sinhf, OP_intrinsicop, *Ty, + Args); + break; + case clang::Builtin::BI__builtin_strcmp: + case clang::Builtin::BIstrcmp: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_strcmp, Args); + break; + case clang::Builtin::BI__builtin_strncmp: + case clang::Builtin::BIstrncmp: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_strncmp, Args); + break; + case clang::Builtin::BI__builtin_strcpy: + case clang::Builtin::BIstrcpy: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_strcpy, Args); + break; + case clang::Builtin::BI__builtin_strncpy: + case clang::Builtin::BIstrncpy: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_strncpy, Args); + break; + case clang::Builtin::BI__builtin_strlen: + case clang::Builtin::BIstrlen: + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_strlen, Args); + break; + case clang::Builtin::BI__builtin_va_copy: { + ASSERT(Args.size() == 2, "ap_copy expects 2 arguments"); + // The address of the ap_list parameters needs to be passed + Args[0] = getAddrOfNode(Args[0]); + Args[1] = getAddrOfNode(Args[1]); + + // Add the size of the ap_list structure as the size to memcpy. + clang::TypedefDecl *VaListTypeDef = Context->getBuiltinVaListDecl(); + MIRType *APListTy = type2Mpl(VaListTypeDef->getUnderlyingType()); + Args.push_back(Builder->GetConstInt(APListTy->GetSize())); + + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_memcpy, Args); + } break; + case clang::Builtin::BI__builtin_va_end: + // Nothing needs to be done for this function + return Res; + case clang::Builtin::BI__builtin_va_start: + // The address of the ap_list parameter needs to be passed + Args[0] = getAddrOfNode(Args[0]); + + Call = Builder->CreateStmtIntrinsicCall(INTRN_C_va_start, Args); + break; default: { + if (BuiltinID != 0) { + LogInfo::MapleLogger() + << "Unhandled builtin: " << Context->BuiltinInfo.getName(BuiltinID) + << "\n"; + } + llvm::StringRef CalleeName = CalleeDecl->getName(); if (CalleeName.startswith("__builtin_mpl_vector_")) { StringRef VectorOpName = @@ -3513,8 +3698,7 @@ BaseNode *Clang2MapleVisitor::getNodeAsRVal(Result &Res) { TmpAssign = Call; } else { TmpAssign = Builder->CreateStmtIntrinsicCallAssigned( - Call->GetIntrinsic(), Call->GetNopnd(), Sym, - Res.getValueTy()->GetTypeIndex()); + Call->GetIntrinsic(), Call->GetNopnd(), Sym); } } else { LogInfo::MapleLogger() << "Unhandled call node in getNodeAsRVal:\n"; diff --git a/test/intrinsics.c b/test/intrinsics.c new file mode 100644 index 0000000..318e992 --- /dev/null +++ b/test/intrinsics.c @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2021 Futurewei Technologies, Inc. + * + * clang2mpl is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan + * PSL v2. 
You may obtain a copy of Mulan PSL v2 at:
+ *
+ *     http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+ * KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ * NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the
+ * Mulan PSL v2 for more details.
+ */
+// RUN: %clang2mpl --ascii --verify %s -- -Wno-unused-value --target=aarch64-linux-elf
+// RUN: cat %m | %FileCheck %s
+
+#include <alloca.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+// CHECK: [[# FILENUM:]] "{{.*}}/test/intrinsics.c"
+
+struct S {
+  int a;
+  int b;
+};
+
+int g;
+
+void foo() {
+  int i;
+  long l;
+  long long ll;
+  unsigned long ul;
+  float f;
+  double d;
+  long double ld;
+  char *s;
+  struct S sS;
+  const int c = 42;
+
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs i32 (dread i32 %i)
+  abs(i);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs i32 (dread i32 %i)
+  __builtin_abs(i);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs i64 (dread i64 %l)
+  labs(l);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs i64 (dread i64 %l)
+  __builtin_labs(l);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs i64 (dread i64 %ll)
+  llabs(ll);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs i64 (dread i64 %ll)
+  __builtin_llabs(ll);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs f64 (dread f64 %d)
+  fabs(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs f64 (dread f64 %d)
+  __builtin_fabs(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs f32 (dread f32 %f)
+  fabsf(f);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs f32 (dread f32 %f)
+  __builtin_fabsf(f);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs f64 (dread f64 %ld)
+  fabsl(ld);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: abs f64 (dread f64 %ld)
+  __builtin_fabsl(ld);
+
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f64 C_acos (dread f64 %d))
+  __builtin_acos(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f64 C_acos (dread f64 %d))
+  acos(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f32 C_acosf (dread f32 %f))
+  __builtin_acosf(f);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f32 C_acosf (dread f32 %f))
+  acosf(f);
+
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: alloca a64 (cvt u64 i32 (constval i32 4))
+  alloca(4);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: alloca a64 (cvt u64 i32 (constval i32 16))
+  __builtin_alloca(16);
+
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f64 C_asin (dread f64 %d))
+  __builtin_asin(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f64 C_asin (dread f64 %d))
+  asin(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f32 C_asinf (dread f32 %f))
+  __builtin_asinf(f);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f32 C_asinf (dread f32 %f))
+  asinf(f);
+
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT: intrinsicop f64 C_atan (dread f64 %d))
+  __builtin_atan(d);
+  // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}}
+  // CHECK-NEXT:
intrinsicop f64 C_atan (dread f64 %d)) + atan(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_atanf (dread f32 %f)) + __builtin_atanf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_atanf (dread f32 %f)) + atanf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 1 + __builtin_classify_type(i); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 8 + __builtin_classify_type(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 5 + __builtin_classify_type(s); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 12 + __builtin_classify_type(sS); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_cos (dread f64 %d)) + __builtin_cos(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_cos (dread f64 %d)) + cos(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_cosf (dread f32 %f)) + __builtin_cosf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_cosf (dread f32 %f)) + cosf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_cosh (dread f64 %d)) + __builtin_cosh(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_cosh (dread f64 %d)) + cosh(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_coshf (dread f32 %f)) + __builtin_coshf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_coshf (dread f32 %f)) + coshf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_clz32 (cvt u32 i32 (dread i32 %i)) + __builtin_clz(i); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_clz64 (cvt u64 i64 (dread i64 %l)) + __builtin_clzl(l); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_clz64 (cvt u64 i64 (dread i64 %ll)) + __builtin_clzll(ll); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 1 + __builtin_constant_p(3 + 4); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 1 + __builtin_constant_p(c); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 0 + __builtin_constant_p(&g); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: constval i32 0 + __builtin_constant_p(ll * i); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_ctz32 (cvt u32 i32 (dread i32 %i)) + __builtin_ctz(i); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_ctz64 (cvt u64 i64 (dread i64 %l)) + __builtin_ctzl(l); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_ctz64 (cvt u64 i64 (dread i64 %ll)) + __builtin_ctzll(ll); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_exp (dread f64 %d)) + __builtin_exp(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_exp (dread f64 %d)) + exp(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_expf (dread f32 %f)) + __builtin_expf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_expf (dread 
f32 %f)) + expf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: add i64 (dread i64 %l, dread i64 %l) + __builtin_expect(l + l, 0); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop i32 C_ffs (dread i32 %i)) + __builtin_ffs(i); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: max f64 (dread f64 %d, dread f64 %d) + fmax(d, d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: max f64 (dread f64 %d, dread f64 %d) + __builtin_fmax(d, d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: max f32 (dread f32 %f, dread f32 %f) + fmaxf(f, f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: max f32 (dread f32 %f, dread f32 %f) + __builtin_fmaxf(f, f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: max f64 (dread f64 %ld, dread f64 %ld) + fmaxl(ld, ld); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: max f64 (dread f64 %ld, dread f64 %ld) + __builtin_fmaxl(ld, ld); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: min f64 (dread f64 %d, dread f64 %d) + fmin(d, d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: min f64 (dread f64 %d, dread f64 %d) + __builtin_fmin(d, d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: min f32 (dread f32 %f, dread f32 %f) + fminf(f, f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: min f32 (dread f32 %f, dread f32 %f) + __builtin_fminf(f, f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: min f64 (dread f64 %ld, dread f64 %ld) + fminl(ld, ld); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: min f64 (dread f64 %ld, dread f64 %ld) + __builtin_fminl(ld, ld); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: call &__isinf (dread f64 %d) + __builtin_isinf_sign(d); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_log (dread f64 %d)) + __builtin_log(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_log (dread f64 %d)) + log(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_logf (dread f32 %f)) + __builtin_logf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_logf (dread f32 %f)) + logf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_log10 (dread f64 %d)) + __builtin_log10(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_log10 (dread f64 %d)) + log10(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_log10f (dread f32 %f)) + __builtin_log10f(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_log10f (dread f32 %f)) + log10f(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memcmp (dread a64 %s, dread a64 %s, dread u64 %ul) + memcmp(s, s, ul); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memcmp (dread a64 %s, dread a64 %s, dread u64 %ul) + __builtin_memcmp(s, s, ul); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memcpy (dread a64 %s, dread a64 %s, dread u64 %ul) + memcpy(s, s, ul); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memcpy (dread a64 %s, dread a64 %s, dread 
u64 %ul) + __builtin_memcpy(s, s, ul); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memmove (dread a64 %s, dread a64 %s, dread u64 %ul) + memmove(s, s, ul); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memmove (dread a64 %s, dread a64 %s, dread u64 %ul) + __builtin_memmove(s, s, ul); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memset (dread a64 %s, dread i32 %i, dread u64 %ul) + memset(s, i, ul); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_memset (dread a64 %s, dread i32 %i, dread u64 %ul) + __builtin_memset(s, i, ul); + + // This doesn't generate any code for now, so nothing to check for, just make + // sure it doesn't get any errors. + __builtin_prefetch(s); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: call &__signbit (dread f64 %d) + __builtin_signbit(d); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_sin (dread f64 %d)) + __builtin_sin(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_sin (dread f64 %d)) + sin(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_sinf (dread f32 %f)) + __builtin_sinf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_sinf (dread f32 %f)) + sinf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_sinh (dread f64 %d)) + __builtin_sinh(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f64 C_sinh (dread f64 %d)) + sinh(d); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_sinhf (dread f32 %f)) + __builtin_sinhf(f); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsicop f32 C_sinhf (dread f32 %f)) + sinhf(f); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strcmp (dread a64 %s, dread a64 %s) + strcmp(s, s); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strcmp (dread a64 %s, dread a64 %s) + __builtin_strcmp(s, s); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strncmp (dread a64 %s, dread a64 %s, dread u64 %ul) + strncmp(s, s, ul); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strncmp (dread a64 %s, dread a64 %s, dread u64 %ul) + __builtin_strncmp(s, s, ul); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strcpy (dread a64 %s, dread a64 %s) + strcpy(s, s); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strcpy (dread a64 %s, dread a64 %s) + __builtin_strcpy(s, s); + + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strncpy (dread a64 %s, dread a64 %s, dread u64 %ul) + strncpy(s, s, ul); + // CHECK: LOC [[# FILENUM]] [[# @LINE + 2 ]]{{$}} + // CHECK-NEXT: intrinsiccall C_strncpy (dread a64 %s, dread a64 %s, dread u64 %ul) + __builtin_strncpy(s, s, ul); + + +} -- Gitee
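
A minimal sketch of what this series means on the C side, for anyone trying it out. This is illustrative only: `demo` is a hypothetical driver rather than part of the patches, and the exact dassign/retVar wrapping around the emitted nodes depends on the clang2mpl version; the lowerings named in the comments are the ones added by patch 11 and exercised by test/intrinsics.c above.

    #include <math.h>
    #include <string.h>

    /* With patch 11 applied, libm calls and their __builtin_ twins are
     * lowered to Maple intrinsics or opcodes instead of ordinary calls:
     *   cosf(x)         -> intrinsicop f32 C_cosf (...)
     *   memcpy(d, s, n) -> intrinsiccall C_memcpy (...)
     *   fminf(a, b)     -> min f32 (...)
     */
    float demo(float x, char *dst, const char *src, unsigned long n) {
      memcpy(dst, src, n);         /* intrinsiccall C_memcpy */
      return fminf(cosf(x), 0.5f); /* intrinsicop C_cosf, then min f32 */
    }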